diff options
Diffstat (limited to 'lib/lua/src/llex.c')
-rw-r--r-- | lib/lua/src/llex.c | 559 |
1 files changed, 288 insertions, 271 deletions
diff --git a/lib/lua/src/llex.c b/lib/lua/src/llex.c index c06ea9f..f48a277 100644 --- a/lib/lua/src/llex.c +++ b/lib/lua/src/llex.c @@ -1,14 +1,16 @@ /* -** $Id: llex.c,v 1.6 2004-12-19 16:14:04 pixel Exp $ +** $Id: llex.c,v 1.7 2007-07-27 10:05:54 pixel Exp $ ** Lexical Analyzer ** See Copyright Notice in lua.h */ #include <ctype.h> +#include <locale.h> #include <string.h> #define llex_c +#define LUA_CORE #include "lua.h" @@ -18,32 +20,54 @@ #include "lparser.h" #include "lstate.h" #include "lstring.h" +#include "ltable.h" #include "lzio.h" -#define next(LS) (LS->current = zgetc(LS->z)) +#define next(ls) (ls->current = zgetc(ls->z)) + +#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') + + /* ORDER RESERVED */ -static const char *const token2string [] = { +const char *const luaX_tokens [] = { "and", "break", "do", "else", "elseif", "end", "false", "for", "function", "if", "in", "local", "nil", "not", "or", "repeat", - "return", "then", "true", "until", "while", "*name", + "return", "then", "true", "until", "while", "..", "...", "==", ">=", "<=", "~=", - "*number", "*string", "<eof>" + "<number>", "<name>", "<string>", "<eof>", + NULL }; +#define save_and_next(ls) (save(ls, ls->current), next(ls)) + + +static void save (LexState *ls, int c) { + Mbuffer *b = ls->buff; + if (b->n + 1 > b->buffsize) { + size_t newsize; + if (b->buffsize >= MAX_SIZET/2) + luaX_lexerror(ls, "lexical element too long", 0); + newsize = b->buffsize * 2; + luaZ_resizebuffer(ls->L, b, newsize); + } + b->buffer[b->n++] = cast(char, c); +} + + void luaX_init (lua_State *L) { int i; for (i=0; i<NUM_RESERVED; i++) { - TString *ts = luaS_new(L, token2string[i]); + TString *ts = luaS_new(L, luaX_tokens[i]); luaS_fix(ts); /* reserved words are never collected */ - lua_assert(strlen(token2string[i])+1 <= TOKEN_LEN); - ts->tsv.reserved = cast(lu_byte, i+1); /* reserved word */ + lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); + ts->tsv.reserved = cast_byte(i+1); /* reserved word */ } } @@ -51,85 +75,77 @@ void luaX_init (lua_State *L) { #define MAXSRC 80 -void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) { - if (val > limit) { - msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit); - luaX_syntaxerror(ls, msg); +const char *luaX_token2str (LexState *ls, int token) { + if (token < FIRST_RESERVED) { + lua_assert(token == cast(unsigned char, token)); + return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : + luaO_pushfstring(ls->L, "%c", token); } + else + return luaX_tokens[token-FIRST_RESERVED]; } -void luaX_errorline (LexState *ls, const char *s, const char *token, int line) { - lua_State *L = ls->L; - char buff[MAXSRC]; - luaO_chunkid(buff, getstr(ls->source), MAXSRC); - luaO_pushfstring(L, "%s:%d: %s near `%s'", buff, line, s, token); - luaD_throw(L, LUA_ERRSYNTAX); -} - - -static void luaX_error (LexState *ls, const char *s, const char *token) { - luaX_errorline(ls, s, token, ls->linenumber); -} - - -void luaX_syntaxerror (LexState *ls, const char *msg) { - const char *lasttoken; - switch (ls->t.token) { +static const char *txtToken (LexState *ls, int token) { + switch (token) { case TK_NAME: - lasttoken = getstr(ls->t.seminfo.ts); - break; case TK_STRING: case TK_NUMBER: - lasttoken = luaZ_buffer(ls->buff); - break; + save(ls, '\0'); + return luaZ_buffer(ls->buff); default: - lasttoken = luaX_token2str(ls, ls->t.token); - break; + return luaX_token2str(ls, token); } - luaX_error(ls, msg, lasttoken); } -const char *luaX_token2str (LexState *ls, int token) { - if (token < FIRST_RESERVED) { - lua_assert(token == (unsigned char)token); - return luaO_pushfstring(ls->L, "%c", token); - } - else - return token2string[token-FIRST_RESERVED]; +void luaX_lexerror (LexState *ls, const char *msg, int token) { + char buff[MAXSRC]; + luaO_chunkid(buff, getstr(ls->source), MAXSRC); + msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); + if (token) + luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); + luaD_throw(ls->L, LUA_ERRSYNTAX); } -static void luaX_lexerror (LexState *ls, const char *s, int token) { - if (token == TK_EOS) - luaX_error(ls, s, luaX_token2str(ls, token)); - else - luaX_error(ls, s, luaZ_buffer(ls->buff)); +void luaX_syntaxerror (LexState *ls, const char *msg) { + luaX_lexerror(ls, msg, ls->t.token); } -static void inclinenumber (LexState *LS) { - next(LS); /* skip `\n' */ - ++LS->linenumber; - luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk"); +TString *luaX_newstring (LexState *ls, const char *str, size_t l) { + lua_State *L = ls->L; + TString *ts = luaS_newlstr(L, str, l); + TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ + if (ttisnil(o)) + setbvalue(o, 1); /* make sure `str' will not be collected */ + return ts; } -void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) { - LS->L = L; - LS->lookahead.token = TK_EOS; /* no look-ahead token */ - LS->z = z; - LS->fs = NULL; - LS->linenumber = 1; - LS->lastline = 1; - LS->source = source; - next(LS); /* read first char */ - if (LS->current == '#') { - do { /* skip first line */ - next(LS); - } while (LS->current != '\n' && LS->current != EOZ); - } +static void inclinenumber (LexState *ls) { + int old = ls->current; + lua_assert(currIsNewline(ls)); + next(ls); /* skip `\n' or `\r' */ + if (currIsNewline(ls) && ls->current != old) + next(ls); /* skip `\n\r' or `\r\n' */ + if (++ls->linenumber >= MAX_INT) + luaX_syntaxerror(ls, "chunk has too many lines"); +} + + +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { + ls->decpoint = '.'; + ls->L = L; + ls->lookahead.token = TK_EOS; /* no look-ahead token */ + ls->z = z; + ls->fs = NULL; + ls->linenumber = 1; + ls->lastline = 1; + ls->source = source; + luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ + next(ls); /* read first char */ } @@ -141,269 +157,250 @@ void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) { */ -/* use buffer to store names, literal strings and numbers */ -/* extra space to allocate when growing buffer */ -#define EXTRABUFF 32 +static int check_next (LexState *ls, const char *set) { + if (!strchr(set, ls->current)) + return 0; + save_and_next(ls); + return 1; +} + -/* maximum number of chars that can be read without checking buffer size */ -#define MAXNOCHECK 5 +static void buffreplace (LexState *ls, char from, char to) { + size_t n = luaZ_bufflen(ls->buff); + char *p = luaZ_buffer(ls->buff); + while (n--) + if (p[n] == from) p[n] = to; +} -#define checkbuffer(LS, len) \ - if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \ - luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF) -#define save(LS, c, l) \ - (luaZ_buffer((LS)->buff)[l++] = cast(char, c)) -#define save_and_next(LS, l) (save(LS, LS->current, l), next(LS)) +static void trydecpoint (LexState *ls, SemInfo *seminfo) { + /* format error: try to update decimal point separator */ + struct lconv *cv = localeconv(); + char old = ls->decpoint; + ls->decpoint = (cv ? cv->decimal_point[0] : '.'); + buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ + if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { + /* format error with correct decimal point: no more options */ + buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ + luaX_lexerror(ls, "malformed number", TK_NUMBER); + } +} -static size_t readname (LexState *LS) { - size_t l = 0; - checkbuffer(LS, l); +/* LUA_NUMBER */ +static void read_numeral (LexState *ls, SemInfo *seminfo) { + lua_assert(isdigit(ls->current)); do { - checkbuffer(LS, l); - save_and_next(LS, l); - } while (isalnum(LS->current) || LS->current == '_'); - save(LS, '\0', l); - return l-1; + save_and_next(ls); + } while (isdigit(ls->current) || ls->current == '.'); + if (check_next(ls, "Ee")) /* `E'? */ + check_next(ls, "+-"); /* optional exponent sign */ + while (isalnum(ls->current) || ls->current == '_') + save_and_next(ls); + save(ls, '\0'); + buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ + if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ + trydecpoint(ls, seminfo); /* try to update decimal point separator */ } -/* LUA_NUMBER */ -static void read_numeral (LexState *LS, int comma, SemInfo *seminfo) { - int oct = 0, hex = 0; - size_t l = 0; - checkbuffer(LS, l); - if (comma) save(LS, '.', l); - else if (LS->current == '0') { - oct = 1; - checkbuffer(LS, 1); - save_and_next(LS, l); - if (LS->current == 'x') { - oct = 0; - hex = 1; - checkbuffer(LS, 1); - save_and_next(LS, l); - } else if (LS->current == '.') { - oct = hex = 0; - } - } - while (isdigit(LS->current) || (hex && isxdigit(LS->current))) { - checkbuffer(LS, l); - save_and_next(LS, l); - } - checkbuffer(LS, 1); - if (LS->current == '.') { - save_and_next(LS, l); - if (hex || oct) { - save(LS, '\0', l); - luaX_lexerror(LS, - "error in number, mixing decimal point with octal or hexadecimal", - TK_NUMBER); - } - if (LS->current == '.') { - save_and_next(LS, l); - save(LS, '\0', l); - luaX_lexerror(LS, - "ambiguous syntax (decimal point x string concatenation)", - TK_NUMBER); - } - } - while (isdigit(LS->current)) { - checkbuffer(LS, l); - save_and_next(LS, l); +static int skip_sep (LexState *ls) { + int count = 0; + int s = ls->current; + lua_assert(s == '[' || s == ']'); + save_and_next(ls); + while (ls->current == '=') { + save_and_next(ls); + count++; } - if (LS->current == 'e' || LS->current == 'E') { - save_and_next(LS, l); /* read `E' */ - if (hex || oct) { - save(LS, '\0', l); - luaX_lexerror(LS, - "error in number, mixing exponential with octal or hexadecimal", - TK_NUMBER); - } - if (LS->current == '+' || LS->current == '-') - save_and_next(LS, l); /* optional exponent sign */ - while (isdigit(LS->current)) { - checkbuffer(LS, l); - save_and_next(LS, l); - } - } - save(LS, '\0', l); - if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r)) - luaX_lexerror(LS, "malformed number", TK_NUMBER); + return (ls->current == s) ? count : (-count) - 1; } -static void read_long_string (LexState *LS, SemInfo *seminfo) { +static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { int cont = 0; - size_t l = 0; - checkbuffer(LS, l); - save(LS, '[', l); /* save first `[' */ - save_and_next(LS, l); /* pass the second `[' */ - if (LS->current == '\n') /* string starts with a newline? */ - inclinenumber(LS); /* skip it */ + (void)(cont); /* avoid warnings when `cont' is not used */ + save_and_next(ls); /* skip 2nd `[' */ + if (currIsNewline(ls)) /* string starts with a newline? */ + inclinenumber(ls); /* skip it */ for (;;) { - checkbuffer(LS, l); - switch (LS->current) { + switch (ls->current) { case EOZ: - save(LS, '\0', l); - luaX_lexerror(LS, (seminfo) ? "unfinished long string" : + luaX_lexerror(ls, (seminfo) ? "unfinished long string" : "unfinished long comment", TK_EOS); break; /* to avoid warnings */ - case '[': - save_and_next(LS, l); - if (LS->current == '[') { +#if defined(LUA_COMPAT_LSTR) + case '[': { + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd `[' */ cont++; - save_and_next(LS, l); +#if LUA_COMPAT_LSTR == 1 + if (sep == 0) + luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); +#endif } - continue; - case ']': - save_and_next(LS, l); - if (LS->current == ']') { - if (cont == 0) goto endloop; + break; + } +#endif + case ']': { + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd `]' */ +#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 cont--; - save_and_next(LS, l); + if (sep == 0 && cont >= 0) break; +#endif + goto endloop; } - continue; + break; + } case '\n': - save(LS, '\n', l); - inclinenumber(LS); - if (!seminfo) l = 0; /* reset buffer to avoid wasting space */ - continue; - default: - save_and_next(LS, l); + case '\r': { + save(ls, '\n'); + inclinenumber(ls); + if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ + break; + } + default: { + if (seminfo) save_and_next(ls); + else next(ls); + } } } endloop: - save_and_next(LS, l); /* skip the second `]' */ - save(LS, '\0', l); if (seminfo) - seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 2, l - 5); + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), + luaZ_bufflen(ls->buff) - 2*(2 + sep)); } -static void read_string (LexState *LS, int del, SemInfo *seminfo) { - size_t l = 0; - checkbuffer(LS, l); - save_and_next(LS, l); - while (LS->current != del) { - checkbuffer(LS, l); - switch (LS->current) { +static void read_string (LexState *ls, int del, SemInfo *seminfo) { + save_and_next(ls); + while (ls->current != del) { + switch (ls->current) { case EOZ: - save(LS, '\0', l); - luaX_lexerror(LS, "unfinished string", TK_EOS); - break; /* to avoid warnings */ + luaX_lexerror(ls, "unfinished string", TK_EOS); + continue; /* to avoid warnings */ case '\n': - save(LS, '\0', l); - luaX_lexerror(LS, "unfinished string", TK_STRING); - break; /* to avoid warnings */ - case '\\': - next(LS); /* do not save the `\' */ - switch (LS->current) { - case 'a': save(LS, '\a', l); next(LS); break; - case 'b': save(LS, '\b', l); next(LS); break; - case 'f': save(LS, '\f', l); next(LS); break; - case 'n': save(LS, '\n', l); next(LS); break; - case 'r': save(LS, '\r', l); next(LS); break; - case 't': save(LS, '\t', l); next(LS); break; - case 'v': save(LS, '\v', l); next(LS); break; - case '\n': save(LS, '\n', l); inclinenumber(LS); break; - case EOZ: break; /* will raise an error next loop */ + case '\r': + luaX_lexerror(ls, "unfinished string", TK_STRING); + continue; /* to avoid warnings */ + case '\\': { + int c; + next(ls); /* do not save the `\' */ + switch (ls->current) { + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\n': /* go through */ + case '\r': save(ls, '\n'); inclinenumber(ls); continue; + case EOZ: continue; /* will raise an error next loop */ default: { - if (!isdigit(LS->current)) - save_and_next(LS, l); /* handles \\, \", \', and \? */ + if (!isdigit(ls->current)) + save_and_next(ls); /* handles \\, \", \', and \? */ else { /* \xxx */ - int c = 0; int i = 0; + c = 0; do { - c = 10*c + (LS->current-'0'); - next(LS); - } while (++i<3 && isdigit(LS->current)); - if (c > UCHAR_MAX) { - save(LS, '\0', l); - luaX_lexerror(LS, "escape sequence too large", TK_STRING); - } - save(LS, c, l); + c = 10*c + (ls->current-'0'); + next(ls); + } while (++i<3 && isdigit(ls->current)); + if (c > UCHAR_MAX) + luaX_lexerror(ls, "escape sequence too large", TK_STRING); + save(ls, c); } + continue; } } - break; + save(ls, c); + next(ls); + continue; + } default: - save_and_next(LS, l); + save_and_next(ls); } } - save_and_next(LS, l); /* skip delimiter */ - save(LS, '\0', l); - seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 1, l - 3); + save_and_next(ls); /* skip delimiter */ + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, + luaZ_bufflen(ls->buff) - 2); } -int luaX_lex (LexState *LS, SemInfo *seminfo) { +static int llex (LexState *ls, SemInfo *seminfo) { + luaZ_resetbuffer(ls->buff); for (;;) { - switch (LS->current) { - - case '\n': { - inclinenumber(LS); + switch (ls->current) { + case '\n': + case '\r': { + inclinenumber(ls); continue; } case '-': { - next(LS); - if (LS->current != '-') return '-'; + next(ls); + if (ls->current != '-') return '-'; /* else is a comment */ - next(LS); - if (LS->current == '[' && (next(LS), LS->current == '[')) - read_long_string(LS, NULL); /* long comment */ - else /* short comment */ - while (LS->current != '\n' && LS->current != EOZ) - next(LS); + next(ls); + if (ls->current == '[') { + int sep = skip_sep(ls); + luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ + if (sep >= 0) { + read_long_string(ls, NULL, sep); /* long comment */ + luaZ_resetbuffer(ls->buff); + continue; + } + } + /* else short comment */ + while (!currIsNewline(ls) && ls->current != EOZ) + next(ls); continue; } case '[': { - next(LS); - if (LS->current != '[') return '['; - else { - read_long_string(LS, seminfo); + int sep = skip_sep(ls); + if (sep >= 0) { + read_long_string(ls, seminfo, sep); return TK_STRING; } + else if (sep == -1) return '['; + else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); } case '=': { - next(LS); - if (LS->current != '=') return '='; - else { next(LS); return TK_EQ; } + next(ls); + if (ls->current != '=') return '='; + else { next(ls); return TK_EQ; } } case '<': { - next(LS); - if (LS->current != '=') return '<'; - else { next(LS); return TK_LE; } + next(ls); + if (ls->current != '=') return '<'; + else { next(ls); return TK_LE; } } case '>': { - next(LS); - if (LS->current != '=') return '>'; - else { next(LS); return TK_GE; } + next(ls); + if (ls->current != '=') return '>'; + else { next(ls); return TK_GE; } } case '~': { - next(LS); - if (LS->current != '=') return '~'; - else { next(LS); return TK_NE; } + next(ls); + if (ls->current != '=') return '~'; + else { next(ls); return TK_NE; } } case '"': case '\'': { - read_string(LS, LS->current, seminfo); + read_string(ls, ls->current, seminfo); return TK_STRING; } case '.': { - next(LS); - if (LS->current == '.') { - next(LS); - if (LS->current == '.') { - next(LS); + save_and_next(ls); + if (check_next(ls, ".")) { + if (check_next(ls, ".")) return TK_DOTS; /* ... */ - } else return TK_CONCAT; /* .. */ } - else if (!isdigit(LS->current)) return '.'; + else if (!isdigit(ls->current)) return '.'; else { - read_numeral(LS, 1, seminfo); + read_numeral(ls, seminfo); return TK_NUMBER; } } @@ -411,29 +408,33 @@ int luaX_lex (LexState *LS, SemInfo *seminfo) { return TK_EOS; } default: { - if (isspace(LS->current)) { - next(LS); + if (isspace(ls->current)) { + lua_assert(!currIsNewline(ls)); + next(ls); continue; } - else if (isdigit(LS->current)) { - read_numeral(LS, 0, seminfo); + else if (isdigit(ls->current)) { + read_numeral(ls, seminfo); return TK_NUMBER; } - else if (isalpha(LS->current) || LS->current == '_') { + else if (isalpha(ls->current) || ls->current == '_') { /* identifier or reserved word */ - size_t l = readname(LS); - TString *ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), l); + TString *ts; + do { + save_and_next(ls); + } while (isalnum(ls->current) || ls->current == '_'); + ts = luaX_newstring(ls, luaZ_buffer(ls->buff), + luaZ_bufflen(ls->buff)); if (ts->tsv.reserved > 0) /* reserved word? */ return ts->tsv.reserved - 1 + FIRST_RESERVED; - seminfo->ts = ts; - return TK_NAME; + else { + seminfo->ts = ts; + return TK_NAME; + } } else { - int c = LS->current; - if (iscntrl(c)) - luaX_error(LS, "invalid control char", - luaO_pushfstring(LS->L, "char(%d)", c)); - next(LS); + int c = ls->current; + next(ls); return c; /* single-char tokens (+ - / ...) */ } } @@ -441,4 +442,20 @@ int luaX_lex (LexState *LS, SemInfo *seminfo) { } } -#undef next + +void luaX_next (LexState *ls) { + ls->lastline = ls->linenumber; + if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ + ls->t = ls->lookahead; /* use this one */ + ls->lookahead.token = TK_EOS; /* and discharge it */ + } + else + ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ +} + + +void luaX_lookahead (LexState *ls) { + lua_assert(ls->lookahead.token == TK_EOS); + ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); +} + |