📄 lexer.cpp
字号:
// terminate string buffer8[pos8] = '\0';#ifdef KJS_DEBUG_LEX fprintf(stderr, "line: %d ", lineNo()); fprintf(stderr, "yytext (%x): ", buffer8[0]); fprintf(stderr, "%s ", buffer8);#endif double dval = 0; if (state == Number) { dval = strtod(buffer8, 0L); } else if (state == Hex) { // scan hex numbers // TODO: support long unsigned int unsigned int i; sscanf(buffer8, "%x", &i); dval = i; state = Number; } else if (state == Octal) { // scan octal number unsigned int ui; sscanf(buffer8, "%o", &ui); dval = ui; state = Number; }#ifdef KJS_DEBUG_LEX switch (state) { case Eof: printf("(EOF)\n"); break; case Other: printf("(Other)\n"); break; case Identifier: printf("(Identifier)/(Keyword)\n"); break; case String: printf("(String)\n"); break; case Number: printf("(Number)\n"); break; default: printf("(unknown)"); }#endif if (state != Identifier && eatNextIdentifier) eatNextIdentifier = false; restrKeyword = false; delimited = false; kjsyylloc.first_line = yylineno; // ??? kjsyylloc.last_line = yylineno; switch (state) { case Eof: token = 0; break; case Other: if(token == '}' || token == ';') { delimited = true; } break; case Identifier: if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) { // Lookup for keyword failed, means this is an identifier // Apply anonymous-function hack below (eat the identifier) if (eatNextIdentifier) { eatNextIdentifier = false; token = lex(); break; } kjsyylval.ident = makeIdentifier(buffer16, pos16); token = IDENT; break; } eatNextIdentifier = false; // Hack for "f = function somename() { ... }", too hard to get into the grammar if (token == FUNCTION && lastToken == '=' ) eatNextIdentifier = true; if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW) restrKeyword = true; break; case String: kjsyylval.ustr = makeUString(buffer16, pos16); token = STRING; break; case Number: kjsyylval.dval = dval; token = NUMBER; break; case Bad: fprintf(stderr, "yylex: ERROR.\n"); return -1; default: assert(!"unhandled numeration value in switch"); return -1; } lastToken = token; return token;}bool Lexer::isWhiteSpace() const{ return (current == ' ' || current == '\t' || current == 0x0b || current == 0x0c || current == 0xa0);}bool Lexer::isLineTerminator(){ bool cr = (current == '\r'); bool lf = (current == '\n'); if (cr) skipLF = true; else if (lf) skipCR = true; return cr || lf;}bool Lexer::isIdentLetter(unsigned short c){ /* TODO: allow other legitimate unicode chars */ return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '$' || c == '_');}bool Lexer::isDecimalDigit(unsigned short c){ return (c >= '0' && c <= '9');}bool Lexer::isHexDigit(unsigned short c) const{ return (c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F');}bool Lexer::isOctalDigit(unsigned short c) const{ return (c >= '0' && c <= '7');}int Lexer::matchPunctuator(unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4){ if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { shift(4); return URSHIFTEQUAL; } else if (c1 == '=' && c2 == '=' && c3 == '=') { shift(3); return STREQ; } else if (c1 == '!' && c2 == '=' && c3 == '=') { shift(3); return STRNEQ; } else if (c1 == '>' && c2 == '>' && c3 == '>') { shift(3); return URSHIFT; } else if (c1 == '<' && c2 == '<' && c3 == '=') { shift(3); return LSHIFTEQUAL; } else if (c1 == '>' && c2 == '>' && c3 == '=') { shift(3); return RSHIFTEQUAL; } else if (c1 == '<' && c2 == '=') { shift(2); return LE; } else if (c1 == '>' && c2 == '=') { shift(2); return GE; } else if (c1 == '!' && c2 == '=') { shift(2); return NE; } else if (c1 == '+' && c2 == '+') { shift(2); if (terminator) return AUTOPLUSPLUS; else return PLUSPLUS; } else if (c1 == '-' && c2 == '-') { shift(2); if (terminator) return AUTOMINUSMINUS; else return MINUSMINUS; } else if (c1 == '=' && c2 == '=') { shift(2); return EQEQ; } else if (c1 == '+' && c2 == '=') { shift(2); return PLUSEQUAL; } else if (c1 == '-' && c2 == '=') { shift(2); return MINUSEQUAL; } else if (c1 == '*' && c2 == '=') { shift(2); return MULTEQUAL; } else if (c1 == '/' && c2 == '=') { shift(2); return DIVEQUAL; } else if (c1 == '&' && c2 == '=') { shift(2); return ANDEQUAL; } else if (c1 == '^' && c2 == '=') { shift(2); return XOREQUAL; } else if (c1 == '%' && c2 == '=') { shift(2); return MODEQUAL; } else if (c1 == '|' && c2 == '=') { shift(2); return OREQUAL; } else if (c1 == '<' && c2 == '<') { shift(2); return LSHIFT; } else if (c1 == '>' && c2 == '>') { shift(2); return RSHIFT; } else if (c1 == '&' && c2 == '&') { shift(2); return AND; } else if (c1 == '|' && c2 == '|') { shift(2); return OR; } switch(c1) { case '=': case '>': case '<': case ',': case '!': case '~': case '?': case ':': case '.': case '+': case '-': case '*': case '/': case '&': case '|': case '^': case '%': case '(': case ')': case '{': case '}': case '[': case ']': case ';': shift(1); return static_cast<int>(c1); default: return -1; }}unsigned short Lexer::singleEscape(unsigned short c) const{ switch(c) { case 'b': return 0x08; case 't': return 0x09; case 'n': return 0x0A; case 'v': return 0x0B; case 'f': return 0x0C; case 'r': return 0x0D; case '"': return 0x22; case '\'': return 0x27; case '\\': return 0x5C; default: return c; }}unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2, unsigned short c3) const{ return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');}unsigned char Lexer::convertHex(unsigned short c){ if (c >= '0' && c <= '9') return (c - '0'); else if (c >= 'a' && c <= 'f') return (c - 'a' + 10); else return (c - 'A' + 10);}unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2){ return ((convertHex(c1) << 4) + convertHex(c2));}UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4){ return UChar((convertHex(c1) << 4) + convertHex(c2), (convertHex(c3) << 4) + convertHex(c4));}void Lexer::record8(unsigned short c){ assert(c <= 0xff); // enlarge buffer if full if (pos8 >= size8 - 1) { char *tmp = new char[2 * size8]; memcpy(tmp, buffer8, size8 * sizeof(char)); delete [] buffer8; buffer8 = tmp; size8 *= 2; } buffer8[pos8++] = (char) c;}void Lexer::record16(UChar c){ // enlarge buffer if full if (pos16 >= size16 - 1) { UChar *tmp = new UChar[2 * size16]; memcpy(tmp, buffer16, size16 * sizeof(UChar)); delete [] buffer16; buffer16 = tmp; size16 *= 2; } buffer16[pos16++] = c;}bool Lexer::scanRegExp(){ pos16 = 0; bool lastWasEscape = false; bool inBrackets = false; while (1) { if (isLineTerminator() || current == 0) return false; else if (current != '/' || lastWasEscape == true || inBrackets == true) { // keep track of '[' and ']' if ( !lastWasEscape ) { if ( current == '[' && !inBrackets ) inBrackets = true; if ( current == ']' && inBrackets ) inBrackets = false; } record16(current); lastWasEscape = !lastWasEscape && (current == '\\'); } else { // end of regexp pattern = UString(buffer16, pos16); pos16 = 0; shift(1); break; } shift(1); } while (isIdentLetter(current)) { record16(current); shift(1); } flags = UString(buffer16, pos16); return true;}void Lexer::doneParsing(){ for (unsigned i = 0; i < numIdentifiers; i++) { delete identifiers[i]; } free (identifiers); identifiers = 0; numIdentifiers = 0; identifiersCapacity = 0; for (unsigned i = 0; i < numStrings; i++) { delete strings[i]; } free (strings); strings = 0; numStrings = 0; stringsCapacity = 0;}const int initialCapacity = 64;const int growthFactor = 2;Identifier *Lexer::makeIdentifier(UChar *buffer, unsigned int pos){ if (numIdentifiers == identifiersCapacity) { identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor; identifiers = (KJS::Identifier **)realloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity); } KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16); identifiers[numIdentifiers++] = identifier; return identifier;} UString *Lexer::makeUString(UChar *buffer, unsigned int pos){ if (numStrings == stringsCapacity) { stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor; strings = (UString **)realloc(strings, sizeof(UString *) * stringsCapacity); } UString *string = new UString(buffer16, pos16); strings[numStrings++] = string; return string;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -