📄 lexer.cpp
字号:
ASSERT(!"Unhandled state in switch statement"); } // move on to the next character if (!m_done) shift(1); if (m_state != Start && m_state != InSingleLineComment) m_atLineStart = false; } // no identifiers allowed directly after numeric literal, e.g. "3in" is bad if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current)) m_state = Bad; // terminate string m_buffer8.append('\0');#ifdef JSC_DEBUG_LEX fprintf(stderr, "line: %d ", lineNo()); fprintf(stderr, "yytext (%x): ", m_buffer8[0]); fprintf(stderr, "%s ", m_buffer8.data());#endif double dval = 0; if (m_state == Number) dval = WTF::strtod(m_buffer8.data(), 0L); else if (m_state == Hex) { // scan hex numbers const char* p = m_buffer8.data() + 2; while (char c = *p++) { dval *= 16; dval += convertHex(c); } if (dval >= mantissaOverflowLowerBound) dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); m_state = Number; } else if (m_state == Octal) { // scan octal number const char* p = m_buffer8.data() + 1; while (char c = *p++) { dval *= 8; dval += c - '0'; } if (dval >= mantissaOverflowLowerBound) dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); m_state = Number; }#ifdef JSC_DEBUG_LEX switch (m_state) { case Eof: printf("(EOF)\n"); break; case Other: printf("(Other)\n"); break; case Identifier: printf("(Identifier)/(Keyword)\n"); break; case String: printf("(String)\n"); break; case Number: printf("(Number)\n"); break; default: printf("(unknown)"); }#endif if (m_state != Identifier) m_eatNextIdentifier = false; m_restrKeyword = false; m_delimited = false; llocp->first_line = yylineno; llocp->last_line = yylineno; llocp->first_column = startOffset; llocp->last_column = m_currentOffset; switch (m_state) { case Eof: token = 0; break; case Other: if (token == '}' || token == ';') m_delimited = true; break; case Identifier: // Apply anonymous-function hack below (eat the identifier). if (m_eatNextIdentifier) { m_eatNextIdentifier = false; token = lex(lvalp, llocp); break; } lvalp->ident = makeIdentifier(m_buffer16); token = IDENT; break; case IdentifierOrKeyword: { lvalp->ident = makeIdentifier(m_buffer16); const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident); if (!entry) { // Lookup for keyword failed, means this is an identifier. token = IDENT; break; } token = entry->lexerValue(); // Hack for "f = function somename() { ... }"; too hard to get into the grammar. m_eatNextIdentifier = token == FUNCTION && m_lastToken == '='; if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW) m_restrKeyword = true; break; } case String: // Atomize constant strings in case they're later used in property lookup. lvalp->ident = makeIdentifier(m_buffer16); token = STRING; break; case Number: lvalp->doubleValue = dval; token = NUMBER; break; case Bad:#ifdef JSC_DEBUG_LEX fprintf(stderr, "yylex: ERROR.\n");#endif m_error = true; return -1; default: ASSERT(!"unhandled numeration value in switch"); m_error = true; return -1; } m_lastToken = token; return token;}bool Lexer::isWhiteSpace() const{ return isWhiteSpace(m_current);}bool Lexer::isLineTerminator(){ bool cr = (m_current == '\r'); bool lf = (m_current == '\n'); if (cr) m_skipLF = true; else if (lf) m_skipCR = true; return cr || lf || m_current == 0x2028 || m_current == 0x2029;}bool Lexer::isIdentStart(int c){ return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));}bool Lexer::isIdentPart(int c){ return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));}static bool isDecimalDigit(int c){ return isASCIIDigit(c);}bool Lexer::isHexDigit(int c){ return isASCIIHexDigit(c); }bool Lexer::isOctalDigit(int c){ return isASCIIOctalDigit(c);}int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4){ if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { shift(4); return URSHIFTEQUAL; } if (c1 == '=' && c2 == '=' && c3 == '=') { shift(3); return STREQ; } if (c1 == '!' && c2 == '=' && c3 == '=') { shift(3); return STRNEQ; } if (c1 == '>' && c2 == '>' && c3 == '>') { shift(3); return URSHIFT; } if (c1 == '<' && c2 == '<' && c3 == '=') { shift(3); return LSHIFTEQUAL; } if (c1 == '>' && c2 == '>' && c3 == '=') { shift(3); return RSHIFTEQUAL; } if (c1 == '<' && c2 == '=') { shift(2); return LE; } if (c1 == '>' && c2 == '=') { shift(2); return GE; } if (c1 == '!' && c2 == '=') { shift(2); return NE; } if (c1 == '+' && c2 == '+') { shift(2); if (m_terminator) return AUTOPLUSPLUS; return PLUSPLUS; } if (c1 == '-' && c2 == '-') { shift(2); if (m_terminator) return AUTOMINUSMINUS; return MINUSMINUS; } if (c1 == '=' && c2 == '=') { shift(2); return EQEQ; } if (c1 == '+' && c2 == '=') { shift(2); return PLUSEQUAL; } if (c1 == '-' && c2 == '=') { shift(2); return MINUSEQUAL; } if (c1 == '*' && c2 == '=') { shift(2); return MULTEQUAL; } if (c1 == '/' && c2 == '=') { shift(2); return DIVEQUAL; } if (c1 == '&' && c2 == '=') { shift(2); return ANDEQUAL; } if (c1 == '^' && c2 == '=') { shift(2); return XOREQUAL; } if (c1 == '%' && c2 == '=') { shift(2); return MODEQUAL; } if (c1 == '|' && c2 == '=') { shift(2); return OREQUAL; } if (c1 == '<' && c2 == '<') { shift(2); return LSHIFT; } if (c1 == '>' && c2 == '>') { shift(2); return RSHIFT; } if (c1 == '&' && c2 == '&') { shift(2); return AND; } if (c1 == '|' && c2 == '|') { shift(2); return OR; } switch (c1) { case '=': case '>': case '<': case ',': case '!': case '~': case '?': case ':': case '.': case '+': case '-': case '*': case '/': case '&': case '|': case '^': case '%': case '(': case ')': case '[': case ']': case ';': shift(1); return static_cast<int>(c1); case '{': charPos = m_currentOffset; shift(1); return OPENBRACE; case '}': charPos = m_currentOffset; shift(1); return CLOSEBRACE; default: return -1; }}unsigned short Lexer::singleEscape(unsigned short c){ switch (c) { case 'b': return 0x08; case 't': return 0x09; case 'n': return 0x0A; case 'v': return 0x0B; case 'f': return 0x0C; case 'r': return 0x0D; case '"': return 0x22; case '\'': return 0x27; case '\\': return 0x5C; default: return c; }}unsigned short Lexer::convertOctal(int c1, int c2, int c3){ return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');}unsigned char Lexer::convertHex(int c){ if (c >= '0' && c <= '9') return static_cast<unsigned char>(c - '0'); if (c >= 'a' && c <= 'f') return static_cast<unsigned char>(c - 'a' + 10); return static_cast<unsigned char>(c - 'A' + 10);}unsigned char Lexer::convertHex(int c1, int c2){ return ((convertHex(c1) << 4) + convertHex(c2));}UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4){ unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2); unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4); return (highByte << 8 | lowByte);}void Lexer::record8(int c){ ASSERT(c >= 0); ASSERT(c <= 0xff); m_buffer8.append(static_cast<char>(c));}void Lexer::record16(int c){ ASSERT(c >= 0); ASSERT(c <= USHRT_MAX); record16(UChar(static_cast<unsigned short>(c)));}void Lexer::record16(UChar c){ m_buffer16.append(c);}bool Lexer::scanRegExp(){ m_buffer16.clear(); bool lastWasEscape = false; bool inBrackets = false; while (1) { if (isLineTerminator() || m_current == -1) return false; else if (m_current != '/' || lastWasEscape == true || inBrackets == true) { // keep track of '[' and ']' if (!lastWasEscape) { if ( m_current == '[' && !inBrackets ) inBrackets = true; if ( m_current == ']' && inBrackets ) inBrackets = false; } record16(m_current); lastWasEscape = !lastWasEscape && (m_current == '\\'); } else { // end of regexp m_pattern = UString(m_buffer16); m_buffer16.clear(); shift(1); break; } shift(1); } while (isIdentPart(m_current)) { record16(m_current); shift(1); } m_flags = UString(m_buffer16); return true;}void Lexer::clear(){ m_identifiers.clear(); Vector<char> newBuffer8; newBuffer8.reserveInitialCapacity(initialReadBufferCapacity); m_buffer8.swap(newBuffer8); Vector<UChar> newBuffer16; newBuffer16.reserveInitialCapacity(initialReadBufferCapacity); m_buffer16.swap(newBuffer16); m_isReparsing = false; m_pattern = 0; m_flags = 0;}} // namespace JSC
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -