📄 lexer.cpp
字号:
/*
 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "Lexer.h"

#include "JSFunction.h"
#include "JSGlobalObjectFunctions.h"
#include "NodeInfo.h"
#include "Nodes.h"
#include "dtoa.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/ASCIICType.h>
#include <wtf/Assertions.h>

using namespace WTF;
using namespace Unicode;

// we can't specify the namespace in yacc's C output, so do it here
using namespace JSC;

#ifndef KDE_USE_FINAL
#include "Grammar.h"
#endif

#include "Lookup.h"
#include "Lexer.lut.h"

// a bridge for yacc from the C world to C++
//
// lvalp and llocp are passed through untouched to Lexer::lex(); globalData is
// cast to JSGlobalData* and its `lexer` member performs the actual scan.
// Returns whatever Lexer::lex() returns.
int jscyylex(void* lvalp, void* llocp, void* globalData)
{
    return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
}

namespace JSC {

static bool isDecimalDigit(int);

// Builds an idle lexer: no source attached (m_code == 0, m_length == 0), the
// current character and the three lookahead characters zeroed, and both
// scratch buffers pre-sized to initialReadBufferCapacity.
//
// NOTE(review): m_delimited, m_skipLF, m_skipCR, m_error and m_source are
// assigned in setCode() but do not appear in this initializer list; whether
// they receive a value elsewhere before setCode() runs is not visible here.
Lexer::Lexer(JSGlobalData* globalData)
    : yylineno(1)
    , m_restrKeyword(false)
    , m_eatNextIdentifier(false)
    , m_stackToken(-1)
    , m_lastToken(-1)
    , m_position(0)
    , m_code(0)
    , m_length(0)
    , m_isReparsing(false)
    , m_atLineStart(true)
    , m_current(0)
    , m_next1(0)
    , m_next2(0)
    , m_next3(0)
    , m_currentOffset(0)
    , m_nextOffset1(0)
    , m_nextOffset2(0)
    , m_nextOffset3(0)
    , m_globalData(globalData)
    , m_mainTable(JSC::mainTable)
{
    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
}

// Releases the keyword lookup table held in m_mainTable.
Lexer::~Lexer()
{
    m_mainTable.deleteTable();
}

// Points the lexer at `source` and resets all per-source scanning state.
//
// The line counter starts at source.firstLine(), reading starts at
// source.startOffset() and stops at source.endOffset() (m_length is therefore
// an end offset into the provider's data, not a character count).
// The address of `source` is stored in m_source.
// NOTE(review): this presumably requires `source` to outlive the lexing
// pass -- confirm against callers.
// m_isReparsing is not touched here.
void Lexer::setCode(const SourceCode& source)
{
    yylineno = source.firstLine();
    m_restrKeyword = false;
    m_delimited = false;
    m_eatNextIdentifier = false;
    m_stackToken = -1;
    m_lastToken = -1;

    m_position = source.startOffset();
    m_source = &source;
    m_code = source.provider()->data();
    m_length = source.endOffset();
    m_skipLF = false;
    m_skipCR = false;
    m_error = false;
    m_atLineStart = true;

    // read first characters
    // (four shifts fill m_current, m_next1, m_next2 and m_next3)
    shift(4);
}

// Advances the four-character window (m_current, m_next1..m_next3) and the
// matching offsets by `p` positions, reading new characters from m_code.
//
// Once m_position reaches m_length the incoming character is -1, the value
// lex() compares m_current against to detect end of input; m_position keeps
// being incremented on every such step.
void Lexer::shift(unsigned p)
{
    // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
    // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.

    while (p--) {
        m_current = m_next1;
        m_next1 = m_next2;
        m_next2 = m_next3;
        m_currentOffset = m_nextOffset1;
        m_nextOffset1 = m_nextOffset2;
        m_nextOffset2 = m_nextOffset3;
        // Keep reading while the incoming character is a BOM (0xFEFF), so a
        // BOM never enters the lookahead window.
        do {
            if (m_position >= m_length) {
                m_nextOffset3 = m_position;
                m_position++;
                m_next3 = -1;
                break;
            }
            m_nextOffset3 = m_position;
            m_next3 = m_code[m_position++];
        } while (m_next3 == 0xFEFF);
    }
}

// called on each new line
// Bumps the line counter and marks that the scanner is at the start of a line.
void Lexer::nextLine()
{
    yylineno++;
    m_atLineStart = true;
}

// Records the final state `s` and sets m_done, which ends the scanning loop
// in lex().
void Lexer::setDone(State s)
{
    m_state = s;
    m_done = true;
}

int Lexer::lex(void* p1, void* p2)
{
    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    m_state = Start;
    unsigned short stringType = 0; // either single or double quotes
    m_buffer8.clear();
    m_buffer16.clear();
    m_done = false;
    m_terminator = false;
    m_skipLF = false;
    m_skipCR = false;

    // did we push a token on the stack previously ?
// (after an automatic semicolon insertion) if (m_stackToken >= 0) { setDone(Other); token = m_stackToken; m_stackToken = 0; } int startOffset = m_currentOffset; while (!m_done) { if (m_skipLF && m_current != '\n') // found \r but not \n afterwards m_skipLF = false; if (m_skipCR && m_current != '\r') // found \n but not \r afterwards m_skipCR = false; if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one m_skipLF = false; m_skipCR = false; shift(1); } switch (m_state) { case Start: startOffset = m_currentOffset; if (isWhiteSpace()) { // do nothing } else if (m_current == '/' && m_next1 == '/') { shift(1); m_state = InSingleLineComment; } else if (m_current == '/' && m_next1 == '*') { shift(1); m_state = InMultiLineComment; } else if (m_current == -1) { if (!m_terminator && !m_delimited && !m_isReparsing) { // automatic semicolon insertion if program incomplete token = ';'; m_stackToken = 0; setDone(Other); } else setDone(Eof); } else if (isLineTerminator()) { nextLine(); m_terminator = true; if (m_restrKeyword) { token = ';'; setDone(Other); } } else if (m_current == '"' || m_current == '\'') { m_state = InString; stringType = static_cast<unsigned short>(m_current); } else if (isIdentStart(m_current)) { record16(m_current); m_state = InIdentifierOrKeyword; } else if (m_current == '\\') m_state = InIdentifierStartUnicodeEscapeStart; else if (m_current == '0') { record8(m_current); m_state = InNum0; } else if (isDecimalDigit(m_current)) { record8(m_current); m_state = InNum; } else if (m_current == '.' && isDecimalDigit(m_next1)) { record8(m_current); m_state = InDecimal; // <!-- marks the beginning of a line comment (for www usage) } else if (m_current == '<' && m_next1 == '!' 
&& m_next2 == '-' && m_next3 == '-') { shift(3); m_state = InSingleLineComment; // same for --> } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') { shift(2); m_state = InSingleLineComment; } else { token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3); if (token != -1) setDone(Other); else setDone(Bad); } break; case InString: if (m_current == stringType) { shift(1); setDone(String); } else if (isLineTerminator() || m_current == -1) setDone(Bad); else if (m_current == '\\') m_state = InEscapeSequence; else record16(m_current); break; // Escape Sequences inside of strings case InEscapeSequence: if (isOctalDigit(m_current)) { if (m_current >= '0' && m_current <= '3' && isOctalDigit(m_next1) && isOctalDigit(m_next2)) { record16(convertOctal(m_current, m_next1, m_next2)); shift(2); m_state = InString; } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) { record16(convertOctal('0', m_current, m_next1)); shift(1); m_state = InString; } else if (isOctalDigit(m_current)) { record16(convertOctal('0', '0', m_current)); m_state = InString; } else setDone(Bad); } else if (m_current == 'x') m_state = InHexEscape; else if (m_current == 'u') m_state = InUnicodeEscape; else if (isLineTerminator()) { nextLine(); m_state = InString; } else { record16(singleEscape(static_cast<unsigned short>(m_current))); m_state = InString; } break; case InHexEscape: if (isHexDigit(m_current) && isHexDigit(m_next1)) { m_state = InString; record16(convertHex(m_current, m_next1)); shift(1); } else if (m_current == stringType) { record16('x'); shift(1); setDone(String); } else { record16('x'); record16(m_current); m_state = InString; } break; case InUnicodeEscape: if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) { record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); shift(3); m_state = InString; } else if (m_current == stringType) { record16('u'); shift(1); setDone(String); } else 
setDone(Bad); break; case InSingleLineComment: if (isLineTerminator()) { nextLine(); m_terminator = true; if (m_restrKeyword) { token = ';'; setDone(Other); } else m_state = Start; } else if (m_current == -1) setDone(Eof); break; case InMultiLineComment: if (m_current == -1) setDone(Bad); else if (isLineTerminator()) nextLine(); else if (m_current == '*' && m_next1 == '/') { m_state = Start; shift(1); } break; case InIdentifierOrKeyword: case InIdentifier: if (isIdentPart(m_current)) record16(m_current); else if (m_current == '\\') m_state = InIdentifierPartUnicodeEscapeStart; else setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier); break; case InNum0: if (m_current == 'x' || m_current == 'X') { record8(m_current); m_state = InHex; } else if (m_current == '.') { record8(m_current); m_state = InDecimal; } else if (m_current == 'e' || m_current == 'E') { record8(m_current); m_state = InExponentIndicator; } else if (isOctalDigit(m_current)) { record8(m_current); m_state = InOctal; } else if (isDecimalDigit(m_current)) { record8(m_current); m_state = InDecimal; } else setDone(Number); break; case InHex: if (isHexDigit(m_current)) record8(m_current); else setDone(Hex); break; case InOctal: if (isOctalDigit(m_current)) record8(m_current); else if (isDecimalDigit(m_current)) { record8(m_current); m_state = InDecimal; } else setDone(Octal); break; case InNum: if (isDecimalDigit(m_current)) record8(m_current); else if (m_current == '.') { record8(m_current); m_state = InDecimal; } else if (m_current == 'e' || m_current == 'E') { record8(m_current); m_state = InExponentIndicator; } else setDone(Number); break; case InDecimal: if (isDecimalDigit(m_current)) record8(m_current); else if (m_current == 'e' || m_current == 'E') { record8(m_current); m_state = InExponentIndicator; } else setDone(Number); break; case InExponentIndicator: if (m_current == '+' || m_current == '-') record8(m_current); else if (isDecimalDigit(m_current)) { record8(m_current); 
m_state = InExponent; } else setDone(Bad); break; case InExponent: if (isDecimalDigit(m_current)) record8(m_current); else setDone(Number); break; case InIdentifierStartUnicodeEscapeStart: if (m_current == 'u') m_state = InIdentifierStartUnicodeEscape; else setDone(Bad); break; case InIdentifierPartUnicodeEscapeStart: if (m_current == 'u') m_state = InIdentifierPartUnicodeEscape; else setDone(Bad); break; case InIdentifierStartUnicodeEscape: if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) { setDone(Bad); break; } token = convertUnicode(m_current, m_next1, m_next2, m_next3); shift(3); if (!isIdentStart(token)) { setDone(Bad); break; } record16(token); m_state = InIdentifier; break; case InIdentifierPartUnicodeEscape: if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) { setDone(Bad); break; } token = convertUnicode(m_current, m_next1, m_next2, m_next3); shift(3); if (!isIdentPart(token)) { setDone(Bad); break; } record16(token); m_state = InIdentifier; break; default:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -