lexer.cpp

来自「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」· C++ 代码 · 共 900 行 · 第 1/2 页
CPP
900 行
/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "Lexer.h"

#include "JSFunction.h"
#include "JSGlobalObjectFunctions.h"
#include "NodeInfo.h"
#include "Nodes.h"
#include "dtoa.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/ASCIICType.h>
#include <wtf/Assertions.h>

using namespace WTF;
using namespace Unicode;

// yacc's C output cannot name a namespace, so JSC is opened here instead.
using namespace JSC;

#ifndef KDE_USE_FINAL
#include "Grammar.h"
#endif

#include "Lookup.h"
#include "Lexer.lut.h"

// C-callable entry point for yacc: globalData is cast to JSGlobalData* and the
// call is forwarded to the lex() method of its lexer.
int jscyylex(void* lvalp, void* llocp, void* globalData)
{
    JSGlobalData* data = static_cast<JSGlobalData*>(globalData);
    return data->lexer->lex(lvalp, llocp);
}

namespace JSC {

static bool isDecimalDigit(int);

// Builds a lexer with no source attached: the line counter starts at 1, the
// lookahead window and its offsets are zeroed, and both scratch buffers get
// their initial capacity reserved.
Lexer::Lexer(JSGlobalData* globalData)
    : yylineno(1)
    , m_restrKeyword(false)
    , m_eatNextIdentifier(false)
    , m_stackToken(-1)
    , m_lastToken(-1)
    , m_position(0)
    , m_code(0)
    , m_length(0)
    , m_isReparsing(false)
    , m_atLineStart(true)
    , m_current(0)
    , m_next1(0)
    , m_next2(0)
    , m_next3(0)
    , m_currentOffset(0)
    , m_nextOffset1(0)
    , m_nextOffset2(0)
    , m_nextOffset3(0)
    , m_globalData(globalData)
    , m_mainTable(JSC::mainTable)
{
    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
}

// Releases the lexer's copy of the main lookup table.
Lexer::~Lexer()
{
    m_mainTable.deleteTable();
}

// Points the lexer at `source` and resets the per-source scanning state.
// A pointer to `source` is stored, so the argument must stay alive for as
// long as the lexer refers to it.
void Lexer::setCode(const SourceCode& source)
{
    // State taken from the source itself.
    yylineno = source.firstLine();
    m_source = &source;
    m_position = source.startOffset();
    m_code = source.provider()->data();
    m_length = source.endOffset();

    // Token bookkeeping.
    m_stackToken = -1;
    m_lastToken = -1;

    // Scanner flags.
    m_restrKeyword = false;
    m_delimited = false;
    m_eatNextIdentifier = false;
    m_skipLF = false;
    m_skipCR = false;
    m_error = false;
    m_atLineStart = true;

    // Prime m_current and the three lookahead slots.
    shift(4);
}

// Advances the four-character window (m_current, m_next1..m_next3) by p
// positions, keeping the matching source offsets in step. Once the input is
// exhausted the incoming slot is filled with -1.
//
// ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
// see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
void Lexer::shift(unsigned p)
{
    for (; p; --p) {
        // Slide every slot, and its offset, one place towards m_current.
        m_current = m_next1;
        m_currentOffset = m_nextOffset1;
        m_next1 = m_next2;
        m_nextOffset1 = m_nextOffset2;
        m_next2 = m_next3;
        m_nextOffset2 = m_nextOffset3;

        // Refill the last slot, skipping over any U+FEFF characters.
        for (;;) {
            m_nextOffset3 = m_position;
            if (m_position >= m_length) {
                m_position++;
                m_next3 = -1;
                break;
            }
            m_next3 = m_code[m_position++];
            if (m_next3 != 0xFEFF)
                break;
        }
    }
}

// Called on each new line: bumps the line counter and flags line start.
void Lexer::nextLine()
{
    ++yylineno;
    m_atLineStart = true;
}

// Records the final state `s` for the current token and marks scanning done.
void Lexer::setDone(State s)
{
    m_done = true;
    m_state = s;
}

// Entry point reached through jscyylex(). p1 and p2 are cast to YYSTYPE* and
// YYLTYPE*; the per-call scanner state is reset before scanning begins.
int Lexer::lex(void* p1, void* p2)
{
    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    m_state = Start;
    unsigned short stringType = 0; // either single or double quotes
    m_buffer8.clear();
    m_buffer16.clear();
    m_done = false;
    m_terminator = false;
    // The assignment below is completed by the text that follows in the file.
    m_skipLF
= false;    m_skipCR = false;    // did we push a token on the stack previously ?    // (after an automatic semicolon insertion)    if (m_stackToken >= 0) {        setDone(Other);        token = m_stackToken;        m_stackToken = 0;    }    int startOffset = m_currentOffset;    while (!m_done) {        if (m_skipLF && m_current != '\n') // found \r but not \n afterwards            m_skipLF = false;        if (m_skipCR && m_current != '\r') // found \n but not \r afterwards            m_skipCR = false;        if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one            m_skipLF = false;            m_skipCR = false;            shift(1);        }        switch (m_state) {            case Start:                startOffset = m_currentOffset;                if (isWhiteSpace()) {                    // do nothing                } else if (m_current == '/' && m_next1 == '/') {                    shift(1);                    m_state = InSingleLineComment;                } else if (m_current == '/' && m_next1 == '*') {                    shift(1);                    m_state = InMultiLineComment;                } else if (m_current == -1) {                    if (!m_terminator && !m_delimited && !m_isReparsing) {                        // automatic semicolon insertion if program incomplete                        token = ';';                        m_stackToken = 0;                        setDone(Other);                    } else                        setDone(Eof);                } else if (isLineTerminator()) {                    nextLine();                    m_terminator = true;                    if (m_restrKeyword) {                        token = ';';                        setDone(Other);                    }                } else if (m_current == '"' || m_current == '\'') {                    m_state = InString;                    stringType = static_cast<unsigned short>(m_current);                } else if (isIdentStart(m_current)) {             
       record16(m_current);                    m_state = InIdentifierOrKeyword;                } else if (m_current == '\\')                    m_state = InIdentifierStartUnicodeEscapeStart;                else if (m_current == '0') {                    record8(m_current);                    m_state = InNum0;                } else if (isDecimalDigit(m_current)) {                    record8(m_current);                    m_state = InNum;                } else if (m_current == '.' && isDecimalDigit(m_next1)) {                    record8(m_current);                    m_state = InDecimal;                    // <!-- marks the beginning of a line comment (for www usage)                } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {                    shift(3);                    m_state = InSingleLineComment;                    // same for -->                } else if (m_atLineStart && m_current == '-' && m_next1 == '-' &&  m_next2 == '>') {                    shift(2);                    m_state = InSingleLineComment;                } else {                    token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);                    if (token != -1)                        setDone(Other);                    else                        setDone(Bad);                }                break;            case InString:                if (m_current == stringType) {                    shift(1);                    setDone(String);                } else if (isLineTerminator() || m_current == -1)                    setDone(Bad);                else if (m_current == '\\')                    m_state = InEscapeSequence;                else                    record16(m_current);                break;            // Escape Sequences inside of strings            case InEscapeSequence:                if (isOctalDigit(m_current)) {                    if (m_current >= '0' && m_current <= '3' &&                        
isOctalDigit(m_next1) && isOctalDigit(m_next2)) {                        record16(convertOctal(m_current, m_next1, m_next2));                        shift(2);                        m_state = InString;                    } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {                        record16(convertOctal('0', m_current, m_next1));                        shift(1);                        m_state = InString;                    } else if (isOctalDigit(m_current)) {                        record16(convertOctal('0', '0', m_current));                        m_state = InString;                    } else                        setDone(Bad);                } else if (m_current == 'x')                    m_state = InHexEscape;                else if (m_current == 'u')                    m_state = InUnicodeEscape;                else if (isLineTerminator()) {                    nextLine();                    m_state = InString;                } else {                    record16(singleEscape(static_cast<unsigned short>(m_current)));                    m_state = InString;                }                break;            case InHexEscape:                if (isHexDigit(m_current) && isHexDigit(m_next1)) {                    m_state = InString;                    record16(convertHex(m_current, m_next1));                    shift(1);                } else if (m_current == stringType) {                    record16('x');                    shift(1);                    setDone(String);                } else {                    record16('x');                    record16(m_current);                    m_state = InString;                }                break;            case InUnicodeEscape:                if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {                    record16(convertUnicode(m_current, m_next1, m_next2, m_next3));                    shift(3);                    m_state = InString;             
   } else if (m_current == stringType) {                    record16('u');                    shift(1);                    setDone(String);                } else                    setDone(Bad);                break;            case InSingleLineComment:                if (isLineTerminator()) {                    nextLine();                    m_terminator = true;                    if (m_restrKeyword) {                        token = ';';                        setDone(Other);                    } else                        m_state = Start;                } else if (m_current == -1)                    setDone(Eof);                break;            case InMultiLineComment:                if (m_current == -1)                    setDone(Bad);                else if (isLineTerminator())                    nextLine();                else if (m_current == '*' && m_next1 == '/') {                    m_state = Start;                    shift(1);                }                break;            case InIdentifierOrKeyword:            case InIdentifier:                if (isIdentPart(m_current))                    record16(m_current);                else if (m_current == '\\')                    m_state = InIdentifierPartUnicodeEscapeStart;                else                    setDone(m_state == InIdentifierOrKeyword ? 
IdentifierOrKeyword : Identifier);                break;            case InNum0:                if (m_current == 'x' || m_current == 'X') {                    record8(m_current);                    m_state = InHex;                } else if (m_current == '.') {                    record8(m_current);                    m_state = InDecimal;                } else if (m_current == 'e' || m_current == 'E') {                    record8(m_current);                    m_state = InExponentIndicator;                } else if (isOctalDigit(m_current)) {                    record8(m_current);                    m_state = InOctal;                } else if (isDecimalDigit(m_current)) {                    record8(m_current);                    m_state = InDecimal;                } else                    setDone(Number);                break;            case InHex:                if (isHexDigit(m_current))                    record8(m_current);                else                    setDone(Hex);                break;            case InOctal:                if (isOctalDigit(m_current))                    record8(m_current);                else if (isDecimalDigit(m_current)) {                    record8(m_current);                    m_state = InDecimal;                } else                    setDone(Octal);                break;            case InNum:                if (isDecimalDigit(m_current))                    record8(m_current);                else if (m_current == '.') {                    record8(m_current);                    m_state = InDecimal;                } else if (m_current == 'e' || m_current == 'E') {                    record8(m_current);                    m_state = InExponentIndicator;                } else                    setDone(Number);                break;            case InDecimal:                if (isDecimalDigit(m_current))                    record8(m_current);                else if (m_current == 'e' || m_current == 'E') {                    
record8(m_current);                    m_state = InExponentIndicator;                } else                    setDone(Number);                break;            case InExponentIndicator:                if (m_current == '+' || m_current == '-')                    record8(m_current);                else if (isDecimalDigit(m_current)) {                    record8(m_current);                    m_state = InExponent;                } else                    setDone(Bad);                break;            case InExponent:                if (isDecimalDigit(m_current))                    record8(m_current);                else                    setDone(Number);                break;            case InIdentifierStartUnicodeEscapeStart:                if (m_current == 'u')                    m_state = InIdentifierStartUnicodeEscape;                else                    setDone(Bad);                break;            case InIdentifierPartUnicodeEscapeStart:                if (m_current == 'u')                    m_state = InIdentifierPartUnicodeEscape;                else                    setDone(Bad);                break;            case InIdentifierStartUnicodeEscape:                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {                    setDone(Bad);                    break;                }                token = convertUnicode(m_current, m_next1, m_next2, m_next3);                shift(3);                if (!isIdentStart(token)) {                    setDone(Bad);                    break;                }                record16(token);                m_state = InIdentifier;                break;            case InIdentifierPartUnicodeEscape:                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {                    setDone(Bad);                    break;                }                token = convertUnicode(m_current, m_next1, m_next2, 
m_next3);                shift(3);                if (!isIdentPart(token)) {                    setDone(Bad);                    break;                }                record16(token);                m_state = InIdentifier;                break;            default:
lexer.cpp - 源码说明

本页面展示了「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」中的 lexer.cpp 源码文件，采用 C++ 编程语言编写，共 900 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与WebKit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?