📄 lexer.cpp
字号:
// -*- c-basic-offset: 2 -*-/* * This file is part of the KDE libraries * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public License * along with this library; see the file COPYING.LIB. If not, write to * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. * */#ifdef HAVE_CONFIG_H#include <config.h>#endif#include <ctype.h>#include <stdlib.h>#include <stdio.h>#include <string.h>#include <assert.h>#include "value.h"#include "object.h"#include "types.h"#include "interpreter.h"#include "nodes.h"#include "lexer.h"#include "identifier.h"#include "lookup.h"#include "internal.h"// we can't specify the namespace in yacc's C output, so do it hereusing namespace KJS;static Lexer *currLexer = 0;#ifndef KDE_USE_FINAL#include "grammar.h"#endif#include "lexer.lut.h"extern YYLTYPE kjsyylloc; // global bison variable holding token info// a bridge for yacc from the C world to C++int kjsyylex(){ return Lexer::curr()->lex();}Lexer::Lexer() : yylineno(1), size8(128), size16(128), restrKeyword(false), eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0), code(0), length(0),#ifndef KJS_PURE_ECMA bol(true),#endif current(0), next1(0), next2(0), next3(0), strings(0), numStrings(0), stringsCapacity(0), identifiers(0), numIdentifiers(0), identifiersCapacity(0){ // allocate space for read buffers buffer8 = new char[size8]; buffer16 = new UChar[size16]; currLexer = this;}Lexer::~Lexer(){ doneParsing(); delete [] buffer8; delete [] buffer16;}Lexer *Lexer::curr(){ if (!currLexer) { // create singleton instance currLexer = new Lexer(); } return currLexer;}#ifdef KJS_DEBUG_MEMvoid Lexer::globalClear(){ delete currLexer; currLexer = 0L;}#endifvoid Lexer::setCode(const UString &sourceURL, int startingLineNumber, const UChar *c, unsigned int len){ yylineno = 1 + startingLineNumber; m_sourceURL = sourceURL; restrKeyword = false; delimited = false; eatNextIdentifier = false; stackToken = -1; lastToken = -1; pos = 0; code = c; length = len; skipLF = false; skipCR = false;#ifndef KJS_PURE_ECMA bol = true;#endif // read first characters current = (length > 0) ? code[0].uc : 0; next1 = (length > 1) ? code[1].uc : 0; next2 = (length > 2) ? code[2].uc : 0; next3 = (length > 3) ? code[3].uc : 0;}void Lexer::shift(unsigned int p){ while (p--) { pos++; current = next1; next1 = next2; next2 = next3; next3 = (pos + 3 < length) ? code[pos+3].uc : 0; }}// called on each new linevoid Lexer::nextLine(){ yylineno++;#ifndef KJS_PURE_ECMA bol = true;#endif}void Lexer::setDone(State s){ state = s; done = true;}int Lexer::lex(){ int token = 0; state = Start; unsigned short stringType = 0; // either single or double quotes pos8 = pos16 = 0; done = false; terminator = false; skipLF = false; skipCR = false; // did we push a token on the stack previously ? // (after an automatic semicolon insertion) if (stackToken >= 0) { setDone(Other); token = stackToken; stackToken = 0; } while (!done) { if (skipLF && current != '\n') // found \r but not \n afterwards skipLF = false; if (skipCR && current != '\r') // found \n but not \r afterwards skipCR = false; if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one { skipLF = false; skipCR = false; shift(1); } switch (state) { case Start: if (isWhiteSpace()) { // do nothing } else if (current == '/' && next1 == '/') { shift(1); state = InSingleLineComment; } else if (current == '/' && next1 == '*') { shift(1); state = InMultiLineComment; } else if (current == 0) { if (!terminator && !delimited) { // automatic semicolon insertion if program incomplete token = ';'; stackToken = 0; setDone(Other); } else setDone(Eof); } else if (isLineTerminator()) { nextLine(); terminator = true; if (restrKeyword) { token = ';'; setDone(Other); } } else if (current == '"' || current == '\'') { state = InString; stringType = current; } else if (isIdentLetter(current)) { record16(current); state = InIdentifier; } else if (current == '0') { record8(current); state = InNum0; } else if (isDecimalDigit(current)) { record8(current); state = InNum; } else if (current == '.' && isDecimalDigit(next1)) { record8(current); state = InDecimal;#ifndef KJS_PURE_ECMA // <!-- marks the beginning of a line comment (for www usage) } else if (current == '<' && next1 == '!' && next2 == '-' && next3 == '-') { shift(3); state = InSingleLineComment; // same for --> } else if (bol && current == '-' && next1 == '-' && next2 == '>') { shift(2); state = InSingleLineComment;#endif } else { token = matchPunctuator(current, next1, next2, next3); if (token != -1) { setDone(Other); } else { // cerr << "encountered unknown character" << endl; setDone(Bad); } } break; case InString: if (current == stringType) { shift(1); setDone(String); } else if (current == 0 || isLineTerminator()) { setDone(Bad); } else if (current == '\\') { state = InEscapeSequence; } else { record16(current); } break; // Escape Sequences inside of strings case InEscapeSequence: if (isOctalDigit(current)) { if (current >= '0' && current <= '3' && isOctalDigit(next1) && isOctalDigit(next2)) { record16(convertOctal(current, next1, next2)); shift(2); state = InString; } else if (isOctalDigit(current) && isOctalDigit(next1)) { record16(convertOctal('0', current, next1)); shift(1); state = InString; } else if (isOctalDigit(current)) { record16(convertOctal('0', '0', current)); state = InString; } else { setDone(Bad); } } else if (current == 'x') state = InHexEscape; else if (current == 'u') state = InUnicodeEscape; else { record16(singleEscape(current)); state = InString; } break; case InHexEscape: if (isHexDigit(current) && isHexDigit(next1)) { state = InString; record16(convertHex(current, next1)); shift(1); } else if (current == stringType) { record16('x'); shift(1); setDone(String); } else { record16('x'); record16(current); state = InString; } break; case InUnicodeEscape: if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { record16(convertUnicode(current, next1, next2, next3)); shift(3); state = InString; } else if (current == stringType) { record16('u'); shift(1); setDone(String); } else { setDone(Bad); } break; case InSingleLineComment: if (isLineTerminator()) { nextLine(); terminator = true; if (restrKeyword) { token = ';'; setDone(Other); } else state = Start; } else if (current == 0) { setDone(Eof); } break; case InMultiLineComment: if (current == 0) { setDone(Bad); } else if (isLineTerminator()) { nextLine(); } else if (current == '*' && next1 == '/') { state = Start; shift(1); } break; case InIdentifier: if (isIdentLetter(current) || isDecimalDigit(current)) { record16(current); break; } setDone(Identifier); break; case InNum0: if (current == 'x' || current == 'X') { record8(current); state = InHex; } else if (current == '.') { record8(current); state = InDecimal; } else if (current == 'e' || current == 'E') { record8(current); state = InExponentIndicator; } else if (isOctalDigit(current)) { record8(current); state = InOctal; } else if (isDecimalDigit(current)) { record8(current); state = InDecimal; } else { setDone(Number); } break; case InHex: if (isHexDigit(current)) { record8(current); } else { setDone(Hex); } break; case InOctal: if (isOctalDigit(current)) { record8(current); } else if (isDecimalDigit(current)) { record8(current); state = InDecimal; } else setDone(Octal); break; case InNum: if (isDecimalDigit(current)) { record8(current); } else if (current == '.') { record8(current); state = InDecimal; } else if (current == 'e' || current == 'E') { record8(current); state = InExponentIndicator; } else setDone(Number); break; case InDecimal: if (isDecimalDigit(current)) { record8(current); } else if (current == 'e' || current == 'E') { record8(current); state = InExponentIndicator; } else setDone(Number); break; case InExponentIndicator: if (current == '+' || current == '-') { record8(current); } else if (isDecimalDigit(current)) { record8(current); state = InExponent; } else setDone(Bad); break; case InExponent: if (isDecimalDigit(current)) { record8(current); } else setDone(Number); break; default: assert(!"Unhandled state in switch statement"); } // move on to the next character if (!done) shift(1);#ifndef KJS_PURE_ECMA if (state != Start && state != InSingleLineComment) bol = false;#endif } // no identifiers allowed directly after numeric literal, e.g. "3in" is bad if ((state == Number || state == Octal || state == Hex) && isIdentLetter(current)) state = Bad;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -