📄 lexer.cpp

📁 khtml在gtk上的移植版本
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
// -*- c-basic-offset: 2 -*-/* *  This file is part of the KDE libraries *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org) * *  This library is free software; you can redistribute it and/or *  modify it under the terms of the GNU Library General Public *  License as published by the Free Software Foundation; either *  version 2 of the License, or (at your option) any later version. * *  This library is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU *  Library General Public License for more details. * *  You should have received a copy of the GNU Library General Public License *  along with this library; see the file COPYING.LIB.  If not, write to *  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, *  Boston, MA 02111-1307, USA. * */#ifdef HAVE_CONFIG_H#include <config.h>#endif#include <ctype.h>#include <stdlib.h>#include <stdio.h>#include <string.h>#include <assert.h>#include "value.h"#include "object.h"#include "types.h"#include "interpreter.h"#include "nodes.h"#include "lexer.h"#include "identifier.h"#include "lookup.h"#include "internal.h"// we can't specify the namespace in yacc's C output, so do it hereusing namespace KJS;static Lexer *currLexer = 0;#ifndef KDE_USE_FINAL#include "grammar.h"#endif#include "lexer.lut.h"extern YYLTYPE kjsyylloc; // global bison variable holding token info// a bridge for yacc from the C world to C++int kjsyylex(){  return Lexer::curr()->lex();}Lexer::Lexer()  : yylineno(1),    size8(128), size16(128), restrKeyword(false),    eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),    code(0), length(0),#ifndef KJS_PURE_ECMA    bol(true),#endif    current(0), next1(0), next2(0), next3(0),    strings(0), numStrings(0), stringsCapacity(0),    identifiers(0), numIdentifiers(0), identifiersCapacity(0){  // allocate space for read buffers  buffer8 = new char[size8];  buffer16 = new UChar[size16];  currLexer = this;}Lexer::~Lexer(){  doneParsing();  delete [] buffer8;  delete [] buffer16;}Lexer *Lexer::curr(){  if (!currLexer) {    // create singleton instance    currLexer = new Lexer();  }  return currLexer;}#ifdef KJS_DEBUG_MEMvoid Lexer::globalClear(){  delete currLexer;  currLexer = 0L;}#endifvoid Lexer::setCode(const UString &sourceURL, int startingLineNumber, const UChar *c, unsigned int len){  yylineno = 1 + startingLineNumber;  m_sourceURL = sourceURL;  restrKeyword = false;  delimited = false;  eatNextIdentifier = false;  stackToken = -1;  lastToken = -1;  pos = 0;  code = c;  length = len;  skipLF = false;  skipCR = false;#ifndef KJS_PURE_ECMA  bol = true;#endif  // read first characters  current = (length > 0) ? code[0].uc : 0;  next1 = (length > 1) ? code[1].uc : 0;  next2 = (length > 2) ? code[2].uc : 0;  next3 = (length > 3) ? code[3].uc : 0;}void Lexer::shift(unsigned int p){  while (p--) {    pos++;    current = next1;    next1 = next2;    next2 = next3;    next3 = (pos + 3 < length) ? code[pos+3].uc : 0;  }}// called on each new linevoid Lexer::nextLine(){  yylineno++;#ifndef KJS_PURE_ECMA  bol = true;#endif}void Lexer::setDone(State s){  state = s;  done = true;}int Lexer::lex(){  int token = 0;  state = Start;  unsigned short stringType = 0; // either single or double quotes  pos8 = pos16 = 0;  done = false;  terminator = false;  skipLF = false;  skipCR = false;  // did we push a token on the stack previously ?  // (after an automatic semicolon insertion)  if (stackToken >= 0) {    setDone(Other);    token = stackToken;    stackToken = 0;  }  while (!done) {    if (skipLF && current != '\n') // found \r but not \n afterwards        skipLF = false;    if (skipCR && current != '\r') // found \n but not \r afterwards        skipCR = false;    if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one    {        skipLF = false;        skipCR = false;        shift(1);    }    switch (state) {    case Start:      if (isWhiteSpace()) {        // do nothing      } else if (current == '/' && next1 == '/') {        shift(1);        state = InSingleLineComment;      } else if (current == '/' && next1 == '*') {        shift(1);        state = InMultiLineComment;      } else if (current == 0) {        if (!terminator && !delimited) {          // automatic semicolon insertion if program incomplete          token = ';';          stackToken = 0;          setDone(Other);        } else          setDone(Eof);      } else if (isLineTerminator()) {        nextLine();        terminator = true;        if (restrKeyword) {          token = ';';          setDone(Other);        }      } else if (current == '"' || current == '\'') {        state = InString;        stringType = current;      } else if (isIdentLetter(current)) {        record16(current);        state = InIdentifier;      } else if (current == '0') {        record8(current);        state = InNum0;      } else if (isDecimalDigit(current)) {        record8(current);        state = InNum;      } else if (current == '.' && isDecimalDigit(next1)) {        record8(current);        state = InDecimal;#ifndef KJS_PURE_ECMA        // <!-- marks the beginning of a line comment (for www usage)      } else if (current == '<' && next1 == '!' &&                 next2 == '-' && next3 == '-') {        shift(3);        state = InSingleLineComment;        // same for -->      } else if (bol && current == '-' && next1 == '-' &&  next2 == '>') {        shift(2);        state = InSingleLineComment;#endif      } else {        token = matchPunctuator(current, next1, next2, next3);        if (token != -1) {          setDone(Other);        } else {          //      cerr << "encountered unknown character" << endl;          setDone(Bad);        }      }      break;    case InString:      if (current == stringType) {        shift(1);        setDone(String);      } else if (current == 0 || isLineTerminator()) {        setDone(Bad);      } else if (current == '\\') {        state = InEscapeSequence;      } else {        record16(current);      }      break;    // Escape Sequences inside of strings    case InEscapeSequence:      if (isOctalDigit(current)) {        if (current >= '0' && current <= '3' &&            isOctalDigit(next1) && isOctalDigit(next2)) {          record16(convertOctal(current, next1, next2));          shift(2);          state = InString;        } else if (isOctalDigit(current) && isOctalDigit(next1)) {          record16(convertOctal('0', current, next1));          shift(1);          state = InString;        } else if (isOctalDigit(current)) {          record16(convertOctal('0', '0', current));          state = InString;        } else {          setDone(Bad);        }      } else if (current == 'x')        state = InHexEscape;      else if (current == 'u')        state = InUnicodeEscape;      else {        record16(singleEscape(current));        state = InString;      }      break;    case InHexEscape:      if (isHexDigit(current) && isHexDigit(next1)) {        state = InString;        record16(convertHex(current, next1));        shift(1);      } else if (current == stringType) {        record16('x');        shift(1);        setDone(String);      } else {        record16('x');        record16(current);        state = InString;      }      break;    case InUnicodeEscape:      if (isHexDigit(current) && isHexDigit(next1) &&          isHexDigit(next2) && isHexDigit(next3)) {        record16(convertUnicode(current, next1, next2, next3));        shift(3);        state = InString;      } else if (current == stringType) {        record16('u');        shift(1);        setDone(String);      } else {        setDone(Bad);      }      break;    case InSingleLineComment:      if (isLineTerminator()) {        nextLine();        terminator = true;        if (restrKeyword) {          token = ';';          setDone(Other);        } else          state = Start;      } else if (current == 0) {        setDone(Eof);      }      break;    case InMultiLineComment:      if (current == 0) {        setDone(Bad);      } else if (isLineTerminator()) {        nextLine();      } else if (current == '*' && next1 == '/') {        state = Start;        shift(1);      }      break;    case InIdentifier:      if (isIdentLetter(current) || isDecimalDigit(current)) {        record16(current);        break;      }      setDone(Identifier);      break;    case InNum0:      if (current == 'x' || current == 'X') {        record8(current);        state = InHex;      } else if (current == '.') {        record8(current);        state = InDecimal;      } else if (current == 'e' || current == 'E') {        record8(current);        state = InExponentIndicator;      } else if (isOctalDigit(current)) {        record8(current);        state = InOctal;      } else if (isDecimalDigit(current)) {        record8(current);        state = InDecimal;      } else {        setDone(Number);      }      break;    case InHex:      if (isHexDigit(current)) {        record8(current);      } else {        setDone(Hex);      }      break;    case InOctal:      if (isOctalDigit(current)) {        record8(current);      }      else if (isDecimalDigit(current)) {        record8(current);        state = InDecimal;      } else        setDone(Octal);      break;    case InNum:      if (isDecimalDigit(current)) {        record8(current);      } else if (current == '.') {        record8(current);        state = InDecimal;      } else if (current == 'e' || current == 'E') {        record8(current);        state = InExponentIndicator;      } else        setDone(Number);      break;    case InDecimal:      if (isDecimalDigit(current)) {        record8(current);      } else if (current == 'e' || current == 'E') {        record8(current);        state = InExponentIndicator;      } else        setDone(Number);      break;    case InExponentIndicator:      if (current == '+' || current == '-') {        record8(current);      } else if (isDecimalDigit(current)) {        record8(current);        state = InExponent;      } else        setDone(Bad);      break;    case InExponent:      if (isDecimalDigit(current)) {        record8(current);      } else        setDone(Number);      break;    default:      assert(!"Unhandled state in switch statement");    }    // move on to the next character    if (!done)      shift(1);#ifndef KJS_PURE_ECMA    if (state != Start && state != InSingleLineComment)      bol = false;#endif  }  // no identifiers allowed directly after numeric literal, e.g. "3in" is bad  if ((state == Number || state == Octal || state == Hex)      && isIdentLetter(current))    state = Bad;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -