lexer.cpp

来自「konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版」· C++ 代码 · 共 900 行 · 第 1/2 页

CPP
900
字号
  fprintf(stderr, "%s ", buffer8);#endif  long double dval = 0;  if (state == Number) {    dval = strtod(buffer8, 0L);  } else if (state == Hex) { // scan hex numbers    dval = 0;    if (buffer8[0] == '0' && (buffer8[1] == 'x' || buffer8[1] == 'X')) {      for (const char *p = buffer8+2; *p; p++) {	if (!isHexDigit(*p)) {	  dval = 0;	  break;	}	dval = dval * 16 + convertHex(*p);      }    }    state = Number;  } else if (state == Octal) {   // scan octal number    dval = 0;    if (buffer8[0] == '0') {      for (const char *p = buffer8+1; *p; p++) {	if (*p < '0' || *p > '7') {	  dval = 0;	  break;	}	dval = dval * 8 + *p - '0';      }    }    state = Number;  }#ifdef KJS_DEBUG_LEX  switch (state) {  case Eof:    printf("(EOF)\n");    break;  case Other:    printf("(Other)\n");    break;  case Identifier:    printf("(Identifier)/(Keyword)\n");    break;  case String:    printf("(String)\n");    break;  case Number:    printf("(Number)\n");    break;  default:    printf("(unknown)");  }#endif  if (state != Identifier && eatNextIdentifier)    eatNextIdentifier = false;  restrKeyword = false;  delimited = false;  kjsyylloc.first_line = yylineno; // ???  kjsyylloc.last_line = yylineno;  switch (state) {  case Eof:    token = 0;    break;  case Other:    if(token == '}' || token == ';') {      delimited = true;    }    break;  case Identifier:    if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {      // Lookup for keyword failed, means this is an identifier      // Apply anonymous-function hack below (eat the identifier)      if (eatNextIdentifier) {        eatNextIdentifier = false;#ifdef KJS_VERBOSE        UString debugstr(buffer16, pos16); fprintf(stderr,"Anonymous function hack: eating identifier %s\n",debugstr.ascii());#endif        token = lex();        break;      }      /* TODO: close leak on parse error. same holds true for String */      kjsyylval.ident = makeIdentifier(buffer16, pos16);      token = IDENT;      break;    }    eatNextIdentifier = false;    // Hack for "f = function somename() { ... }", too hard to get into the grammar    // Same for building an array with function pointers ( 'name', func1, 'name2', func2 )    // There are lots of other uses, we really have to get this into the grammar    if ( token == FUNCTION &&         ( lastToken == '=' || lastToken == ',' || lastToken == '(' ) )            eatNextIdentifier = true;    if (token == CONTINUE || token == BREAK ||        token == RETURN || token == THROW)      restrKeyword = true;    break;  case String:    kjsyylval.ustr = makeUString(buffer16, pos16);    token = STRING;    break;  case Number:    kjsyylval.dval = dval;    token = NUMBER;    break;  case Bad:    foundBad = true;    return -1;  default:    assert(!"unhandled numeration value in switch");    return -1;  }  lastToken = token;  return token;}bool Lexer::isWhiteSpace(unsigned short c){  return (c == ' ' || c == '\t' ||          c == 0x0b || c == 0x0c || c == 0xa0);}bool Lexer::isIdentLetter(unsigned short c){  // Allow any character in the Unicode categories  // Uppercase letter (Lu), Lowercase letter (Ll),  // Titlecase letter (Lt)", Modifier letter (Lm),  // Other letter (Lo), or Letter number (Nl).  // Also see: http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */  return (c >= 'a' && c <= 'z' ||          c >= 'A' && c <= 'Z' ||          // A with grave - O with diaeresis          c >= 0x00c0 && c <= 0x00d6 ||          // O with stroke - o with diaeresis          c >= 0x00d8 && c <= 0x00f6 ||          // o with stroke - turned h with fishook and tail          c >= 0x00f8 && c <= 0x02af ||          // Greek etc. TODO: not precise          c >= 0x0388 && c <= 0x1ffc ||          c == '$' || c == '_');  /* TODO: use complete category table */}bool Lexer::isDecimalDigit(unsigned short c){  return (c >= '0' && c <= '9');}bool Lexer::isHexDigit(unsigned short c){  return (c >= '0' && c <= '9' ||          c >= 'a' && c <= 'f' ||          c >= 'A' && c <= 'F');}bool Lexer::isOctalDigit(unsigned short c){  return (c >= '0' && c <= '7');}int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,                              unsigned short c3, unsigned short c4){  if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {    shift(4);    return URSHIFTEQUAL;  } else if (c1 == '=' && c2 == '=' && c3 == '=') {    shift(3);    return STREQ;  } else if (c1 == '!' && c2 == '=' && c3 == '=') {    shift(3);    return STRNEQ;   } else if (c1 == '>' && c2 == '>' && c3 == '>') {    shift(3);    return URSHIFT;  } else if (c1 == '<' && c2 == '<' && c3 == '=') {    shift(3);    return LSHIFTEQUAL;  } else if (c1 == '>' && c2 == '>' && c3 == '=') {    shift(3);    return RSHIFTEQUAL;  } else if (c1 == '<' && c2 == '=') {    shift(2);    return LE;  } else if (c1 == '>' && c2 == '=') {    shift(2);    return GE;  } else if (c1 == '!' && c2 == '=') {    shift(2);    return NE;  } else if (c1 == '+' && c2 == '+') {    shift(2);    if (terminator)      return AUTOPLUSPLUS;    else      return PLUSPLUS;  } else if (c1 == '-' && c2 == '-') {    shift(2);    if (terminator)      return AUTOMINUSMINUS;    else      return MINUSMINUS;  } else if (c1 == '=' && c2 == '=') {    shift(2);    return EQEQ;  } else if (c1 == '+' && c2 == '=') {    shift(2);    return PLUSEQUAL;  } else if (c1 == '-' && c2 == '=') {    shift(2);    return MINUSEQUAL;  } else if (c1 == '*' && c2 == '=') {    shift(2);    return MULTEQUAL;  } else if (c1 == '/' && c2 == '=') {    shift(2);    return DIVEQUAL;  } else if (c1 == '&' && c2 == '=') {    shift(2);    return ANDEQUAL;  } else if (c1 == '^' && c2 == '=') {    shift(2);    return XOREQUAL;  } else if (c1 == '%' && c2 == '=') {    shift(2);    return MODEQUAL;  } else if (c1 == '|' && c2 == '=') {    shift(2);    return OREQUAL;  } else if (c1 == '<' && c2 == '<') {    shift(2);    return LSHIFT;  } else if (c1 == '>' && c2 == '>') {    shift(2);    return RSHIFT;  } else if (c1 == '&' && c2 == '&') {    shift(2);    return AND;  } else if (c1 == '|' && c2 == '|') {    shift(2);    return OR;  }  switch(c1) {    case '=':    case '>':    case '<':    case ',':    case '!':    case '~':    case '?':    case ':':    case '.':    case '+':    case '-':    case '*':    case '/':    case '&':    case '|':    case '^':    case '%':    case '(':    case ')':    case '{':    case '}':    case '[':    case ']':    case ';':      shift(1);      return static_cast<int>(c1);    default:      return -1;  }}unsigned short Lexer::singleEscape(unsigned short c) const{  switch(c) {  case 'b':    return 0x08;  case 't':    return 0x09;  case 'n':    return 0x0A;  case 'v':    return 0x0B;  case 'f':    return 0x0C;  case 'r':    return 0x0D;  case '"':    return 0x22;  case '\'':    return 0x27;  case '\\':    return 0x5C;  default:    return c;  }}unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,                                      unsigned short c3) const{  return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');}unsigned char Lexer::convertHex(unsigned short c){  if (c >= '0' && c <= '9')    return (c - '0');  else if (c >= 'a' && c <= 'f')    return (c - 'a' + 10);  else    return (c - 'A' + 10);}unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2){  return ((convertHex(c1) << 4) + convertHex(c2));}UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,                                     unsigned short c3, unsigned short c4){  return UChar((convertHex(c1) << 4) + convertHex(c2),               (convertHex(c3) << 4) + convertHex(c4));}void Lexer::record8(unsigned short c){  assert(c <= 0xff);  // enlarge buffer if full  if (pos8 >= size8 - 1) {    char *tmp = new char[2 * size8];    memcpy(tmp, buffer8, size8 * sizeof(char));    delete [] buffer8;    buffer8 = tmp;    size8 *= 2;  }  buffer8[pos8++] = (char) c;}void Lexer::record16(UChar c){  // enlarge buffer if full  if (pos16 >= size16 - 1) {    UChar *tmp = new UChar[2 * size16];    memcpy(tmp, buffer16, size16 * sizeof(UChar));    delete [] buffer16;    buffer16 = tmp;    size16 *= 2;  }  buffer16[pos16++] = c;}bool Lexer::scanRegExp(){  pos16 = 0;  bool lastWasEscape = false;  bool inBrackets = false;  while (1) {    if (current == '\r' || current == '\n' || current == 0)      return false;    else if (current != '/' || lastWasEscape == true || inBrackets == true)    {        // keep track of '[' and ']'        if ( !lastWasEscape ) {          if ( current == '[' && !inBrackets )            inBrackets = true;          if ( current == ']' && inBrackets )            inBrackets = false;        }        record16(current);        lastWasEscape =            !lastWasEscape && (current == '\\');    }    else { // end of regexp      pattern = UString(buffer16, pos16);      pos16 = 0;      shift(1);      break;    }    shift(1);  }  while (isIdentLetter(current)) {    record16(current);    shift(1);  }  flags = UString(buffer16, pos16);  return true;}void Lexer::doneParsing(){  for (unsigned i = 0; i < numIdentifiers; i++) {    delete identifiers[i];  }  free(identifiers);  identifiers = 0;  numIdentifiers = 0;  identifiersCapacity = 0;  for (unsigned i = 0; i < numStrings; i++) {    delete strings[i];  }  free(strings);  strings = 0;  numStrings = 0;  stringsCapacity = 0;}const int initialCapacity = 64;const int growthFactor = 2;Identifier *Lexer::makeIdentifier(UChar *buffer, unsigned int pos){  if (numIdentifiers == identifiersCapacity) {    identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;    identifiers = (KJS::Identifier **)realloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);  }  KJS::Identifier *identifier = new KJS::Identifier(buffer, pos);  identifiers[numIdentifiers++] = identifier;  return identifier;}UString *Lexer::makeUString(UChar *buffer, unsigned int pos){  if (numStrings == stringsCapacity) {    stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;    strings = (UString **)realloc(strings, sizeof(UString *) * stringsCapacity);  }  UString *string = new UString(buffer, pos);  strings[numStrings++] = string;  return string;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?