📄 lexer.cpp

📁 khtml在gtk上的移植版本
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
  // terminate string  buffer8[pos8] = '\0';#ifdef KJS_DEBUG_LEX  fprintf(stderr, "line: %d ", lineNo());  fprintf(stderr, "yytext (%x): ", buffer8[0]);  fprintf(stderr, "%s ", buffer8);#endif  double dval = 0;  if (state == Number) {    dval = strtod(buffer8, 0L);  } else if (state == Hex) { // scan hex numbers    // TODO: support long unsigned int    unsigned int i;    sscanf(buffer8, "%x", &i);    dval = i;    state = Number;  } else if (state == Octal) {   // scan octal number    unsigned int ui;    sscanf(buffer8, "%o", &ui);    dval = ui;    state = Number;  }#ifdef KJS_DEBUG_LEX  switch (state) {  case Eof:    printf("(EOF)\n");    break;  case Other:    printf("(Other)\n");    break;  case Identifier:    printf("(Identifier)/(Keyword)\n");    break;  case String:    printf("(String)\n");    break;  case Number:    printf("(Number)\n");    break;  default:    printf("(unknown)");  }#endif  if (state != Identifier && eatNextIdentifier)    eatNextIdentifier = false;  restrKeyword = false;  delimited = false;  kjsyylloc.first_line = yylineno; // ???  kjsyylloc.last_line = yylineno;  switch (state) {  case Eof:    token = 0;    break;  case Other:    if(token == '}' || token == ';') {      delimited = true;    }    break;  case Identifier:    if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {      // Lookup for keyword failed, means this is an identifier      // Apply anonymous-function hack below (eat the identifier)      if (eatNextIdentifier) {        eatNextIdentifier = false;        token = lex();        break;      }      kjsyylval.ident = makeIdentifier(buffer16, pos16);      token = IDENT;      break;    }    eatNextIdentifier = false;    // Hack for "f = function somename() { ... }", too hard to get into the grammar    if (token == FUNCTION && lastToken == '=' )      eatNextIdentifier = true;    if (token == CONTINUE || token == BREAK ||        token == RETURN || token == THROW)      restrKeyword = true;    break;  case String:    kjsyylval.ustr = makeUString(buffer16, pos16);    token = STRING;    break;  case Number:    kjsyylval.dval = dval;    token = NUMBER;    break;  case Bad:    fprintf(stderr, "yylex: ERROR.\n");    return -1;  default:    assert(!"unhandled numeration value in switch");    return -1;  }  lastToken = token;  return token;}bool Lexer::isWhiteSpace() const{  return (current == ' ' || current == '\t' ||          current == 0x0b || current == 0x0c || current == 0xa0);}bool Lexer::isLineTerminator(){  bool cr = (current == '\r');  bool lf = (current == '\n');  if (cr)      skipLF = true;  else if (lf)      skipCR = true;  return cr || lf;}bool Lexer::isIdentLetter(unsigned short c){  /* TODO: allow other legitimate unicode chars */  return (c >= 'a' && c <= 'z' ||          c >= 'A' && c <= 'Z' ||          c == '$' || c == '_');}bool Lexer::isDecimalDigit(unsigned short c){  return (c >= '0' && c <= '9');}bool Lexer::isHexDigit(unsigned short c) const{  return (c >= '0' && c <= '9' ||          c >= 'a' && c <= 'f' ||          c >= 'A' && c <= 'F');}bool Lexer::isOctalDigit(unsigned short c) const{  return (c >= '0' && c <= '7');}int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,                              unsigned short c3, unsigned short c4){  if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {    shift(4);    return URSHIFTEQUAL;  } else if (c1 == '=' && c2 == '=' && c3 == '=') {    shift(3);    return STREQ;  } else if (c1 == '!' && c2 == '=' && c3 == '=') {    shift(3);    return STRNEQ;   } else if (c1 == '>' && c2 == '>' && c3 == '>') {    shift(3);    return URSHIFT;  } else if (c1 == '<' && c2 == '<' && c3 == '=') {    shift(3);    return LSHIFTEQUAL;  } else if (c1 == '>' && c2 == '>' && c3 == '=') {    shift(3);    return RSHIFTEQUAL;  } else if (c1 == '<' && c2 == '=') {    shift(2);    return LE;  } else if (c1 == '>' && c2 == '=') {    shift(2);    return GE;  } else if (c1 == '!' && c2 == '=') {    shift(2);    return NE;  } else if (c1 == '+' && c2 == '+') {    shift(2);    if (terminator)      return AUTOPLUSPLUS;    else      return PLUSPLUS;  } else if (c1 == '-' && c2 == '-') {    shift(2);    if (terminator)      return AUTOMINUSMINUS;    else      return MINUSMINUS;  } else if (c1 == '=' && c2 == '=') {    shift(2);    return EQEQ;  } else if (c1 == '+' && c2 == '=') {    shift(2);    return PLUSEQUAL;  } else if (c1 == '-' && c2 == '=') {    shift(2);    return MINUSEQUAL;  } else if (c1 == '*' && c2 == '=') {    shift(2);    return MULTEQUAL;  } else if (c1 == '/' && c2 == '=') {    shift(2);    return DIVEQUAL;  } else if (c1 == '&' && c2 == '=') {    shift(2);    return ANDEQUAL;  } else if (c1 == '^' && c2 == '=') {    shift(2);    return XOREQUAL;  } else if (c1 == '%' && c2 == '=') {    shift(2);    return MODEQUAL;  } else if (c1 == '|' && c2 == '=') {    shift(2);    return OREQUAL;  } else if (c1 == '<' && c2 == '<') {    shift(2);    return LSHIFT;  } else if (c1 == '>' && c2 == '>') {    shift(2);    return RSHIFT;  } else if (c1 == '&' && c2 == '&') {    shift(2);    return AND;  } else if (c1 == '|' && c2 == '|') {    shift(2);    return OR;  }  switch(c1) {    case '=':    case '>':    case '<':    case ',':    case '!':    case '~':    case '?':    case ':':    case '.':    case '+':    case '-':    case '*':    case '/':    case '&':    case '|':    case '^':    case '%':    case '(':    case ')':    case '{':    case '}':    case '[':    case ']':    case ';':      shift(1);      return static_cast<int>(c1);    default:      return -1;  }}unsigned short Lexer::singleEscape(unsigned short c) const{  switch(c) {  case 'b':    return 0x08;  case 't':    return 0x09;  case 'n':    return 0x0A;  case 'v':    return 0x0B;  case 'f':    return 0x0C;  case 'r':    return 0x0D;  case '"':    return 0x22;  case '\'':    return 0x27;  case '\\':    return 0x5C;  default:    return c;  }}unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,                                      unsigned short c3) const{  return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');}unsigned char Lexer::convertHex(unsigned short c){  if (c >= '0' && c <= '9')    return (c - '0');  else if (c >= 'a' && c <= 'f')    return (c - 'a' + 10);  else    return (c - 'A' + 10);}unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2){  return ((convertHex(c1) << 4) + convertHex(c2));}UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,                                     unsigned short c3, unsigned short c4){  return UChar((convertHex(c1) << 4) + convertHex(c2),               (convertHex(c3) << 4) + convertHex(c4));}void Lexer::record8(unsigned short c){  assert(c <= 0xff);  // enlarge buffer if full  if (pos8 >= size8 - 1) {    char *tmp = new char[2 * size8];    memcpy(tmp, buffer8, size8 * sizeof(char));    delete [] buffer8;    buffer8 = tmp;    size8 *= 2;  }  buffer8[pos8++] = (char) c;}void Lexer::record16(UChar c){  // enlarge buffer if full  if (pos16 >= size16 - 1) {    UChar *tmp = new UChar[2 * size16];    memcpy(tmp, buffer16, size16 * sizeof(UChar));    delete [] buffer16;    buffer16 = tmp;    size16 *= 2;  }  buffer16[pos16++] = c;}bool Lexer::scanRegExp(){  pos16 = 0;  bool lastWasEscape = false;  bool inBrackets = false;  while (1) {    if (isLineTerminator() || current == 0)      return false;    else if (current != '/' || lastWasEscape == true || inBrackets == true)    {        // keep track of '[' and ']'        if ( !lastWasEscape ) {          if ( current == '[' && !inBrackets )            inBrackets = true;          if ( current == ']' && inBrackets )            inBrackets = false;        }        record16(current);        lastWasEscape =            !lastWasEscape && (current == '\\');    }    else { // end of regexp      pattern = UString(buffer16, pos16);      pos16 = 0;      shift(1);      break;    }    shift(1);  }  while (isIdentLetter(current)) {    record16(current);    shift(1);  }  flags = UString(buffer16, pos16);  return true;}void Lexer::doneParsing(){  for (unsigned i = 0; i < numIdentifiers; i++) {    delete identifiers[i];  }  free (identifiers);  identifiers = 0;  numIdentifiers = 0;  identifiersCapacity = 0;  for (unsigned i = 0; i < numStrings; i++) {    delete strings[i];  }  free (strings);  strings = 0;  numStrings = 0;  stringsCapacity = 0;}const int initialCapacity = 64;const int growthFactor = 2;Identifier *Lexer::makeIdentifier(UChar *buffer, unsigned int pos){  if (numIdentifiers == identifiersCapacity) {    identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;    identifiers = (KJS::Identifier **)realloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);  }  KJS::Identifier *identifier = new KJS::Identifier(buffer16, pos16);  identifiers[numIdentifiers++] = identifier;  return identifier;} UString *Lexer::makeUString(UChar *buffer, unsigned int pos){  if (numStrings == stringsCapacity) {    stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;    strings = (UString **)realloc(strings, sizeof(UString *) * stringsCapacity);  }  UString *string = new UString(buffer16, pos16);  strings[numStrings++] = string;  return string;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -