📄 lex.cc
字号:
} } else { if(ReadStrConst(top+1)) { // cout << "WideStringL" << endl; return token(WideStringL); } } } file->Rewind(top); } return ReadIdentifier(top); } else return ReadSeparator(c, top); #else else if(is_letter(c)) return ReadIdentifier(top); else return ReadSeparator(c, top);#endif}bool Lex::ReadCharConst(unsigned top){ char c; for(;;){ c = file->Get(); if(c == '\\'){ c = file->Get(); if(c == '\0') return false; } else if(c == '\''){ token_len = int(file->GetCurPos() - top + 1); return true; } else if(c == '\n' || c == '\0') return false; }}/* If text is a sequence of string constants like: "string1" "string2" then the string constants are delt with as a single constant.*/bool Lex::ReadStrConst(unsigned top){ char c; for(;;){ c = file->Get(); if(c == '\\'){ c = file->Get(); if(c == '\0') return false; } else if(c == '"'){ unsigned pos = file->GetCurPos() + 1; int nline = 0; do{ c = file->Get(); if(c == '\n') ++nline; } while(is_blank(c) || c == '\n'); if(c == '"') /* line_number += nline; */ ; else{ token_len = int(pos - top); file->Rewind(pos); return true; } } else if(c == '\n' || c == '\0') return false; }}int Lex::ReadNumber(char c, unsigned top){ char c2 = file->Get(); if(c == '0' && is_xletter(c2)){ do{ c = file->Get(); } while(is_hexdigit(c)); while(is_int_suffix(c)) c = file->Get(); file->Unget(); token_len = int(file->GetCurPos() - top + 1); return token(Constant); } while(is_digit(c2)) c2 = file->Get(); if(is_int_suffix(c2)) do{ c2 = file->Get(); }while(is_int_suffix(c2)); else if(c2 == '.') return ReadFloat(top); else if(is_eletter(c2)){ file->Unget(); return ReadFloat(top); } file->Unget(); token_len = int(file->GetCurPos() - top + 1); return token(Constant);}int Lex::ReadFloat(unsigned top){ char c; do{ c = file->Get(); }while(is_digit(c)); if(is_float_suffix(c)) do{ c = file->Get(); }while(is_float_suffix(c)); else if(is_eletter(c)){ unsigned p = file->GetCurPos(); c = file->Get(); if(c == '+' || c == '-'){ c = file->Get(); if(!is_digit(c)){ file->Rewind(p); token_len = int(p - top); return token(Constant); } } else if(!is_digit(c)){ file->Rewind(p); token_len = int(p - top); return token(Constant); } do{ c = file->Get(); }while(is_digit(c)); while(is_float_suffix(c)) c = file->Get(); } file->Unget(); token_len = int(file->GetCurPos() - top + 1); return token(Constant);}// ReadLineDirective() simply ignores a line beginning with '#'bool Lex::ReadLineDirective(){ char c; do{ c = file->Get(); }while(c != '\n' && c != '\0'); return true;}int Lex::ReadIdentifier(unsigned top){ char c; do{ c = file->Get(); }while(is_letter(c) || is_digit(c)); unsigned len = file->GetCurPos() - top; token_len = int(len); file->Unget(); return Screening((char*)file->Read(top), int(len));}/* This table is a list of reserved key words. Note: alphabetical order!*/static struct rw_table { char* name; long value;} table[] = {#if (defined __GNUC__) || (defined _GNUC_SYNTAX) { "__alignof__", token(SIZEOF) }, { "__asm__", token(ATTRIBUTE) }, { "__attribute__", token(ATTRIBUTE) }, { "__const", token(CONST) }, { "__extension__", token(EXTENSION) }, { "__inline", token(INLINE) }, { "__inline__", token(INLINE) }, { "__noreturn__", token(Ignore) }, { "__restrict", token(Ignore) }, { "__restrict__", token(Ignore) }, { "__signed", token(SIGNED) }, { "__signed__", token(SIGNED) }, { "__typeof", token(TYPEOF) }, { "__typeof__", token(TYPEOF) }, { "__unused__", token(Ignore) }, { "__vector", token(Ignore) },#endif { "asm", token(ATTRIBUTE) }, { "auto", token(AUTO) },#if !defined(_MSC_VER) || (_MSC_VER >= 1100) { "bool", token(BOOLEAN) },#endif { "break", token(BREAK) }, { "case", token(CASE) }, { "catch", token(CATCH) }, { "char", token(CHAR) }, { "class", token(CLASS) }, { "const", token(CONST) }, { "continue", token(CONTINUE) }, { "default", token(DEFAULT) }, { "delete", token(DELETE) }, { "do", token(DO) }, { "double", token(DOUBLE) }, { "else", token(ELSE) }, { "enum", token(ENUM) }, { "extern", token(EXTERN) }, { "float", token(FLOAT) }, { "for", token(FOR) }, { "friend", token(FRIEND) }, { "goto", token(GOTO) }, { "if", token(IF) }, { "inline", token(INLINE) }, { "int", token(INT) }, { "long", token(LONG) }, { "metaclass", token(METACLASS) }, // OpenC++ { "mutable", token(MUTABLE) }, { "namespace", token(NAMESPACE) }, { "new", token(NEW) },#if (defined __GNUC__) || (defined _GNUC_SYNTAX) { "noreturn", token(Ignore) },#endif { "operator", token(OPERATOR) }, { "private", token(PRIVATE) }, { "protected", token(PROTECTED) }, { "public", token(PUBLIC) }, { "register", token(REGISTER) }, { "return", token(RETURN) }, { "short", token(SHORT) }, { "signed", token(SIGNED) }, { "sizeof", token(SIZEOF) }, { "static", token(STATIC) }, { "struct", token(STRUCT) }, { "switch", token(SWITCH) }, { "template", token(TEMPLATE) }, { "this", token(THIS) }, { "throw", token(THROW) }, { "try", token(TRY) }, { "typedef", token(TYPEDEF) }, { "typeid", token(TYPEID) }, { "typename", token(CLASS) }, // it's not identical to class, but... { "union", token(UNION) }, { "unsigned", token(UNSIGNED) }, { "using", token(USING) }, { "virtual", token(VIRTUAL) }, { "void", token(VOID) }, { "volatile", token(VOLATILE) }, { "while", token(WHILE) }, /* NULL slot */};#ifndef NDEBUGclass rw_table_sanity_check{public: rw_table_sanity_check(const rw_table table[]) { unsigned n = (sizeof table)/(sizeof table[0]); if (n < 2) return; for (const char* old = (table++)->name; --n; old = (table++)->name) if (strcmp(old, table->name) >= 0) { cerr << "FAILED: '" << old << "' < '" << table->name << "'" << endl; assert(! "invalid order in presorted array"); } }};rw_table_sanity_check rw_table_sanity_check_instance(table);#endifstatic void InitializeOtherKeywords(bool recognizeOccExtensions){ static bool done = false; if(done) return; else done = true; if (! recognizeOccExtensions) for(unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); ++i) if(table[i].value == METACLASS){ table[i].value = Identifier; break; }#if defined(_MSC_VER)// by JCAB#define verify(c) do { const bool cond = (c); assert(cond); } while (0) verify(Lex::RecordKeyword("cdecl", Ignore)); verify(Lex::RecordKeyword("_cdecl", Ignore)); verify(Lex::RecordKeyword("__cdecl", Ignore)); verify(Lex::RecordKeyword("_fastcall", Ignore)); verify(Lex::RecordKeyword("__fastcall", Ignore)); verify(Lex::RecordKeyword("_based", Ignore)); verify(Lex::RecordKeyword("__based", Ignore)); verify(Lex::RecordKeyword("_asm", ASM)); verify(Lex::RecordKeyword("__asm", ASM)); verify(Lex::RecordKeyword("_inline", INLINE)); verify(Lex::RecordKeyword("__inline", INLINE)); verify(Lex::RecordKeyword("__forceinline", INLINE)); verify(Lex::RecordKeyword("_stdcall", Ignore)); verify(Lex::RecordKeyword("__stdcall", Ignore)); verify(Lex::RecordKeyword("__declspec", DECLSPEC)); verify(Lex::RecordKeyword("__int8", CHAR)); verify(Lex::RecordKeyword("__int16", SHORT)); verify(Lex::RecordKeyword("__int32", INT)); verify(Lex::RecordKeyword("__int64", INT64));#endif}int Lex::Screening(char *identifier, int len){ struct rw_table *low, *high, *mid; int c, token; if (wcharSupport && !strncmp("wchar_t", identifier, len)) return token(WCHAR); low = table; high = &table[sizeof(table) / sizeof(table[0]) - 1]; while(low <= high){ mid = low + (high - low) / 2; if((c = strncmp(mid->name, identifier, len)) == 0) if(mid->name[len] == '\0') return mid->value; else high = mid - 1; else if(c < 0) low = mid + 1; else high = mid - 1; } if(user_keywords == 0) user_keywords = new HashTable; if(user_keywords->Lookup(identifier, len, (HashTable::Value*)&token)) return token; return token(Identifier);}int Lex::ReadSeparator(char c, unsigned top){ char c1 = file->Get(); token_len = 2; if(c1 == '='){ switch(c){ case '*' : case '/' : case '%' : case '+' : case '-' : case '&' : case '^' : case '|' : return token(AssignOp); case '=' : case '!' : return token(EqualOp); case '<' : case '>' : return token(RelOp); default : file->Unget(); token_len = 1; return SingleCharOp(c); } } else if(c == c1){ switch(c){ case '<' : case '>' : if(file->Get() != '='){ file->Unget(); return token(ShiftOp); } else{ token_len = 3; return token(AssignOp); } case '|' : return token(LogOrOp); case '&' : return token(LogAndOp); case '+' : case '-' : return token(IncOp); case ':' : return token(Scope); case '.' : if(file->Get() == '.'){ token_len = 3; return token(Ellipsis); } else file->Unget(); case '/' : return ReadComment(c1, top); default : file->Unget(); token_len = 1; return SingleCharOp(c); } } else if(c == '.' && c1 == '*') return token(PmOp); else if(c == '-' && c1 == '>') if(file->Get() == '*'){ token_len = 3; return token(PmOp); } else{ file->Unget(); return token(ArrowOp); } else if(c == '/' && c1 == '*') return ReadComment(c1, top); else{ file->Unget(); token_len = 1; return SingleCharOp(c); } cerr << "*** An invalid character has been found! (" << (int)c << ',' << (int)c1 << ")\n"; return token(BadToken);}int Lex::SingleCharOp(unsigned char c){ /* !"#$%&'()*+,-./0123456789:;<=>? */ static char valid[] = "x xx xxxxxxxx xxxxxx"; if('!' <= c && c <= '?' && valid[c - '!'] == 'x') return c; else if(c == '[' || c == ']' || c == '^') return c; else if('{' <= c && c <= '~') return c; else return token(BadToken);}int Lex::ReadComment(char c, unsigned top) { unsigned len = 0; if (c == '*') { // a nested C-style comment is prohibited. do { c = file->Get(); if (c == '*') { c = file->Get(); if (c == '/') { len = 1; break; } else { file->Unget(); } } } while(c != '\0'); } else { assert(c == '/'); do { c = file->Get(); } while(c != '\n' && c != '\0'); } len += file->GetCurPos() - top; token_len = int(len); Leaf* node = new Leaf((char*)file->Read(top), int(len)); comments = PtreeUtil::Snoc(comments, node); return Ignore;}Ptree* Lex::GetComments() { Ptree* c = comments; comments = 0; return c;}Ptree* Lex::GetComments2() { return comments;}}#ifdef TEST#include <stdio.h>#include <opencxx/parser/ProgramFromStdin.h>using namespace Opencxx;int main(){ int i = 0; Token token; Lex lex(new ProgramFromStdin); for(;;){// int t = lex.GetToken(token); int t = lex.LookAhead(i++, token); if(t == 0) break; else if(t < 128) printf("%c (%x): ", t, t); else printf("%-10.10s (%x): ", (char*)t, t); putchar('"'); while(token.len-- > 0) putchar(*token.ptr++); puts("\""); };}#endif/*line directive:^"#"{blank}*{digit}+({blank}+.*)?\npragma directive:^"#"{blank}*"pragma".*\nConstant {digit}+{int_suffix}* "0"{xletter}{hexdigit}+{int_suffix}* {digit}*\.{digit}+{float_suffix}* {digit}+\.{float_suffix}* {digit}*\.{digit}+"e"("+"|"-")*{digit}+{float_suffix}* {digit}+\."e"("+"|"-")*{digit}+{float_suffix}* {digit}+"e"("+"|"-")*{digit}+{float_suffix}*CharConst \'([^'\n]|\\[^\n])\'WideCharConst L\'([^'\n]|\\[^\n])\' !!! newStringL \"([^"\n]|\\["\n])*\"WideStringL L\"([^"\n]|\\["\n])*\" !!! newIdentifier {letter}+({letter}|{digit})*AssignOp *= /= %= += -= &= ^= <<= >>=EqualOp == !=RelOp <= >=ShiftOp << >>LogOrOp ||LogAndOp &&IncOp ++ --Scope ::Ellipsis ...PmOp .* ->*ArrowOp ->others !%^&*()-+={}|~[];:<>?,./BadToken others*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -