📄 lex.cc
字号:
//@beginlicenses@
//@license{chiba_tokyo}{}@
//@license{xerox}{}@
//@license{contributors}{}@
//
//  Permission to use, copy, distribute and modify this software and its
//  documentation for any purpose is hereby granted without fee, provided that
//  the above copyright notice appears in all copies and that both that copyright
//  notice and this permission notice appear in supporting documentation.
//
//  1997-2001 Shigeru Chiba, Tokyo Institute of Technology. make(s) no representations about the suitability of this
//  software for any purpose. It is provided "as is" without express or implied
//  warranty.
//
//  Copyright (C) 1997-2001 Shigeru Chiba, Tokyo Institute of Technology.
//
//  -----------------------------------------------------------------
//
//
//  Copyright (c) 1995, 1996 Xerox Corporation.
//  All Rights Reserved.
//
//  Use and copying of this software and preparation of derivative works
//  based upon this software are permitted. Any copy of this software or
//  of any derivative work must include the above copyright notice of
//  Xerox Corporation, this paragraph and the one after it.  Any
//  distribution of this software or derivative works must comply with all
//  applicable United States export control laws.
//
//  This software is made available AS IS, and XEROX CORPORATION DISCLAIMS
//  ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE
//  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
//  PURPOSE, AND NOTWITHSTANDING ANY OTHER PROVISION CONTAINED HEREIN, ANY
//  LIABILITY FOR DAMAGES RESULTING FROM THE SOFTWARE OR ITS USE IS
//  EXPRESSLY DISCLAIMED, WHETHER ARISING IN CONTRACT, TORT (INCLUDING
//  NEGLIGENCE) OR STRICT LIABILITY, EVEN IF XEROX CORPORATION IS ADVISED
//  OF THE POSSIBILITY OF SUCH DAMAGES.
//
//  -----------------------------------------------------------------
//
//  Permission to use, copy, distribute and modify this software and its
//  documentation for any purpose is hereby granted without fee, provided that
//  the above copyright notice appears in all copies and that both that copyright
//  notice and this permission notice appear in supporting documentation.
//
//  Other Contributors (see file AUTHORS) make(s) no representations about the suitability of this
//  software for any purpose.
It is provided "as is" without express or implied// warranty.// // Copyright (C) Other Contributors (see file AUTHORS)////@endlicenses@#include <cassert>#include <cstdlib>#include <cstring>#include <iostream>#include <opencxx/parser/Lex.h>#include <opencxx/parser/token-names.h>#include <opencxx/parser/Token.h>#include <opencxx/parser/HashTable.h>#include <opencxx/parser/ptreeAll.h>#include <opencxx/parser/Program.h>#include <opencxx/parser/auxil.h>#include <opencxx/parser/GC.h>// #include <opencxx/driver.h>using namespace std;namespace Opencxx{static void InitializeOtherKeywords(bool);#ifdef TEST#if (defined __GNUC__)#define token(x) (long)#x#else#define token(x) (long)"x"#endif#else#define token(x) x#endif// class LexHashTable* Lex::user_keywords = 0;Ptree* Lex::comments = 0;Lex::Lex(Program* aFile, bool wchars, bool recognizeOccExtensions) : file(aFile) , fifo(this) , wcharSupport(wchars){ file->Rewind(); last_token = '\n'; tokenp = 0; token_len = 0; InitializeOtherKeywords(recognizeOccExtensions);}char* Lex::Save(){ char* pos; int len; fifo.Peek(0, pos, len); return pos;}void Lex::Restore(char* pos){ last_token = '\n'; tokenp = 0; token_len = 0; fifo.Clear(); Rewind(pos);}// ">>" is either the shift operator or double closing brackets.void Lex::GetOnlyClosingBracket(Token& t){ Restore(t.ptr + 1);}unsigned Lex::LineNumber(char* pos, char*& ptr, int& len){ return file->LineNumber(pos, ptr, len);}int Lex::GetToken(Token& t){ t.kind = fifo.Pop(t.ptr, t.len); return t.kind;}int Lex::LookAhead(int offset){ return fifo.Peek(offset);}int Lex::LookAhead(int offset, Token& t){ t.kind = fifo.Peek(offset, t.ptr, t.len); return t.kind;}char* Lex::TokenPosition(){ return (char*)file->Read(Tokenp());}char Lex::Ref(unsigned i){ return file->Ref(i);}void Lex::Rewind(char* p){ file->Rewind(p - file->Read(0));}bool Lex::RecordKeyword(char* keyword, int token){ int index; char* str; if(keyword == 0) return false; str = new(GC) char[strlen(keyword) + 1]; strcpy(str, keyword); 
if(user_keywords == 0) user_keywords = new HashTable; if(user_keywords->AddEntry(str, (HashTable::Value)token, &index) >= 0) return true; else return bool(user_keywords->Peek(index) == (HashTable::Value)token);}bool Lex::Reify(Ptree* t, unsigned int& value){ if(t == 0 || !t->IsLeaf()) return false; char* p = t->GetPosition(); int len = t->GetLength(); value = 0; if(len > 2 && *p == '0' && is_xletter(p[1])){ for(int i = 2; i < len; ++i){ char c = p[i]; if(is_digit(c)) value = value * 0x10 + (c - '0'); else if('A' <= c && c <= 'F') value = value * 0x10 + (c - 'A' + 10); else if('a' <= c && c <= 'f') value = value * 0x10 + (c - 'a' + 10); else if(is_int_suffix(c)) break; else return false; } return true; } else if(len > 0 && is_digit(*p)){ for(int i = 0; i < len; ++i){ char c = p[i]; if(is_digit(c)) value = value * 10 + c - '0'; else if(is_int_suffix(c)) break; else return false; } return true; } else return false;}// Reify() doesn't interpret an escape character.bool Lex::Reify(Ptree* t, char*& str){ if(t == 0 || !t->IsLeaf()) return false; char* p = t->GetPosition(); int length = t->GetLength(); if(*p != '"') return false; else{ str = new(GC) char[length]; char* sp = str; for(int i = 1; i < length; ++i) if(p[i] != '"'){ *sp++ = p[i]; if(p[i] == '\\' && i + 1 < length) *sp++ = p[++i]; } else while(++i < length && p[i] != '"') ; *sp = '\0'; return true; }}// class TokenFifoLex::TokenFifo::TokenFifo(Lex* l){ lex = l; size = 16; ring = new (GC) Slot[size]; head = tail = 0;}Lex::TokenFifo::~TokenFifo(){ // delete [] ring;}void Lex::TokenFifo::Clear(){ head = tail = 0;}void Lex::TokenFifo::Push(int token, char* pos, int len){ const int Plus = 16; ring[head].token = token; ring[head].pos = pos; ring[head].len = len; head = (head + 1) % size; if(head == tail){ Slot* ring2 = new (GC) Slot[size + Plus]; int i = 0; do{ ring2[i++] = ring[tail]; tail = (tail + 1) % size; } while(head != tail); head = i; tail = 0; size += Plus; // delete [] ring; ring = ring2; }}int 
Lex::TokenFifo::Pop(char*& pos, int& len){ if(head == tail) return lex->ReadToken(pos, len); int t = ring[tail].token; pos = ring[tail].pos; len = ring[tail].len; tail = (tail + 1) % size; return t;}int Lex::TokenFifo::Peek(int offset){ return ring[Peek2(offset)].token;}int Lex::TokenFifo::Peek(int offset, char*& pos, int& len){ int cur = Peek2(offset); pos = ring[cur].pos; len = ring[cur].len; return ring[cur].token;}int Lex::TokenFifo::Peek2(int offset){ int i; int cur = tail; for(i = 0; i <= offset; ++i){ if(head == cur){ while(i++ <= offset){ char* p; int l; int t = lex->ReadToken(p, l); Push(t, p, l); } break; } cur = (cur + 1) % size; } return (tail + offset) % size;}/* Lexical Analyzer*/int Lex::ReadToken(char*& ptr, int& len){ int t; for(;;){ t = ReadLine(); if(t == Ignore) continue; last_token = t;#if (defined __GNUC__) || (defined _GNUC_SYNTAX) if(t == ATTRIBUTE){ SkipAttributeToken(); continue; } else if(t == EXTENSION){ t = SkipExtensionToken(ptr, len); if(t == Ignore) continue; else return t; }#endif#if defined(_MSC_VER) if(t == ASM){ SkipAsmToken(); continue; } else if(t == DECLSPEC){ SkipDeclspecToken(); continue; }#endif if(t != '\n') break; } ptr = TokenPosition(); len = TokenLen(); return t;}// SkipAttributeToken() skips __attribute__(...), __asm__(...), ...void Lex::SkipAttributeToken(){ char c; do{ c = file->Get(); }while(c != '(' && c != '\0'); int i = 1; do{ c = file->Get(); if(c == '(') ++i; else if(c == ')') --i; else if(c == '\0') break; } while(i > 0);}// SkipExtensionToken() skips __extension__(...).int Lex::SkipExtensionToken(char*& ptr, int& len){ ptr = TokenPosition(); len = TokenLen(); char c; do{ c = file->Get(); }while(is_blank(c) || c == '\n'); if(c != '('){ file->Unget(); return Ignore; // if no (..) 
follows, ignore __extension__ } int i = 1; do{ c = file->Get(); if(c == '(') ++i; else if(c == ')') --i; else if(c == '\0') break; } while(i > 0); return Identifier; // regards it as the identifier __extension__}#if defined(_MSC_VER)#define CHECK_END_OF_INSTRUCTION(C, EOI) \ if (C == '\0') return; \ if (strchr(EOI, C)) { \ this->file->Unget(); \ return; \ }/* SkipAsmToken() skips __asm ... You can have the following : Just count the '{' and '}' and it should be ok __asm { mov ax,1 mov bx,1 } Stop when EOL found. Note that the first ';' after an __asm instruction is an ASM comment ! int v; __asm mov ax,1 __asm mov bx,1; v=1; Stop when '}' found if (cond) {__asm mov ax,1 __asm mov bx,1} and certainly more...*/void Lex::SkipAsmToken(){ char c; do{ c = file->Get(); CHECK_END_OF_INSTRUCTION(c, ""); }while(is_blank(c) || c == '\n'); if(c == '{'){ int i = 1; do{ c = file->Get(); CHECK_END_OF_INSTRUCTION(c, ""); if(c == '{') ++i; else if(c == '}') --i; } while(i > 0); } else{ for(;;){ CHECK_END_OF_INSTRUCTION(c, "}\n"); c = file->Get(); } }}// SkipDeclspecToken() skips __declspec(...).void Lex::SkipDeclspecToken(){ char c; do{ c = file->Get(); CHECK_END_OF_INSTRUCTION(c, ""); }while(is_blank(c)); if (c == '(') { int i = 1; do{ c = file->Get(); CHECK_END_OF_INSTRUCTION(c, "};"); if(c == '(') ++i; else if(c == ')') --i; }while(i > 0); }}#undef CHECK_END_OF_INSTRUCTION#endif /* _MSC_VER */char Lex::GetNextNonWhiteChar(){ char c; for(;;){ do{ c = file->Get(); }while(is_blank(c)); if(c != '\\') break; c = file->Get(); if(c != '\n' && c!= '\r') { file->Unget(); break; } } return c;}int Lex::ReadLine(){ char c; unsigned top; c = GetNextNonWhiteChar(); tokenp = top = file->GetCurPos(); if(c == '\0'){ file->Unget(); return '\0'; } else if(c == '\n') return '\n'; else if(c == '#' && last_token == '\n'){ if(ReadLineDirective()) return '\n'; else{ file->Rewind(top + 1); token_len = 1; return SingleCharOp(c); } } else if(c == '\'' || c == '"'){ if(c == '\''){ if(ReadCharConst(top)) 
return token(CharConst); } else{ if(ReadStrConst(top)) return token(StringL); } file->Rewind(top + 1); token_len = 1; return SingleCharOp(c); } else if(is_digit(c)) return ReadNumber(c, top); else if(c == '.'){ c = file->Get(); if(is_digit(c)) return ReadFloat(top); else{ file->Unget(); return ReadSeparator('.', top); } }#if 1 // for wchar constants !!! else if(is_letter(c)) { if (c == 'L') { c = file->Get(); if (c == '\'' || c == '"') { if (c == '\'') { if (ReadCharConst(top+1)) { // cout << "WideCharConst" << endl; return token(WideCharConst);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -