📄 lexer.cpp
字号:
// Lexer.cpp: implementation of the Lexer class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "SymbolTable.h"
#include "Lexer.h"
#include <assert.h>
#include <algorithm>
#include <iostream>
using namespace std;
extern SymbolTable symbol_table;
// Constructs a lexer positioned on line 1 with an empty lexeme buffer.
// lexbuf_p is the write cursor into lexbuf; lexbuf_end is lexbuf + BSIZE
// (presumably one past the last slot of lexbuf -- confirm against Lexer.h).
// No input stream is attached yet: initialize() must be called before any
// token is requested.
Lexer::Lexer()
    : line_no(1),
      lexbuf_p(lexbuf),
      lexbuf_end(lexbuf + BSIZE) {
    // Start from a well-defined "no stream" state instead of an indeterminate
    // pointer, so a missing initialize() call fails deterministically.
    // Assigned in the body (not the init list) so member declaration order
    // does not matter.
    inFile = 0;
}
// Attaches the input stream that subsequent scanning calls read from.
// Only the pointer is stored; nothing in this function opens, copies or
// closes the stream.
void Lexer::initialize(ifstream* ainFile) {
    this->inFile = ainFile;
}
int Lexer::getLineNo() const {
return line_no;
}
// Scans a quoted terminal symbol (VT). The opening single quote has already
// been consumed by the caller; this reads up to and including the closing
// quote, enters the lexeme into the symbol table and returns (VT, index).
//
// Escapes: a backslash may only precede another backslash or a single quote,
// and the escaped character is stored without the backslash. So 'abc'ef'
// yields [abc] and '\'abc\' abc' yields ['abc' abc].
//
// Errors are reported through error(): an empty terminal (''), an illegal
// escape, a lexeme longer than the buffer, and EOF before the closing quote.
// If error() returns, a default-constructed Token is returned on the
// overflow and EOF paths.
Token Lexer::getVT() {
    lexbuf_p = lexbuf;

    // Hold the result of get() in an int: narrowing it to char makes EOF
    // indistinguishable from byte 0xFF (signed char) or unreachable
    // (unsigned char).
    int c = inFile->get();
    while (c != EOF) {
        if (c == SymbolEntry::c_SINGLE_QUOTE) {
            // 1) OK outlet: closing delimiter found.
            if (lexbuf_p == lexbuf) {
                error("Lex::getVT..@" + iToString(line_no) +
                      "..the VT between ' and ' is empty");
            }
            string lex(lexbuf, lexbuf_p);
            int index = symbol_table.insert(lex, SymbolEntry::VT);
            return Token(SymbolEntry::VT, index);
        }

        // Check capacity only once we know another character must be stored,
        // so a lexeme that exactly fills the buffer is still accepted.
        if (lexbuf_p >= lexbuf_end) {
            error("Lexer::getVT..@" + iToString(line_no) +
                  "..lexbuf overflow");
            return Token();
        }

        if (c == SymbolEntry::c_REVERSE_SLASH) {
            c = inFile->get();   // skip the escape char, take what follows
            if (c == EOF)
                break;           // report as unterminated VT below
            // '\' is only legal right before '\' or '\''.
            if (c != SymbolEntry::c_REVERSE_SLASH &&
                c != SymbolEntry::c_SINGLE_QUOTE)
                error("Lexer::getVT..@" + iToString(line_no) +
                      "..\\ or ' expected after \\");
        }

        // Keep the line counter accurate for terminals that span lines.
        if (c == '\n')
            ++line_no;

        *lexbuf_p++ = static_cast<char>(c);
        c = inFile->get();
    }

    // 2) failure outlet: input ended before the closing quote.
    error("Lexer::VT..@" + iToString(line_no) +
          "..encounter EOF and can't find the delimiter ' ");
    return Token();
}
// Scans an alphanumeric lexeme whose first character `first` was already
// consumed by the caller, then classifies it:
//   - if it equals SymbolEntry::p_EPSILON -> (EPSILON, keyword index)
//   - otherwise                           -> (VN, symbol-table index)
// VN lexemes have the form [A-Za-z][0-9a-zA-Z]*.
// The first non-alphanumeric character read (e.g. white space or ';') is
// pushed back so the next scan sees it. A lexeme that does not fit in the
// buffer is reported through error().
Token Lexer::getVN_epsilon(char first) {
    lexbuf_p = lexbuf;

    // Work on int values in 0..255 (or EOF): passing a negative char to
    // isalnum() is undefined behaviour, and get() returns int so that EOF
    // stays distinguishable from real bytes.
    int c = static_cast<unsigned char>(first);
    while (isalnum(c) && lexbuf_p < lexbuf_end) {
        *lexbuf_p++ = static_cast<char>(c);
        c = inFile->get();
    }

    // Still alphanumeric means the loop stopped because the buffer is full.
    if (isalnum(c)) {
        error("Lexer::getVN..@" + iToString(line_no) +
              "..lexbuf overflow..");
    }

    // Return the look-ahead character to the stream; there is nothing to put
    // back when the input simply ended.
    if (c != EOF)
        inFile->putback(static_cast<char>(c));

    string lex(lexbuf, lexbuf_p);
    int index;
    if (first == 'e' && lex.compare(SymbolEntry::p_EPSILON) == 0) {
        index = symbol_table.findInKeywords(lex);   // KEYWORD: EPSILON
        assert(index >= 0);
        return Token(SymbolEntry::EPSILON, index);
    }
    index = symbol_table.insert(lex, SymbolEntry::VN);
    return Token(SymbolEntry::VN, index);
}
// Returns the next token from the input stream, skipping white space and
// counting lines.
//
// (type, value)        meaning                      lexeme
// ---------------------------------------------------------------------------
// 1) types taken from SymbolEntry
// (VN, index)          non-terminal symbol          [A-Za-z][0-9a-zA-Z]*
// (VT, index)          terminal symbol              \'Escape\'
//                        Escape       -> NoEscapeChar Escape | '\\\\' Escape
//                                      | '\\\'' Escape | epsilon;
//                        NoEscapeChar -> [^\'];
// (ALTER, index)       KEYWORD: meta-symbol         |
// (ARROW, index)       KEYWORD: meta-symbol         ->
// (DELIMITER, index)   KEYWORD: meta-symbol         ;
// (EPSILON, index)     KEYWORD: meta-symbol         epsilon
// (LEFT_BRACKET / RIGHT_BRACKET, index)             SymbolEntry bracket chars
// ---------------------------------------------------------------------------
// 2) end of the token stream, signalled to the parser
// (tDONE, tNONE)       EOF
// ---------------------------------------------------------------------------
// 3) (tEMPTY, tNONE) is the initial value of a Token object.
//
// Malformed input (a '-' not followed by '>', or an unknown character) is
// reported through error(); if error() returns, scanning continues with the
// next character.
Token Lexer::getNextToken() {
    int index;
    for (;;) {
        // Keep the result of get() as int so that EOF is distinguishable from
        // every real byte, and so the <cctype> calls below get a valid value.
        int c = inFile->get();
        switch (c) {
        case EOF:
            return Token(Token::tDONE, Token::tNONE);

        case ' ':
        case '\t':
        case '\r':
            break;                      // omit white space

        case '\n':
            ++line_no;                  // track the current line
            break;

        case SymbolEntry::c_ALTER:      // KEYWORD: ALTER
            index = symbol_table.findInKeywords(SymbolEntry::p_ALTER);
            return Token(SymbolEntry::ALTER, index);

        case SymbolEntry::c_DELIMITER:  // KEYWORD: DELIMITER
            index = symbol_table.findInKeywords(SymbolEntry::p_DELIMITER);
            return Token(SymbolEntry::DELIMITER, index);

        case '-': {
            int next = inFile->get();
            if (next == '>') {
                index = symbol_table.findInKeywords("->");  // KEYWORD: ARROW
                return Token(SymbolEntry::ARROW, index);
            }
            error("Lexer::getNextToken..@" + iToString(line_no) +
                  "..the '-' must be quoted or the '>' must be after '-' ");
            break;
        }

        case SymbolEntry::c_SINGLE_QUOTE:
            // VT: \'Escape\' such as 'abc', '\'abc\'', '\\abc'
            return getVT();

        case SymbolEntry::c_LEFT_BRACKET:
            index = symbol_table.findInKeywords(SymbolEntry::p_LEFT_BRACKET);
            return Token(SymbolEntry::LEFT_BRACKET, index);

        case SymbolEntry::c_RIGHT_BRACKET:
            index = symbol_table.findInKeywords(SymbolEntry::p_RIGHT_BRACKET);
            return Token(SymbolEntry::RIGHT_BRACKET, index);

        default: {
            // A letter starts either the keyword epsilon or a VN.
            if (isalpha(c))
                return getVN_epsilon(static_cast<char>(c));

            string msg("Lexer::getNextToken..@" +
                       iToString(line_no) + "..unknown char ");
            msg += static_cast<char>(c);
            msg += "\n maybe you forgot to escape it by ' and ' ";
            error(msg);
            break;
        }
        }//switch
    }//for
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -