📄 lexer.cpp

📁 compiler principle how to ananilyze
💻 CPP
字号:
// Lexer.cpp: implementation of the Lexer class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "SymbolTable.h"
#include "Lexer.h"

#include <assert.h>
#include <algorithm>
#include <iostream>

using namespace std;

extern SymbolTable symbol_table; 


// Constructs a lexer with the line counter at 1, the lexeme write cursor
// (lexbuf_p) at the start of lexbuf and the end marker (lexbuf_end) BSIZE
// elements past it. No input stream is attached here; initialize() does that.
Lexer::Lexer(): line_no(1), 
lexbuf_p(lexbuf), lexbuf_end(lexbuf+BSIZE)  {
	// Give the stream pointer a defined value so that requesting a token
	// before initialize() does not dereference an indeterminate pointer.
	inFile = 0;
}

// Attaches the input stream that the token-reading methods pull characters
// from. Only the pointer is stored; nothing is read from the stream here.
void Lexer::initialize(ifstream* ainFile) {
	this->inFile = ainFile;
}

int Lexer::getLineNo() const {
	return line_no;
}

// Scans the body of a quoted terminal symbol (VT). The caller has already
// consumed the opening single quote; characters are collected into lexbuf
// until the closing single quote is read.
//
// Escapes: a backslash may only be followed by another backslash or by a
// single quote. The backslash itself is dropped and the following character
// is stored literally, e.g.
//   'abc'ef'        yields [abc]
//   '\'abc\' abc'   yields ['abc' abc]
//
// Returns Token(SymbolEntry::VT, index), where index is the value returned by
// symbol_table.insert() for the collected lexeme.
// Reports through error(): an empty terminal, an invalid escape, a lexeme
// that does not fit between lexbuf and lexbuf_end, and EOF before the closing
// quote. If error() returns after one of the last two, a default-constructed
// Token is returned.
Token Lexer::getVT() {

	lexbuf_p = lexbuf;

	// istream::get() returns an int. Keeping it as int (instead of narrowing
	// to char) is what makes EOF distinguishable from every real input byte,
	// independent of whether plain char is signed on the target.
	int c = inFile->get();

	for (; c != EOF; c = inFile->get()) {

		if (c == SymbolEntry::c_SINGLE_QUOTE)  {

			if (lexbuf_p == lexbuf)  {
				error("Lex::getVT..@" + iToString(line_no) + 
					   "..the VT between ' and ' is empty");
			}

			string lex(lexbuf, lexbuf_p);
			int index = symbol_table.insert(lex, SymbolEntry::VT);
			return Token(SymbolEntry::VT, index);  // 1) OK outlet
		}

		if (c == SymbolEntry::c_REVERSE_SLASH) {
			c = inFile->get();						// skip escape char : c_REVERSE_SLASH
			// only can use \ just before \ or '
			// such as '\\n', '\'abc', 'ab\'c'
			if (c != SymbolEntry::c_REVERSE_SLASH && 
				c != SymbolEntry::c_SINGLE_QUOTE)  	
				error("Lexer::getVT..@" + iToString(line_no) + 
				       "..\\ or ' expected after \\");
		}

		// Capacity is checked only when a character really has to be stored.
		// Checking it earlier would reject a terminal that exactly fills the
		// buffer, because its closing quote would never be examined.
		if (lexbuf_p >= lexbuf_end)
			break;

		*lexbuf_p++ = static_cast<char>(c);
	}//for

	//2) failure outlet: exactly one diagnostic, matching why the loop ended
	if (c != EOF ) 
		error("Lexer::getVT..@" + iToString(line_no) + 
		       "..lexbuf overflow");
	else
		error("Lexer::VT..@" + iToString(line_no) + 
			 "..encounter EOF and can't find the delimiter ' ");
	return Token();
}

// Scans an identifier-shaped lexeme, [A-Za-z][0-9a-zA-Z]*, and classifies it.
//
// first: the lexeme's first character, already consumed by the caller.
//
// Returns Token(SymbolEntry::EPSILON, index) when the lexeme equals
// SymbolEntry::p_EPSILON (index comes from symbol_table.findInKeywords());
// otherwise Token(SymbolEntry::VN, index) with the index returned by
// symbol_table.insert().
// The first character that does not belong to the lexeme is pushed back onto
// the stream. A lexeme that does not fit between lexbuf and lexbuf_end is
// reported through error().
Token Lexer::getVN_epsilon(char first) {
	
	lexbuf_p = lexbuf;

	// Work with int values: isalnum() is only defined for EOF and values
	// representable as unsigned char, and istream::get() returns an int so
	// that EOF stays distinct from every real byte.
	int c = static_cast<unsigned char>(first);
	
	while (isalnum(c) && lexbuf_p < lexbuf_end) {
		*lexbuf_p++ = static_cast<char>(c);
		c = inFile->get();
	}

	// Still looking at an alphanumeric character: the loop stopped because
	// the buffer is full, not because the lexeme ended.
	if(isalnum(c)) {

		error("Lexer::getVN..@" + iToString(line_no) + 
			  "..lexbuf overflow..");
	}
	
	// Return the lookahead character (maybe WS, ';') to the stream.
	// EOF is not a character, so there is nothing to return in that case.
	if (c != EOF)
		inFile->putback(static_cast<char>(c));
		
	string lex(lexbuf, lexbuf_p);
	int index;
	if (first == 'e' && lex.compare(SymbolEntry::p_EPSILON) == 0) {
		index = symbol_table.findInKeywords(lex);		// KEYWORD: EPSILON
		assert(index >= 0);
		return Token(SymbolEntry::EPSILON, index);
	}

	index = symbol_table.insert(lex, SymbolEntry::VN);	// VN: [A-Za-z][0-9a-zA-Z]*
	return Token(SymbolEntry::VN, index);

}


// Reads characters from the attached stream and returns the next token.
// Blanks, tabs and carriage returns are skipped; each '\n' skipped increments
// line_no. Malformed input is reported through error(); if error() returns,
// scanning simply continues with the next character.
Token Lexer::getNextToken() {

// (type,value)			meaning						    lexeme
//============================================================================================
// 1)  from SymbolEntry
// (VN,index)			V: Non-terminal symbol			[A-Za-z][0-9a-zA-Z]*
// (VT,index)			V: terminal symbol				\'Escape\' | 'epsilon'
//	Escape -> NoEscapeChar Escape | '\\\\' Escape | '\\\'' Escape | epsilon;
//	NoEscapeChar -> [^\'];

// (ALTER,index)		KEYWORD: meta-symbol			|		
// (ARROW,index)		KEYWORD: meta-symbol			->
// (DELIMITER,index)	KEYWORD: meta-symbol			;
// (EPSILON,index)		KEYWORD: meta-symbol			epsilon
//--------------------------------------------------------------------------------------------
// 2) lexer using (DONE, NONE) to notify the parser ending of the token stream
// (tDONE, tNONE)		EOF								EOF
//--------------------------------------------------------------------------------------------
// 3) Token objects' initial value
//--------------------------------------------------------------------------------------------
// (tEMPTY, tNONE)		Token objects' initial value 

  // istream::get() returns an int. It must stay an int for `case EOF` to be
  // reliable: narrowed to char, EOF is either never matched (unsigned char)
  // or indistinguishable from the byte 0xFF (signed char).
  int c;
  int index;

  while (1) {
    c = inFile->get();

	switch(c) {

	case EOF:
		return  Token(Token::tDONE, Token::tNONE);		//EOF
	
	case ' ':
	case '\t':
	case '\r':
		break;					// omit white space

	case '\n':
		++line_no;				// increase number of lines
		break;

	case SymbolEntry::c_ALTER:									// KEYWORD: ALTER
		index = symbol_table.findInKeywords(SymbolEntry::p_ALTER);
		return Token(SymbolEntry::ALTER, index);

	case SymbolEntry::c_DELIMITER:								// KEYWORD: DELIMITER
		index = symbol_table.findInKeywords(SymbolEntry::p_DELIMITER);
		return Token(SymbolEntry::DELIMITER, index);

	case '-':											
		c = inFile->get();
		if ( c == '>' ) {
			index = symbol_table.findInKeywords("->");			// KEYWORD: ARROW
			return Token(SymbolEntry::ARROW, index);
		}
		else {
			error("Lexer::getNextToken..@" + iToString(line_no) + 
				  "..the '-' must be quoted or the '>' must be after '-' "); 
		}
		break;

	case SymbolEntry::c_SINGLE_QUOTE :							// VT: \'Escape\' such as : 'abc' : '\'abc\'' : '\\abc'			
		return getVT();
		
	case SymbolEntry::c_LEFT_BRACKET:
		index = symbol_table.findInKeywords(SymbolEntry::p_LEFT_BRACKET);
		return Token(SymbolEntry::LEFT_BRACKET, index);

	case SymbolEntry::c_RIGHT_BRACKET:
		index = symbol_table.findInKeywords(SymbolEntry::p_RIGHT_BRACKET);
		return Token(SymbolEntry::RIGHT_BRACKET, index);

	default: {
		// c is a real byte here (EOF was handled above), so it is a valid
		// argument for isalpha() and converts to char without loss.
		if (isalpha(c))											// KEYWORD : epsilon
			return getVN_epsilon(static_cast<char>(c));			// VN : [A-Za-z][0-9a-zA-Z]* 		
		
		string msg("Lexer::getNextToken..@" + 
			        iToString(line_no) + "..unknown char ");
		msg += static_cast<char>(c);
		msg += "\n maybe you forgot to escape it by ' and ' ";
		error(msg);
			
		break;
	}

	}//switch
  }//while
}
💿 文件大小 71 K
👤 上传用户 ys0796
📂 所属分类编译器/解释器
🏷️ 相关标签

#ananilyze #principle #compiler #how
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -