📄 parser.cpp

📁 compiler principle how to ananilyze
💻 CPP
字号:
// Parser.cpp: implementation of the Parser class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"

#include "VLocation.h"
#include "SymbolEntry.h"
#include "SymbolTable.h"

#include "Parser.h"

#include <string>
using namespace std;

extern SymbolTable symbol_table;

Parser::Parser(): currentRow(0), currentColumn(0) {
	products.push_back(Product());

}

void Parser::check() const {

	checkExistStartSymbol();
}

void Parser::checkExistStartSymbol() const {

	int start = grammarTitle.getStartSymbolIndex();
	const SymbolEntry& se = symbol_table[start];
	const VLocationSet& vs = se.locations;
	VLocationSet::const_iterator i = vs.begin();
	VLocationSet::const_iterator e = vs.end();
	for (; i != e && (*i).column != 0; ++i) ;

	if ( i == e) {

		error("Parser::checkExistStartSymbol..the start symbol '" + se.lexeme +
			  "' doesn't exist in the left of any products");
	}
}

void Parser::normalize() {

}
	/*
	G[S];
	S -> GrammarTitle ';' ProductList;
	GrammarTitle -> GrammarName '[' StartSymbol ']' ;
	ProductList -> Product ';' ProductList | epsilon;
	Product -> LeftList '->' RightListSet;
	LeftList -> VN LeftList | epsilon;
	RightListSet ->   RightList MoreRightList;
	RightList -> VT RightList | VN RightList | epsilon;
	MoreRightList -> '|' RightList MoreRightList | epsilon;

    GrammarName -> VN;
	StartSymbol -> VN;
	VN -> [A-Za-z][A-Za-z_0-9]* ; 
	VT -> \'Escape\' | 'epsilon' ;
	Escape -> NoEscapeChar Escape | '\\\\' Escape | '\\\'' Escape | epsilon;
	NoEscapeChar -> [^\'];
*/

// S -> GrammarTitle ';' ProductList;
void Parser::parse(ifstream* grammar) {
	
	// prepare
	lexer.initialize(grammar);

	// start
	look_ahead = lexer.getNextToken();
	
	g_GrammarTitle();
	match_advance(Token(SymbolEntry::DELIMITER));
	g_ProductList();

	// check validation of the grammar
	check();
	// normalize
	normalize();
}

// GrammarTitle -> GrammarName '[' StartSymbol ']' ;
// GrammarName -> VN;
// StartSymbol -> VN;
void Parser::g_GrammarTitle() {

	// GrammarName -> VN;
	if (look_ahead.type != SymbolEntry::VN) {

		error("->" + look_ahead.toStr() + "->Parser::g_GrammarTitle..syntax error, " +
		"..@" + iToString(lexer.getLineNo()) + "..expect a grammar name ");
	}

	// change token_type from VN to GN
	symbol_table.setTokenType(look_ahead.value, SymbolEntry::GN);
	grammarTitle.append_Left(look_ahead.value);

	// '['
	advance();		
	match_advance(Token(SymbolEntry::LEFT_BRACKET));

	// StartSymbol -> VN;
	if (look_ahead.type != SymbolEntry::VN) {

		error("->" + look_ahead.toStr() + "->Parser::g_GrammarTitle..syntax error, " +
		"..@" + iToString(lexer.getLineNo()) + "..expect start symbol ");
	}
	grammarTitle.append_Right(look_ahead.value);

	// ']'
	advance();
	match_advance(Token(SymbolEntry::RIGHT_BRACKET));

}

//ProductList -> Product ';' ProductList | epsilon;
void Parser::g_ProductList() {

	if (look_ahead.type != Token::tDONE) {
		// ProductList -> Product ';' ProductList
		g_Product();

		newProduct();
		match_advance(Token(SymbolEntry::DELIMITER));
		
		g_ProductList();
	}
	else { // follow(ProductList) = {EOF}
		// ProductList -> epsilon
		return;
	}
}

// Product -> LeftList '->' RightListSet;
void Parser::g_Product() {
		
	g_LeftList();
	match_advance(Token(SymbolEntry::ARROW));
	g_RightListSet();
}

// LeftList -> VN LeftList | epsilon;
void Parser::g_LeftList() {

	if (look_ahead.type == SymbolEntry::VN ||
		look_ahead.type == SymbolEntry::VT) {

		// LeftList -> VN LeftList
		append_Left();
		advance();
		
		g_LeftList();
	}
	else { // follow(LeftList) = { -> }
		// LeftList -> epsilon
		return;
	}
}

// RightListSet ->   RightList MoreRightList;
void Parser::g_RightListSet() {

	g_RightList();
	g_MoreRightList();
}

// RightList -> VT RightList | VN RightList | epsilon;
void Parser::g_RightList() {

	switch(look_ahead.type) {

	case SymbolEntry::VT:		// RightList -> VT RightList | VN RightList
	case SymbolEntry::VN:
		
		append_Right_VN_VT();
		advance();
		g_RightList();
		break;

	case SymbolEntry::EPSILON:  // VT -> \'Escape\' | 'epsilon' ;
	
		append_Right_Epsilon();
		advance();
		g_RightList();
		break;

	default:

		// follow(RightList) = first(MoreRightList) + follow(MoreRightList)
		// = { | } + follow(RightListSet)
		// = { | } + follow(Product)
		// = { | } + { ; } = { |, ; }
		// RightList -> epsilon
		return;
	}
}

// MoreRightList -> '|' RightList MoreRightList | epsilon;
void Parser::g_MoreRightList() {

	if (look_ahead.type == SymbolEntry::ALTER) {

		newAlternative();
		advance();
		
		g_RightList();
		g_MoreRightList();
	}
	else { // follow(MoreRightList) = { ; }

		return;
	}
}

void Parser::match(const Token& token) {
	if (look_ahead != token)
		error("encounter " + look_ahead.toStr() + "..Parser::match..@" +
			  iToString(lexer.getLineNo()) + "..expect " + token.toStr());
}

void Parser::match_advance(const Token& token) {
	if (look_ahead == token)
		look_ahead = lexer.getNextToken();
	else
		error("encounter " + look_ahead.toStr() + "..Parser::match_advance..@" +
		iToString(lexer.getLineNo()) + "..expect " + token.toStr());
}
void Parser::advance() {

	look_ahead = lexer.getNextToken();
}

void Parser::addLocationInfo() {
	SymbolEntry& se = symbol_table[look_ahead.value];
	se.locations.push_back(VLocation(currentRow, currentColumn));
}

void Parser::append_Left() {

	addLocationInfo();
	Product& product = products[currentRow];
	product.append_Left(look_ahead.value);
	++currentColumn;

}

void Parser::append_Right_Epsilon() {
	Product& product = products[currentRow];
	if (product.append_Right_Epsilon(look_ahead.value)) { //false: the new index isn't inserted
		
		addLocationInfo();
		++currentColumn;
	}
}

void Parser::append_Right_VN_VT() {
	Product& product = products[currentRow];
	if (product.append_Right_VN_VT(look_ahead.value, currentRow)) {  
		addLocationInfo();
		++currentColumn;
	}
	else {		//false: epsilon is removed
		--currentColumn;
		addLocationInfo();
		++currentColumn;
	}
}

void Parser::newProduct() {

	products.push_back(Product());
	++currentRow;
	currentColumn = 0;
}

void Parser::newAlternative() {

	Product& left = products[currentRow];
	++currentRow;
	currentColumn = left.sizeOfLeft();
	Product prod;
	left.copyLeft(prod, currentRow);			//copy same left part of the current product

	products.push_back(prod);
	
}

string Parser::grammar_display() const {
	string str;

	int i=0;
	
	str = grammarTitle.display();
	str += "\n";

	int size = products.size();
	for(; i< size; ++i) {
		const Product& prod = products[i];
		if (prod.size() > 0) {
			str += iToString(i, 3);
			str += prod.display();
			str += "\n";
		}
	}
	return str;
}

string Parser::grammar_type()  {
	
	int sum_grammar_type = Product::UNKNOWN;
	int i =0;
	int size = products.size();

	for(; i<size; ++i) {

		Product& prod = products[i];
		prod.getGrammarType(products, sum_grammar_type,
					grammarTitle.getStartSymbolIndex());
	}

	return Product::grammarTypeString(sum_grammar_type);
}
💿 文件大小 71 K
👤 上传用户 ys0796
📂 所属分类编译器/解释器
🏷️ 相关标签

#ananilyze #principle #compiler #how
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -