lexanalyze.cpp

来自「pl0文法编译器」· C++ 代码 · 共 516 行

CPP
516
字号
#using <mscorlib.dll>

#include "LexAnalyze.h"
#include "PL0Compiler.h"
#include "Errors.h"
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
using namespace System;
using namespace std;

// Builds a lexer attached to a compiler instance.
// Stores the compiler pointer, loads every line of pl->infileStream and
// fills the reserved-word table.
// pl is dereferenced here, so it must not be null.
LexAnalyze::LexAnalyze(PL0Compiler *pl )
	: pl0Compiler( pl )
{
	initSourceFile( pl->infileStream );
	initResvwordMap();
}

// Builds a stand-alone lexer that reads directly from fileStream.
// No compiler is attached: pl0Compiler is left as a null pointer.
LexAnalyze::LexAnalyze(ifstream &fileStream)
	: pl0Compiler( 0 )
{
	initSourceFile( fileStream );
	initResvwordMap();
}

void LexAnalyze::initResvwordMap()
{
	resvwordMap.insert( resvwordValueType(string("const"), constsy) );
	resvwordMap.insert( resvwordValueType(string("var"), varsy) );
	resvwordMap.insert( resvwordValueType(string("integer"), intsy) );
	resvwordMap.insert( resvwordValueType(string("char"), charsy) );
	resvwordMap.insert( resvwordValueType(string("real"), realsy) );
	resvwordMap.insert( resvwordValueType(string("procedure"), procsy) );
	resvwordMap.insert( resvwordValueType(string("function"), funcsy) );
	resvwordMap.insert( resvwordValueType(string("if"), ifsy) );
	resvwordMap.insert( resvwordValueType(string("then"), thensy) );
	resvwordMap.insert( resvwordValueType(string("else"), elsesy) );
	resvwordMap.insert( resvwordValueType(string("while"), whilesy) );
	resvwordMap.insert( resvwordValueType(string("do"), dosy) );
	resvwordMap.insert( resvwordValueType(string("for"), forsy) );
	resvwordMap.insert( resvwordValueType(string("to"), tosy) );
	resvwordMap.insert( resvwordValueType(string("begin") ,beginsy) );
	resvwordMap.insert( resvwordValueType(string("end"), endsy) );
	resvwordMap.insert( resvwordValueType(string("read"), readsy) );
	resvwordMap.insert( resvwordValueType(string("write"), writesy) );
}

// Reads the whole source stream into linesOfFile (one entry per line, the
// newline itself is not stored) and places the read cursor at the start of
// the first line.
// A stream in a failed state only produces a message; reading is still
// attempted. If no line could be read at all, a message is printed and the
// process terminates with exit status 1.
void LexAnalyze::initSourceFile( ifstream &fileStream )
{
	if( fileStream.fail() )
	{
		cout<<" 文件名或路径无效! "<<endl;
	}

	// Pull in every line of the source file.
	for( string line; getline( fileStream, line ); )
	{
		linesOfFile.push_back( line );
	}

	if( linesOfFile.size() == 0 )
	{
		cout<<"源文件为空!"<<endl;
		exit(1);
	}

	// Start scanning at line 0, column 0.
	lineNum = 0;
	indexOfCh = 0;
	curLine = string( linesOfFile.at(0) );
}

/*char LexAnalyze::getCh()
{
	char ch;
	sourceFile.get(ch);
	if( ch == '\n' )
	{
		lineNum++;
	}
	return ch;
}*/

// Fetches the next character of the source into ch.
//  - Inside a line: returns that character and advances the column.
//  - At the end of a line that has a successor: switches to the next line
//    and returns '\n'.
//  - At the end of the last line: stores EOF in ch and returns false.
// Returns true whenever a real character (or a line break) was delivered.
// Note that lineNum is incremented every time the end of the current line
// is hit, including on repeated calls after the end of input.
bool LexAnalyze::getCh(char &ch)
{
	// Common case: there is still something left on the current line.
	if( (unsigned)indexOfCh < curLine.size() )
	{
		ch = curLine[indexOfCh++];
		return true;
	}

	// Current line is used up: try to move on to the following one.
	++lineNum;
	if( (unsigned)lineNum >= linesOfFile.size() )
	{
		ch = EOF;
		return false;
	}

	curLine = string( linesOfFile.at(lineNum) );
	indexOfCh = 0;
	ch = '\n';
	return true;
}

bool LexAnalyze::resvSearch(const string &word)
{
	resvword_value::const_iterator iter;
	iter = resvwordMap.find( word );
	if( iter == resvwordMap.end())
	{
		//判断标识符位数是否过限,若错误,转错误处理
		//if(word.size()>MAX_IEDNTLENGTH)
		//{
		//	pl0Compiler->errors->addError(5);
		//	tokenPair = make_pair( nul, "");
		//}
		tokenPair = make_pair( ident, word );
		return false;
	}
	else
	{
		tokenPair = make_pair( iter->second, word );
		return true;
	}
}

// Scans an unsigned number whose first digit is already in ch.
//   token - receives the text of the number (digits, optionally '.' digits)
//   ch    - in: first digit; out: last character fetched while scanning
// On return tokenPair holds
//   (inum, text)  for an integer,
//   (rnum, text)  for a real (digits '.' digits),
//   (nul,  "0")   when a '.' is not followed by a digit; error 3 is recorded.
// In every case the character that ended the number is pushed back (unless
// it is EOF) so the next token starts from it. Range and length checks on
// the value are currently disabled.
void LexAnalyze::getNum(string &token, char &ch)
{
	// Integer part.
	do 
	{
		token.push_back(ch);
		getCh(ch);
	} while( '0'<=ch&&ch<='9' );

	// No decimal point: plain integer.
	if ( ch!='.' )
	{
		if(ch!=EOF)
			backChar();
		tokenPair = make_pair(inum,token);
		return;
	}

	// Decimal point seen: at least one digit must follow.
	token.push_back(ch);
	getCh(ch);
	if( !('0'<=ch&&ch<='9') )
	{
		// Give the lookahead character back, like every other exit does;
		// otherwise the character following the bad literal is lost.
		if(ch!=EOF)
			backChar();
		tokenPair = make_pair(nul, "0");
		// Invalid real format: a digit is required after the decimal point.
		// pl0Compiler is null when the lexer was built from a bare stream.
		if(pl0Compiler != 0)
			pl0Compiler->errors->addError(3);
		return;
	}

	// Fractional part.
	do 
	{
		token.push_back(ch);
		getCh(ch);
	} while( '0'<=ch&&ch<='9' );
	if(ch!=EOF)
		backChar();
	tokenPair = make_pair(rnum, token);
}

// Scans and returns the next token of the source as a (symbol, text) pair.
// The same pair is also left in the member tokenPair.
//
// Recognised: identifiers / reserved words, unsigned integers and reals,
// the operators < <= <> > >= : := + - * / ( ) ; , = . , character literals
// ('x' with x a letter or digit), string literals ("..." with C-style
// escapes) and end of file (endfile). Anything else yields a nul token.
//
// Lexical errors are recorded through pl0Compiler->errors when a compiler
// is attached; the lexer can also be built from a bare stream, in which
// case pl0Compiler is null and only the nul token signals the problem.
token_pair LexAnalyze::getSymbol()
{
	char ch;
	string token;
	getCh( ch );   // fetch a character
	getNBC( ch );  // skip blanks and line breaks
	
	if( ('a'<=ch&&ch<='z') || ('A'<=ch&&ch<='Z') )
	{
		// Identifier or reserved word. Only the first MAX_IEDNTLENGTH
		// characters are kept, but every character is counted so that an
		// over-long identifier can actually be detected below.
		int n = 0;
		do 
		{
			if(n<MAX_IEDNTLENGTH)
				token.push_back(ch);
			n++;
			getCh(ch);
		} while( ('a'<=ch&&ch<='z') || ('A'<=ch&&ch<='Z') || ('0'<=ch&&ch<='9') );
		if(n > MAX_IEDNTLENGTH && pl0Compiler != 0)
			pl0Compiler->errors->addError(5);  // identifier too long
		
		if(ch!=EOF)
			backChar();      // give back the lookahead character
		resvSearch( token ); // reserved-word lookup, sets tokenPair
	}
	else if( '0'<=ch&&ch<='9' )
	{
		getNum(token, ch);
	}
	else if( ch=='<' )
	{
		token.push_back(ch);
		getCh(ch);
		switch(ch) 
		{
		case '>':
			token.push_back(ch);
			tokenPair = make_pair(nequsy, token);
			break;
		case '=':
			token.push_back(ch);
			tokenPair = make_pair(leseqsy, token);
			break;
		default:
			// Nothing was consumed at EOF, so there is nothing to give back.
			if(ch!=EOF)
				backChar();
			tokenPair = make_pair(lesssy, token);
			break;
		}
	}
	else if( ch==':' )
	{
		token.push_back(ch);
		getCh(ch);
		if( ch=='=')
		{
			token.push_back(ch);
			tokenPair = make_pair(assignsy, token);
		}
		else
		{
			if(ch!=EOF)
				backChar();
			tokenPair = make_pair(colonsy, token);
		}
	}
	else if( ch=='>')
	{
		token.push_back(ch);
		getCh(ch);
		if( ch=='=')
		{
			token.push_back(ch);
			tokenPair = make_pair(grteqsy, token);
		}
		else
		{
			if(ch!=EOF)
				backChar();
			tokenPair = make_pair(grtsy, token);
		}
	}
	else if( ch=='\'')
	{
		// Character literal: exactly one letter or digit between single
		// quotes. (An earlier variant that accepted escape sequences and
		// arbitrary characters is disabled.)
		getCh(ch);
		if ( ('a'<=ch&&ch<='z') || ('A'<=ch&&ch<='Z') || ('0'<=ch&&ch<='9') )
		{
			token.push_back(ch);
		}
		else if(ch==EOF)
		{
			if(pl0Compiler != 0)
				pl0Compiler->errors->addError(7);
			tokenPair = make_pair(endfile,"");
			return tokenPair;
		}
		else
		{
			if(pl0Compiler != 0)
				pl0Compiler->errors->addError(7);
			tokenPair = make_pair(nul, "");
			return tokenPair;
		}

		getCh(ch);
		if(ch!='\'')
		{
			if(ch==',')
			{
				// A comma right after the character: accept the stored
				// character as valid, but still report the missing quote.
				// NOTE(review): the comma is consumed here and not pushed
				// back - confirm that this is intended.
				if(pl0Compiler != 0)
					pl0Compiler->errors->addError(7);
				tokenPair = make_pair(cha,token);
				return tokenPair;
			}
			else
			{
				// Resynchronise: skip ahead to the next comma, line break
				// or end of file.
				if(pl0Compiler != 0)
					pl0Compiler->errors->addError(7);
				do 
				{
					getCh(ch);	
				} while(!( ch==',' || ch=='\n' || ch==EOF));
				if(ch!=EOF)
					backChar();
				tokenPair = make_pair(nul, "");
				return tokenPair;
			}
		}
		tokenPair = make_pair(cha,token);
		return tokenPair;
	}
	else if( ch=='\"')
	{
		// String literal: collect characters up to the closing double
		// quote, translating backslash escapes on the way.
		token = "";
		getCh(ch);
		do {
			if(ch=='\\')
			{
				getCh(ch);
				switch(ch)
				{
				case 't':
					token += '\t';
					break;
				case 'b':
					token += '\b';
					break;
				case 'n':
					token += '\n';
					break;
				case 'a':
					token += '\a';
					break;
				case 'r':
					token += '\r';
					break;
				case '\\':
					token += '\\';
					break;
				case '\?':
					token += '\?';
					break;
				case 'v':
					token += '\v';
					break;
				case 'f':
					token += '\f';
					break;
				case '\'':
					token += '\'';
					break;
				case '\"':
					token += '\"';
					break;
				default:
					// Unknown escape: error 8 is recorded and the
					// character is kept as it is.
					if(pl0Compiler != 0)
						pl0Compiler->errors->addError(8);
					token += ch;
					break;
				}
			}
			else if(ch=='\"')
				break;  // closing quote met immediately (e.g. empty string)
			else
			{
				token.push_back(ch);
			}
			getCh(ch);
		} while(!(ch=='\"' || ch=='\n' || ch==EOF));

		if(ch=='\"' )
		{
			tokenPair = make_pair(chastring, token);
			return tokenPair;
		}
		else
		{
			// The loop ended on a line break or EOF instead of a closing
			// quote: error 9 is recorded.
			if(pl0Compiler != 0)
				pl0Compiler->errors->addError(9);
			tokenPair = make_pair(nul,"");
			return tokenPair;
		}
	}
	else
	{
		// Single-character symbols, end of file, or an illegal character.
		token.push_back(ch);
		switch (ch)
		{
		case '+':tokenPair = make_pair(plussy, token);	break;
		case '-':tokenPair = make_pair(minussy, token);	break;
		case '*':tokenPair = make_pair(timesy, token);	break;
		case '/':tokenPair = make_pair(divisy, token);	break;
		case '(':tokenPair = make_pair(lparsy, token);	break;
		case ')':tokenPair = make_pair(rparsy, token);	break;
		case ';':tokenPair = make_pair(semicolonsy, token);	break;
		case ',':tokenPair = make_pair(commasy, token);	break;
		case '=':tokenPair = make_pair(equsy, token);	break;
		case '.':tokenPair = make_pair(dot, token);		break;
		case EOF:tokenPair = make_pair(endfile, "");	break;
		default	:
			// Error handling: invalid identifier.
			tokenPair = make_pair(nul,"");
			if(pl0Compiler != 0)
				pl0Compiler->errors->addError(4);
			break;
		}
	}

	return tokenPair;
}

// Prints the loaded source to standard output: a heading, a 60-column
// ruler made of the digits 0-9 repeated six times, then every line preceded
// by its 1-based line number and a tab.
void LexAnalyze::showScript()
{
	cout<<endl<<"the source file: "<<endl;

	// Column ruler: 0123456789 repeated six times.
	for(int column = 0; column < 60; ++column)
		cout<<column % 10;
	cout<<endl;

	// Numbered listing of the source.
	int lineNo = 1;
	for(unsigned idx = 0; idx < linesOfFile.size(); ++idx, ++lineNo)
	{
		cout<<lineNo<<"\t"<<string( linesOfFile.at(idx) )<<endl;
	}
}

// Returns the current read position as a (line, column) pair.
//  - Inside a line (indexOfCh != 0): (lineNum+1, indexOfCh+1).
//  - At column 0, which is the state right after switching to a new line:
//    the end of the previous line, i.e. (lineNum, length of that line).
// Before anything has been read (lineNum == 0, indexOfCh == 0) there is no
// previous line, so (1, 1) is returned. If lineNum has run past the last
// line (getCh keeps incrementing it at end of input) the last line is used.
// Both cases previously indexed linesOfFile out of range, which makes at()
// throw std::out_of_range.
Location LexAnalyze::getLocation()
{
	if(indexOfCh != 0 || lineNum <= 0)
		return make_pair(lineNum+1, indexOfCh+1);

	// Clamp to the last existing line so the lookup below stays in range.
	int prevLine = lineNum;
	if((unsigned)prevLine > linesOfFile.size())
		prevLine = (int)linesOfFile.size();

	return make_pair(prevLine, linesOfFile.at(prevLine-1).size());
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?