// lexanalyze.cpp
// From "pl0文法编译器" (PL/0 grammar compiler) · C++ code · 516 lines in total
// CPP
// 516 lines
#using <mscorlib.dll>
#include "LexAnalyze.h"
#include "PL0Compiler.h"
#include "Errors.h"
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
using namespace System;
using namespace std;
// Builds a lexer attached to a compiler driver.
// Stores the driver pointer, reads the driver's input stream through
// initSourceFile(), and then fills the reserved-word table.
LexAnalyze::LexAnalyze(PL0Compiler *pl )
{
    this->pl0Compiler = pl;
    this->initSourceFile( pl->infileStream );
    this->initResvwordMap();
}
// Builds a stand-alone lexer that reads directly from fileStream.
// No compiler driver is attached in this mode: pl0Compiler is set to 0.
LexAnalyze::LexAnalyze(ifstream &fileStream)
{
    this->pl0Compiler = 0;
    this->initSourceFile( fileStream );
    this->initResvwordMap();
}
// Fills resvwordMap with every reserved word of the language, mapping the
// spelling of each keyword to its symbol code.
void LexAnalyze::initResvwordMap()
{
    // Declarations and type names.
    resvwordMap.insert( resvwordValueType("const",     constsy) );
    resvwordMap.insert( resvwordValueType("var",       varsy) );
    resvwordMap.insert( resvwordValueType("integer",   intsy) );
    resvwordMap.insert( resvwordValueType("char",      charsy) );
    resvwordMap.insert( resvwordValueType("real",      realsy) );
    resvwordMap.insert( resvwordValueType("procedure", procsy) );
    resvwordMap.insert( resvwordValueType("function",  funcsy) );

    // Control flow.
    resvwordMap.insert( resvwordValueType("if",    ifsy) );
    resvwordMap.insert( resvwordValueType("then",  thensy) );
    resvwordMap.insert( resvwordValueType("else",  elsesy) );
    resvwordMap.insert( resvwordValueType("while", whilesy) );
    resvwordMap.insert( resvwordValueType("do",    dosy) );
    resvwordMap.insert( resvwordValueType("for",   forsy) );
    resvwordMap.insert( resvwordValueType("to",    tosy) );

    // Block delimiters.
    resvwordMap.insert( resvwordValueType("begin", beginsy) );
    resvwordMap.insert( resvwordValueType("end",   endsy) );

    // Input / output statements.
    resvwordMap.insert( resvwordValueType("read",  readsy) );
    resvwordMap.insert( resvwordValueType("write", writesy) );
}
// Reads the whole source stream into linesOfFile (one entry per line) and
// places the read position on the first character of the first line.
// A stream that is already in a failed state only produces a console
// message; if no line at all could be read, a second message is printed and
// the process terminates with exit code 1.
void LexAnalyze::initSourceFile( ifstream &fileStream )
{
    if( fileStream.fail() )
    {
        cout<<" 文件名或路径无效! "<<endl;
    }

    // Pull every line of the source file into linesOfFile.
    for( string line; getline( fileStream, line ); )
    {
        linesOfFile.push_back( line );
    }

    if( !linesOfFile.empty() )
    {
        lineNum = 0;
        indexOfCh = 0;
        curLine = linesOfFile.front();
        return;
    }

    cout<<"源文件为空!"<<endl;
    exit(1);
}
/*char LexAnalyze::getCh()
{
char ch;
sourceFile.get(ch);
if( ch == '\n' )
{
lineNum++;
}
return ch;
}*/
// Delivers the next character of the source through ch.
//
// Within a line the character at indexOfCh is returned and the index is
// advanced. When the current line is used up, the reader moves to the next
// stored line and reports a single '\n' for the line break.
//
// Returns true when a character (or a line break) was produced, false when
// no further line exists; in that case ch is set to EOF. Note that lineNum
// is incremented on every call that runs past the end of the current line,
// including calls made after the end of input has been reached.
bool LexAnalyze::getCh(char &ch)
{
    // Common case: still inside the current line.
    if( (unsigned)indexOfCh < curLine.size() )
    {
        ch = curLine[indexOfCh++];
        return true;
    }

    // Current line exhausted: step to the following line, if there is one.
    ++lineNum;
    if( (unsigned)lineNum >= linesOfFile.size() )
    {
        ch = EOF;
        return false;
    }

    curLine = linesOfFile.at(lineNum);
    indexOfCh = 0;
    ch = '\n';
    return true;
}
bool LexAnalyze::resvSearch(const string &word)
{
resvword_value::const_iterator iter;
iter = resvwordMap.find( word );
if( iter == resvwordMap.end())
{
//判断标识符位数是否过限,若错误,转错误处理
//if(word.size()>MAX_IEDNTLENGTH)
//{
// pl0Compiler->errors->addError(5);
// tokenPair = make_pair( nul, "");
//}
tokenPair = make_pair( ident, word );
return false;
}
else
{
tokenPair = make_pair( iter->second, word );
return true;
}
}
// Scans an unsigned numeric literal and stores the result in tokenPair.
//
// token - receives the spelling of the literal (digits are appended to it).
// ch    - on entry the first digit of the literal; on exit the last
//         character fetched while scanning.
//
// Results:
//   (inum, digits)          integer literal
//   (rnum, digits.digits)   real literal
//   (nul,  "0") + error 3   a '.' that is not followed by a digit
//
// In every case the one lookahead character that ended the literal is pushed
// back (unless it is EOF) so the next token starts at the right place.
// Range and length checks on the literal are not performed here.
void LexAnalyze::getNum(string &token, char &ch)
{
    // Integer part.
    do
    {
        token.push_back(ch);
        getCh(ch);
    } while( '0'<=ch && ch<='9' );

    // No decimal point: plain integer.
    if( ch != '.' )
    {
        if( ch != EOF )
            backChar();
        tokenPair = make_pair(inum, token);
        return;
    }

    // Decimal point seen: a fraction with at least one digit must follow.
    token.push_back(ch);
    getCh(ch);
    if( '0'<=ch && ch<='9' )
    {
        do
        {
            token.push_back(ch);
            getCh(ch);
        } while( '0'<=ch && ch<='9' );
        if( ch != EOF )
            backChar();
        tokenPair = make_pair(rnum, token);
        return;
    }

    // Malformed real: give the character after the '.' back to the input
    // (it belongs to the next token) and report the error.
    if( ch != EOF )
        backChar();
    tokenPair = make_pair(nul, "0");
    // pl0Compiler is 0 when the lexer was built directly from a stream.
    if( pl0Compiler )
        pl0Compiler->errors->addError(3); // invalid real: digits expected after '.'
}
// Reads the next token from the source, stores it in tokenPair and returns it.
//
// Token classes recognised (first = symbol code, second = spelling/value):
//   - identifiers and reserved words (letters followed by letters/digits);
//     at most MAX_IEDNTLENGTH characters are kept, longer names raise error 5
//   - unsigned integer and real literals (delegated to getNum)
//   - the operators  <  <=  <>  >  >=  :  :=
//   - character literals  'x'  where x is a single letter or digit; escape
//     sequences are not recognised here (error 7 on malformed literals)
//   - string literals  "..."  with C-style backslash escapes (error 8 for an
//     unknown escape, error 9 for a literal not closed on the same line)
//   - the single-character symbols  + - * / ( ) ; , = .
//   - endfile at end of input
// Any other character yields (nul, "") and error 4.
//
// Errors are reported through pl0Compiler->errors; when the lexer was built
// directly from a stream (pl0Compiler == 0) they are only reflected in the
// returned token.
token_pair LexAnalyze::getSymbol()
{
    char ch;
    string token;
    getCh( ch );   // fetch the next character
    getNBC( ch );  // skip blanks and line breaks

    if( ('a'<=ch && ch<='z') || ('A'<=ch && ch<='Z') )
    {
        // Identifier or reserved word. Count every character so that an
        // over-long name can be detected, but keep only the leading
        // MAX_IEDNTLENGTH characters.
        int n = 0;
        do
        {
            if( n < MAX_IEDNTLENGTH )
                token.push_back(ch);
            n++;
            getCh(ch);
        } while( ('a'<=ch && ch<='z') || ('A'<=ch && ch<='Z') || ('0'<=ch && ch<='9') );

        if( n > MAX_IEDNTLENGTH && pl0Compiler )
            pl0Compiler->errors->addError(5);
        if( ch != EOF )
            backChar();          // return the lookahead character
        resvSearch( token );     // look the word up in the reserved-word table
    }
    else if( '0'<=ch && ch<='9' )
    {
        getNum(token, ch);
    }
    else if( ch=='<' )
    {
        token.push_back(ch);
        getCh(ch);
        switch(ch)
        {
        case '>':
            token.push_back(ch);
            tokenPair = make_pair(nequsy, token);
            break;
        case '=':
            token.push_back(ch);
            tokenPair = make_pair(leseqsy, token);
            break;
        default:
            if( ch != EOF )
                backChar();
            tokenPair = make_pair(lesssy, token);
            break;
        }
    }
    else if( ch==':' )
    {
        token.push_back(ch);
        getCh(ch);
        if( ch=='=' )
        {
            token.push_back(ch);
            tokenPair = make_pair(assignsy, token);
        }
        else
        {
            if( ch != EOF )
                backChar();
            tokenPair = make_pair(colonsy, token);
        }
    }
    else if( ch=='>' )
    {
        token.push_back(ch);
        getCh(ch);
        if( ch=='=' )
        {
            token.push_back(ch);
            tokenPair = make_pair(grteqsy, token);
        }
        else
        {
            if( ch != EOF )
                backChar();
            tokenPair = make_pair(grtsy, token);
        }
    }
    else if( ch=='\'' )
    {
        // Character literal: exactly one letter or digit between quotes.
        getCh(ch);
        if( ch==EOF )
        {
            if( pl0Compiler )
                pl0Compiler->errors->addError(7);
            tokenPair = make_pair(endfile, "");
            return tokenPair;
        }
        if( !( ('a'<=ch && ch<='z') || ('A'<=ch && ch<='Z') || ('0'<=ch && ch<='9') ) )
        {
            if( pl0Compiler )
                pl0Compiler->errors->addError(7);
            tokenPair = make_pair(nul, "");
            return tokenPair;
        }
        token.push_back(ch);

        getCh(ch);
        if( ch=='\'' )
        {
            tokenPair = make_pair(cha, token);
            return tokenPair;
        }

        // Closing quote is missing.
        if( pl0Compiler )
            pl0Compiler->errors->addError(7);
        if( ch==',' )
        {
            // A comma right after the character: accept the character that
            // was read, but the missing quote has still been reported.
            tokenPair = make_pair(cha, token);
            return tokenPair;
        }

        // Otherwise skip ahead to the next comma, line break or end of input.
        do
        {
            getCh(ch);
        } while( !( ch==',' || ch=='\n' || ch==EOF ) );
        if( ch != EOF )
            backChar();
        tokenPair = make_pair(nul, "");
        return tokenPair;
    }
    else if( ch=='\"' )
    {
        // String literal. The terminating condition is tested before each
        // character is processed, so a quote immediately followed by a line
        // break or end of input is reported as unterminated instead of
        // running on into the next line.
        getCh(ch);
        while( !( ch=='\"' || ch=='\n' || ch==EOF ) )
        {
            if( ch=='\\' )
            {
                getCh(ch);
                switch(ch)
                {
                case 't':  token += '\t'; break;
                case 'b':  token += '\b'; break;
                case 'n':  token += '\n'; break;
                case 'a':  token += '\a'; break;
                case 'r':  token += '\r'; break;
                case '\\': token += '\\'; break;
                case '\?': token += '\?'; break;
                case 'v':  token += '\v'; break;
                case 'f':  token += '\f'; break;
                case '\'': token += '\''; break;
                case '\"': token += '\"'; break;
                default:
                    // Unknown escape: report it and keep the character as is.
                    if( pl0Compiler )
                        pl0Compiler->errors->addError(8);
                    token += ch;
                    break;
                }
            }
            else
            {
                token.push_back(ch);
            }
            getCh(ch);
        }

        if( ch=='\"' )
        {
            tokenPair = make_pair(chastring, token);
        }
        else
        {
            if( pl0Compiler )
                pl0Compiler->errors->addError(9);
            tokenPair = make_pair(nul, "");
        }
        return tokenPair;
    }
    else
    {
        // Single-character symbols, end of input, or an invalid character.
        token.push_back(ch);
        switch (ch)
        {
        case '+': tokenPair = make_pair(plussy, token); break;
        case '-': tokenPair = make_pair(minussy, token); break;
        case '*': tokenPair = make_pair(timesy, token); break;
        case '/': tokenPair = make_pair(divisy, token); break;
        case '(': tokenPair = make_pair(lparsy, token); break;
        case ')': tokenPair = make_pair(rparsy, token); break;
        case ';': tokenPair = make_pair(semicolonsy, token); break;
        case ',': tokenPair = make_pair(commasy, token); break;
        case '=': tokenPair = make_pair(equsy, token); break;
        case '.': tokenPair = make_pair(dot, token); break;
        case EOF: tokenPair = make_pair(endfile, ""); break;
        default:
            tokenPair = make_pair(nul, "");
            if( pl0Compiler )
                pl0Compiler->errors->addError(4); // invalid symbol
            break;
        }
    }
    return tokenPair;
}
void LexAnalyze::showScript()
{
cout<<endl<<"the source file: "<<endl;
for(int j =0; j < 6; j++)
for(int i = 0; i < 10; i++)
cout<<i;
cout<<endl;
for(int i = 0; i < linesOfFile.size(); i++)
{
cout<<i+1<<"\t"<<(string)linesOfFile.at(i)<<endl;
}
}
// Returns the current read position as a (line, column) pair.
//
// Inside a line the result is (lineNum+1, indexOfCh+1). When the read index
// is 0, the position reported is the end of the previous line:
// (previous line number, length of that line).
//
// Two situations that would make linesOfFile.at() throw are handled:
//   - nothing has been read yet (lineNum == 0): the start of the first line
//     is reported;
//   - lineNum has moved past the stored lines (getCh advances it on every
//     read at end of input): the last stored line is used.
Location LexAnalyze::getLocation()
{
    if( indexOfCh != 0 || lineNum <= 0 || linesOfFile.empty() )
        return make_pair(lineNum+1, indexOfCh+1);

    int prevLine = lineNum - 1;                                   // 0-based index
    const int lastLine = static_cast<int>(linesOfFile.size()) - 1;
    if( prevLine > lastLine )
        prevLine = lastLine;

    return make_pair(prevLine+1, linesOfFile.at(prevLine).size());
}
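// Keyboard shortcuts (text left over from the code-viewer page, not part of the program):
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -
//   Show shortcuts:       ?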