📄 canalyse.cpp

📁 C_Minus词法分析器
💻 CPP
字号:
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
using namespace std;

// Category codes for the kinds of lexical element distinguished by the lexer.
// LETTLE (sic - presumably "letter") marks identifier-like text, NUM numeric
// text and SIGN operator/punctuation text.
#define LETTLE 1
#define NUM 2
#define SIGN 3

// The single characters that count as operator or punctuation signs.
const char sign[16] = {'+','-','*','/','<','=','>','!',
';',',','(',')','[',']','{','}'};

// The reserved words of the language, in alphabetical order.
const string keyWords[6]={"else","if","int","return","void","while"};


// Splits one whitespace-free string, which may contain identifiers, numbers
// and signs, into separate tokens and appends them to the vector.
void analyseWord(string str,vector<string>& vec);
// Tells whether the character is one of the language's special symbols.
bool isSign(char chr);
// Tells whether the character is a letter.
bool isLettle(char chr);
// Tells whether the character is a digit.
bool isNum(char chr);
// Tells whether the string is a keyword.
bool isKeyWord(string str);
// Copies the tokens of org into dest with the comment parts removed.
void wipeOffComent(vector<string> org,vector<string>& dest);
// Formats every token of org as <token,STYLE> and appends the result to dest.
void normalize(vector<string> org,vector<string>& dest);

// Checks for digits and letters appearing side by side in one token, which
// is a lexical error; returns false when such a token is found.
bool checkWrong1(vector<string> vec);
// Checks for illegal characters; returns false when one is found.
bool checkWrong2(vector<string> vec);

// Source text to analyse; opened at static-initialisation time and read by main.
ifstream in("input.txt");
// Receives the token listing or the lexical-error message written by main.
ofstream out("output.txt");

// Program entry point.
//
// Reads whitespace-separated words from the global stream `in`, splits them
// into tokens, strips comments, validates the tokens and finally writes one
// "<token,STYLE>" line per token to both the console and the global stream
// `out`.
//
// Returns 0 on success and 1 when the input file cannot be opened or a
// lexical error is detected.
int main()
{
	vector<string> inWords;   // raw whitespace-separated words from the input
	vector<string> outWords1; // tokens, comments still included
	vector<string> outWords2; // tokens with the comments removed
	vector<string> finalWord; // formatted tokens, i.e. the final output

	// Report an error if the input file could not be opened.
	if(!in)
	{
		cout<<"The file is not found."<<endl;
		return 1;
	}

	// Split the source text on whitespace, one word per element.
	string word;
	while(in>>word)
	{
		inWords.push_back(word);
	}

	// Break every word further into identifier / number / sign tokens.
	for(vector<string>::size_type i=0;i<inWords.size();i++)
	{
		analyseWord(inWords[i],outWords1);
	}

	// Drop everything that belongs to a comment.
	wipeOffComent(outWords1,outWords2);

	// A token that mixes digits and letters is a lexical error.
	if(!checkWrong1(outWords2))
	{
		cout<<"有数字和字母在一起的词法错误。"<<endl;
		out<<"有数字和字母在一起的词法错误。"<<endl;
		return 1;
	}
	// So is any illegal character.
	if(!checkWrong2(outWords2))
	{
		cout<<"有非法字符的词法错误。"<<endl;
		out<<"有非法字符的词法错误。"<<endl;
		return 1;
	}

	// Render every token as <token,STYLE>.
	normalize(outWords2,finalWord);

	// Emit the result to the output file and echo it on the console.
	for(vector<string>::size_type x=0;x<finalWord.size();x++)
	{
		cout<<finalWord[x]<<endl;
		out<<finalWord[x]<<endl;
	}

	in.close();
	out.close();
	return 0;
}

// Splits one whitespace-free word into tokens and appends them to vec.
//
// str: the word to split.
// vec: receives the tokens, in source order.
//
// Three kinds of token are produced:
//   * a maximal run of letters and digits (letters and digits are kept in
//     the same token on purpose, so that a later check can still see a
//     mixture such as "a1");
//   * a sign: one of the two-character operators <=, >=, != and ==, or
//     otherwise a single sign character;
//   * any other character, as a one-character token of its own. Such
//     characters are kept rather than skipped so that an illegal character
//     remains visible to the validation that follows, and so that it cannot
//     glue its two neighbours together into one token.
//
// An empty word produces no token at all.
void analyseWord(string str,vector<string>& vec)
{
	const string::size_type length = str.length();
	string::size_type pos = 0;

	while(pos<length)
	{
		const char chr = str[pos];

		if(isLettle(chr) || isNum(chr))
		{
			// Consume the whole alphanumeric run.
			string::size_type end = pos+1;
			while(end<length && (isLettle(str[end]) || isNum(str[end])))
			{
				end++;
			}
			vec.push_back(str.substr(pos,end-pos));
			pos = end;
		}
		else if(isSign(chr))
		{
			// <=, >=, != and == are single two-character tokens.
			const bool twoChars = (chr=='<' || chr=='>' || chr=='!' || chr=='=')
				&& pos+1<length && str[pos+1]=='=';
			const string::size_type width = twoChars ? 2 : 1;
			vec.push_back(str.substr(pos,width));
			pos += width;
		}
		else
		{
			// Unknown character: keep it as its own token.
			vec.push_back(string(1,chr));
			pos++;
		}
	}
}
// Returns true when chr equals one of the 16 entries of the sign table,
// false otherwise.
bool isSign(char chr)
{
	const char* cursor = sign;
	const char* const stop = sign + 16;
	while(cursor != stop)
	{
		if(*cursor == chr)
		{
			return true;
		}
		++cursor;
	}
	return false;
}
// Returns true when chr lies in 'a'..'z' or 'A'..'Z', false otherwise.
bool isLettle(char chr)
{
	const bool lowerCase = ('a'<=chr && chr<='z');
	const bool upperCase = ('A'<=chr && chr<='Z');
	return lowerCase || upperCase;
}
// Returns true when chr lies in '0'..'9', false otherwise.
bool isNum(char chr)
{
	return !(chr<'0' || chr>'9');
}
// Returns true when str is exactly equal to one of the 6 entries of the
// keyword table, false otherwise.
bool isKeyWord(string str)
{
	int idx = 0;
	// Stop at the first matching entry, or run off the end of the table.
	while(idx<6 && keyWords[idx]!=str)
	{
		++idx;
	}
	return idx<6;
}
// Removes comments from a token sequence.
//
// org:  the tokens, in which a comment opener appears as the two
//       consecutive tokens "/" "*" and a comment closer as "*" "/".
// dest: receives, in order, every token of org that lies outside a comment.
//
// The opener, the closer and everything between them are dropped. Inside a
// comment only a closer is looked for, and the tokens of an opener are never
// reused as part of a closer, so "/" "*" "/" does not form a complete
// comment. An unterminated comment swallows the rest of the input.
void wipeOffComent(vector<string> org,vector<string>& dest)
{
	const vector<string>::size_type count = org.size();
	bool isComment = false; // true while inside a comment
	vector<string>::size_type m = 0;

	while(m<count)
	{
		// Guard every look-ahead so the last token never reads past the end.
		const bool hasNext = (m+1<count);

		if(!isComment)
		{
			if(hasNext && org[m]=="/" && org[m+1]=="*")
			{
				isComment = true;
				m += 2; // skip both tokens of the opener
			}
			else
			{
				dest.push_back(org[m]);
				m++;
			}
		}
		else if(hasNext && org[m]=="*" && org[m+1]=="/")
		{
			isComment = false;
			m += 2; // skip both tokens of the closer
		}
		else
		{
			m++; // comment content
		}
	}
}
// Appends one formatted entry "<token,STYLE>" to dest for every token of org.
//
// STYLE is chosen from the token, first match wins:
//   OP         - the first character is a sign
//   NUM        - the first character is a digit
//   KEYWORD    - the whole token is a keyword
//   IDENTIFIER - anything else
void normalize(vector<string> org,vector<string>& dest)
{
	for(vector<string>::size_type idx=0;idx<org.size();++idx)
	{
		const string& token = org[idx];
		const char first = token[0];

		const char* style;
		if(isSign(first))
		{
			style = "OP";
		}
		else if(isNum(first))
		{
			style = "NUM";
		}
		else if(isKeyWord(token))
		{
			style = "KEYWORD";
		}
		else
		{
			style = "IDENTIFIER";
		}

		string entry = "<";
		entry += token;
		entry += ",";
		entry += style;
		entry += ">";
		dest.push_back(entry);
	}
}
// Looks for a letter directly followed by a digit, or a digit directly
// followed by a letter, inside any single token.
//
// Returns false as soon as such a pair is found and true when no token
// contains one.
bool checkWrong1(vector<string> vec)
{
	for(vector<string>::size_type t=0;t<vec.size();++t)
	{
		const string& token = vec[t];

		// Inspect every pair of neighbouring characters.
		for(string::size_type pos=1;pos<token.length();++pos)
		{
			const char left = token[pos-1];
			const char right = token[pos];

			if(isLettle(left) && isNum(right))
			{
				return false;
			}
			if(isNum(left) && isLettle(right))
			{
				return false;
			}
		}
	}
	return true;
}
// Looks for illegal tokens.
//
// A token is illegal when
//   * it is empty,
//   * its first character is neither a letter, a digit nor a sign, or
//   * it is a lone "!" - the exclamation mark is only meaningful as the
//     first half of "!=", so "!=" itself is accepted.
//
// Returns false as soon as an illegal token is found and true when every
// token is acceptable.
bool checkWrong2(vector<string> vec)
{
	for(vector<string>::size_type i=0;i<vec.size();i++)
	{
		const string& token = vec[i];

		if(token.empty())
		{
			return false;
		}

		const char first = token[0];
		if(!isLettle(first) && !isNum(first) && !isSign(first))
		{
			return false;
		}

		// Compare the whole token, not just its first character, so that
		// the valid operator "!=" is not rejected together with "!".
		if(token=="!")
		{
			return false;
		}
	}
	return true;
}
💿 文件大小 17 K
👤 上传用户 hjf
📂 所属分类编译器/解释器
🏷️ 相关标签

#C_Minus #分析器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -