// lexical.cpp
//
// A simple lexical analyzer (C++)
#include <iostream>
#include <fstream>
#include <string>
#include <cctype>
#include <cmath>	//数字转换时用到power函数
#include <vector>

using namespace std;
/*using std::string;
using std::ifstream;
using std::ofstream;
using std::vector;
using std::endl;*/

//=================================================================
// Function declarations
//=================================================================
void ParseToken();				// scan an identifier or reserved word
void ParseDelim();				// scan an operator
void ParseMark();				// scan a punctuation mark
void GetChar();					// read one character from the input file
void FindSort();				// classify the current character
void ShowTokenTable();			// "print the symbol table" per the original note; the visible body only writes a banner to the output file
int  InstallToken(string);		// enter an identifier into the identifier table and return its position
void OutputDelim(string);		// write the output record for one operator
void ParseNumber();				// scan a numeric constant
int  IsReserve(string);			// check whether a word is a reserved word

//=================================================================
// Character classes and lookup tables
//=================================================================

// Character classes stored in `sort` by FindSort().
const int NUM        = 0;	// digits 0-9
const int ALPHA      = 1;	// letters
const int DELIM      = 2;	// operator characters: + - * / < > = !
const int WHITESPACE = 3;	// white space (FindSort also files the end-of-file character here)
const int MARK       = 4;	// punctuation marks
const int OTHERS     = 5;	// every character not covered by the classes above

// Punctuation marks; the position in this table is added to the mark code base.
const char mark[] = {
	',', ';', '?', '\\', '@', '#', '$',
	'%', '^', '&', '(',  ')', '{', '}'
};

// Reserved words.
const std::string reserve[] = {
	"if",  "else", "while", "for",   "switch", "case", "int",
	"char", "long", "float", "double", "bool",  "const", "include"
};

// Operators; the position in this table is added to the operator code base.
const std::string delimtable[] = {
	"+",  "-",  "*",  "/",
	"+=", "-=", "*=", "/=",
	"=",  "==", "!=",
	"<",  "<=", ">",  ">="
};

// Table sizes, derived from the tables so they cannot drift apart.
const int RESERVE_TABLE_LENGTH = sizeof(reserve) / sizeof(reserve[0]);

const int DELIM_TABLE_LENGTH = sizeof(delimtable) / sizeof(delimtable[0]);

const int MARK_TABLE_LENGTH = sizeof(mark) / sizeof(mark[0]);

//=================================================================
//Global Variable
//=================================================================
// NOTE(review): with `using namespace std;` in effect, the names `sort` and
// `index` may collide with std::sort or the C library's index() on some
// toolchains - confirm on the target compiler.
char	c_char;			// character currently being processed
char	next_char;		// character following the current one (not referenced in the visible code)
int		sort;			// class of the current character (NUM, ALPHA, DELIM, ...)
int		line;			// current line number
int		index;			// current column number
int		t_index;		// column saved at the start of the token being scanned
int		t_line;			// line saved at the start of the token being scanned
int		pre_state;		// class of the previously processed input (identifier, operator, constant, ...)
int		n_state;		// number state
int		f_offset;		// float sort offset
float	f_table[50];		// start mem address of float table
string	t_word;			// text of the identifier / reserved word being collected
vector<string> token;	// identifier table filled by InstallToken()

ifstream inFile("a.txt");	// source text to analyze
ofstream outFile("b.txt");	// destination of the token listing

//=================================================================
// Numeric-constant conversion state
//=================================================================
int		sign = 1;			// sign of the constant (1 or -1)
int		number;				// digits of the constant accumulated as a decimal integer
int		expnum;				// exponent part of the constant
int		n;					// number of digits after the decimal point
int		expsign;			// sign of the exponent part of the constant
bool	isFinished = false;	// whether the current constant has been fully processed
int value(char);			// numeric value of a digit character
void InstallNum1();			// emit a constant that has only an integer part
void InstallNum2();			// emit a constant with integer and fraction parts
void InstallNum3();			// emit a constant that also has an exponent part



int main(){
	ShowTokenTable();
	line = 1;index = 1;
	inFile.get(c_char);

	while(inFile){
		FindSort();
		switch(sort){
		case NUM:ParseNumber();pre_state = NUM;break;
		case ALPHA:ParseToken();pre_state = ALPHA;break;
		case DELIM:ParseDelim();pre_state = DELIM;break;
		case WHITESPACE:GetChar();pre_state = WHITESPACE;break;
		case MARK:ParseMark();GetChar();pre_state = MARK;break;
		case OTHERS:GetChar();pre_state = OTHERS;
		}
	}

	return 0;
}

// Reads the next character of the input file into c_char and keeps the
// line/column counters (both starting at 1) in step with it.
//
// A newline is handed back to the caller like any other character (FindSort
// classifies it as white space), so it separates tokens. The line counter is
// advanced only when reading past the newline, which also counts consecutive
// blank lines correctly.
// At end of input c_char receives the end-of-file value converted to char.
void GetChar(){
	if(c_char == '\n'){
		// The previous character ended a line: the one read now is at
		// column 1 of the following line.
		line++;
		index = 0;
	}
	c_char = static_cast<char>(inFile.get());
	index++;
}

void FindSort(){
	if(isdigit(c_char))
		sort = NUM;
	else if(isalpha(c_char))
		sort = ALPHA;
	else if(c_char == '+' || c_char == '-' || c_char == '*' || c_char == '/' ||
		    c_char == '<' || c_char == '>' || c_char == '!' || c_char == '=')
		sort = DELIM;
	else if(isspace(c_char) || c_char == (char)(-1)/*文末符EOF*/ || c_char == '\n')
		sort = WHITESPACE;
	//{',',';','?','\','@','#','$','%','^','&','(',')'}
	else if(c_char == ',' || c_char == ';' || c_char == '?' || c_char == '\\' ||
		    c_char == '@' || c_char == '#' || c_char == '$' || c_char == '%' ||
			c_char == '^' || c_char == '&' || c_char == '(' || c_char == ')')
		sort = MARK;
	else 
		sort = OTHERS;
}

// Writes the output record for the punctuation mark held in c_char.
// The record code is the mark's position in the mark table, offset past the
// reserved-word and operator code ranges. The caller advances the input.
void ParseMark(){
	// Remember where the mark starts; these values go into the record.
	t_line  = line;
	t_index = index;

	// Position of the mark in the table (MARK_TABLE_LENGTH when absent).
	int slot = 0;
	while(slot < MARK_TABLE_LENGTH && mark[slot] != c_char)
		++slot;

	const int code = 2 + RESERVE_TABLE_LENGTH + DELIM_TABLE_LENGTH + slot;
	outFile << c_char << "\t\t" << '(' << code
		<< ",0," << t_line << ',' << t_index << ')' << endl;
}

// Scans a run of letters and digits starting at c_char and writes one record:
//   reserved word : (2 + position in the reserved table, 0, line, column)
//   identifier    : (1, position in the identifier table, line, column)
// On return c_char holds the first character after the word.
void ParseToken(){
	// Remember where the word starts; these values go into the record.
	t_index = index;
	t_line  = line;

	while(sort == ALPHA || sort == NUM){
		t_word += c_char;
		GetChar();
		FindSort();
	}

	const int reserved = IsReserve(t_word);
	if(reserved != -1){
		// Codes 0 and 1 denote constants and identifiers, and operator codes
		// start at 2 + RESERVE_TABLE_LENGTH, so reserved words occupy the
		// range starting at 2. Emitting the bare table position would make
		// "if" and "else" indistinguishable from those two codes.
		outFile << t_word << "\t\t" << '(' << 2 + reserved << ",0," << t_line
			<< ',' << t_index << ')' << endl;
	}
	else{
		// Not reserved: look the word up in (or add it to) the identifier table.
		const int slot = InstallToken(t_word);
		outFile << t_word << "\t\t" << "(1," << slot << ',' << t_line
			<< ',' << t_index << ')' << endl;
	}

	// The buffer is global; empty it for the next word.
	t_word = "";
}


// Looks s up in the reserved-word table.
// Returns the position of the first matching entry, or -1 when s is not a
// reserved word.
int IsReserve(string s){
	int found = -1;
	for(int k = 0 ; found == -1 && k < RESERVE_TABLE_LENGTH ; ++k){
		if(reserve[k] == s)
			found = k;
	}
	return found;
}

// Returns the position of identifier s in the identifier table `token`,
// appending it first when it is not there yet.
int InstallToken(string s){
	const vector<string>::size_type count = token.size();

	// Already known? Return the existing position.
	for(vector<string>::size_type j = 0 ; j < count ; j++){
		if(token[j] == s)
			return static_cast<int>(j);
	}

	// New identifier: it is stored at the old end of the table. (The loop
	// variable is out of scope here in standard C++, so the position is taken
	// from the size recorded before the insertion.)
	token.push_back(s);
	return static_cast<int>(count);
}

// Scans an operator starting at c_char and writes its record via OutputDelim().
//
// Every operator character may be followed by '=' to form a two-character
// operator (<=, >=, !=, ==, +=, -=, *=, /=). Otherwise:
//   - '<', '>', '=', '*', '/' are emitted as single-character operators;
//   - a lone '!' produces no record (it is not in the operator table);
//   - '+' / '-' are emitted as operators when the previous input was a
//     constant or an identifier, or when the next character is not a digit;
//     in the remaining case they only set `sign` for the number that follows.
// On return c_char holds the first character after the operator.
void ParseDelim(){
	// Remember where the operator starts; OutputDelim() prints these values.
	t_index = index;
	t_line  = line;

	const char first = c_char;
	switch(first){
	case '<': case '>': case '!': case '=':
	case '+': case '-': case '*': case '/':
		break;
	default:
		return;		// not an operator character: leave the input untouched
	}

	GetChar();
	const string op(1, first);

	if(c_char == '='){
		OutputDelim(op + "=");
		GetChar();
		return;
	}

	switch(first){
	case '!':
		// A '!' that is not part of "!=" is dropped.
		break;
	case '+':
	case '-':
		// isdigit() needs a value representable as unsigned char; a plain
		// (possibly negative) char would be undefined behaviour.
		if(pre_state == NUM || pre_state == ALPHA ||
		   !isdigit(static_cast<unsigned char>(c_char)))
			OutputDelim(op);
		else
			sign = (first == '-') ? -1 : 1;
		break;
	default:
		OutputDelim(op);
		break;
	}
}

// Writes the output record for operator s. The record code is the operator's
// position in the operator table, offset past the reserved-word code range;
// the position printed is the one saved in t_line / t_index.
void OutputDelim(string s){
	// Position of s in the table (DELIM_TABLE_LENGTH when absent).
	int slot = 0;
	while(slot < DELIM_TABLE_LENGTH && delimtable[slot] != s)
		++slot;

	const int code = 2 + RESERVE_TABLE_LENGTH + slot;
	outFile << s << "\t\t" << '(' << code << ','
		<< 0 << ',' << t_line << ',' << t_index << ')' << endl;
}

void ParseNumber(){
	t_index = index;
	t_line	= line;
	n_state = 0;
	while(!isFinished){
	switch(n_state){
	case 0:
		if(sort == NUM){
			n_state = 1;
			number = value(c_char);
			GetChar();
			FindSort();
		}
		else if(c_char == '.'){
			n_state = 2;
			GetChar();
			FindSort();
		}
		else if(c_char == 'E'){
			n_state = 4;
			GetChar();
			FindSort();
		}
		else{
			//Error();
			isFinished = true;
		}
		break;
	case 1:
		if(sort == NUM){
			n_state = 1;
			number = number * 10 + value(c_char);
			GetChar();
			FindSort();
		}
		else if(c_char == '.'){
			n_state = 2;
			GetChar();
			FindSort();
		}
		else if(c_char == 'E'){
			n_state = 4;
			GetChar();
			FindSort();
		}
		else if(sort == WHITESPACE || sort == MARK || sort == DELIM){
			InstallNum1();
		}
		else{
			//Error();
			isFinished = true;
		}
		break;
	case 2:
		if(sort == NUM){
			n_state = 3;
			n = n +1;
			number = number*10 + value(c_char);
			GetChar();
			FindSort();
		}
		else {
			//Error();
			isFinished = true;
		}
		break;
	case 3:
		if(sort == NUM){
			n_state = 3;
			n = n +1;
			number = number*10 + value(c_char);
			GetChar();
			FindSort();
		}
		else if(c_char == 'E'){
			n_state = 4;
			GetChar();
			FindSort();
		}
		else if(sort ==	WHITESPACE || sort == MARK || sort == DELIM)
			InstallNum2();
		else{
			//Error();
			isFinished = true;
		}
		break;
	case 4:
		if(sort == NUM){
			n_state = 6;
			expsign = 1;
			expnum = value(c_char);
			GetChar();
			FindSort();
		}
		else if(c_char == '+'){
			n_state = 5;
			expsign = 1;
			GetChar();
			FindSort();
		}
		else if(c_char == '-'){
			n_state = 5;
			expsign = -1;
			GetChar();
			FindSort();
		}
		else {
			//Error();
			isFinished = true;
		}
		break;
	case 5:
		if(sort == NUM){
			n_state = 6;
			expnum = value(c_char);
			GetChar();
			FindSort();
		}
		else {
			//Error();
			isFinished = true;
		}
		break;
	case 6:
		if(sort == NUM){
			n_state = 6;
			expnum = expnum * 10 + value(c_char);
			GetChar();
			FindSort();
		}
		else if(sort == WHITESPACE || sort == MARK || sort == DELIM){
			InstallNum3();
		}
		else{
			//Error();
			isFinished = true;
		}
		break;
	}
	}
	isFinished = false;
}

// Returns the offset of character `in` from the character '0'
// (0..9 for the decimal digits).
int value(char in){
	const int zero = '0';
	return static_cast<int>(in) - zero;
}

void InstallNum1(){
	//只有整数部分的常数
	float num;
	num = sign * number;
	*(f_table + f_offset) = num;
	outFile << num << "\t\t" << '(' << 0 << ',' << f_offset << ',' 
		<< line << ',' << t_index << ')' << endl;
	f_offset++;

	number = 0;sign = 1;isFinished = true;
}

void InstallNum2(){
	//由整数部分和小数部分组成的常数
	float num;
	int i;
	num = sign * number * pow(10,-1 * n);
	for(i=0 ; i<f_offset; i++)
		if(*(f_table + i) == num)
			break;

	if(i==f_offset)
    {
		*(f_table + i) = num;
    	f_offset++;
	}
	outFile << num << "\t\t" << '(' << 0 << ',' << f_offset << ',' 
		<< line << ',' << t_index << ')' << endl;

	number = 0;n = 0;sign = 1;isFinished = true;
}

void InstallNum3(){
	//由整数部分和小数部分和指数部分组成的常数
	int i;
	float num;
	if(expsign == -1)
		expnum = -1 * expnum;
	expnum = expnum - n;
	num = sign * number * pow(10,expnum);
	for(i=0 ; i<f_offset; i++)
		if(*(f_table + i) == num)
			break;
	if(i==f_offset)
    {
		*(f_table + i) = num;
    	f_offset++;
	}
	outFile << num << "\t\t" << '(' << 0 << ',' << f_offset << ',' << line << ',' 
		<< t_index << ')' << endl;

	number = 0;n = 0;sign = 1;expnum = 0;isFinished = true;
}



// Writes the three-line title banner at the top of the output file.
// (Despite its name, the body does not print any table.)
void ShowTokenTable(){
	outFile << "=====================================" << endl
	        << "           词法分析程序				 " << endl
	        << "=====================================" << endl;
}
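// File size: 4 K
// Uploaded by: zxk756921815
// Category: compilers / interpreters
// Tags: #analyzer
//
// Keyboard shortcuts of the web viewer this file was copied from:
//   copy code            Ctrl + C
//   search code          Ctrl + F
//   full-screen mode     F11
//   toggle theme         Ctrl + Shift + D
//   show shortcuts       ?
//   larger font          Ctrl + =
//   smaller font         Ctrl + -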