// ⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
// ⭐ 虫虫下载站
//
// 📄 lexical.cpp
//
// 📁 词法分析器,学编译原理时做的,很简单,可供初学者借鉴
// 💻 CPP
// 字号:
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include "Lexical.h"
using namespace std;

/**
 * Program entry point.
 *
 * Resets the scanner's global counters, clears the name buffers of the
 * symbol table and the keyword table, then runs the scanner.
 *
 * @return Always 0.
 */
int main()
{
	err_count = 0;                              // no errors reported yet
	lineOfProc = 1;                             // source lines are numbered from 1
	label_count = 0;
	addr_count = 0;
	var_count = 0;                              // symbol table starts empty

	// Zero every name buffer so the later strcmp()/strcpy() calls always see
	// terminated strings.
	// NOTE(review): 30 is assumed to be the size of the name arrays declared
	// in Lexical.h — confirm.
	for(int i = 0; i < maxVariable; i++)
		for(int j = 0; j < 30; j++)
			symbleList[i].name[j] = '\0';

	// Each loop declares its own index: in standard C++ the `i` of the loop
	// above is no longer in scope here.
	for(int i = 0; i < numOfKeyWord; i++)
		for(int j = 0; j < 30; j++)
			key[i].name[j] = '\0';

	Scanner();                                  // scan the source file
	return 0;
}

void Scanner()                                   //function begin
{
	tokenOut.open("tokenOut.txt");               //tokenOut.txt for storing tokens
	if(tokenOut.fail())							 //for output					
	{
		cout << "Error occurs when opening file tokenOut.txt" << endl;
		return;
	}
	
	symbleOut.open("symbleOut.txt");              //symbleOut.txt for storing symble
	if(symbleOut.fail())                          // for output
	{
		cout << "Error occurs when opening file symbleOut.txt" << endl;
		return;
	}

	keyIn.open("keyWord.txt", ios::out);      //keyWord.txt for storing reserved word
	{                                              // for input
		if(keyIn.fail())
		{
			cout << "Error occurs when opening file keyWord.txt" << endl;
			return;
		}
	}
	int i =  0;                                   // variable i for index of keyword
	while(!keyIn.eof())                           //read keyword from keyWord.txt
	{
		keyIn >> key[i].code >> key[i].name;
		i++;
	}
	cout << "*******************************************************" << endl;
	cout << "**********************词法分析器***********************" << endl;
	cout << "*******************************************************" << endl;
	cout << "请输入源程序文件名:" << endl;          //the file name for scanning
	char filename[20];
	cin >> filename;
	sourceIn.open(filename);//, ios::in);
	if(sourceIn.fail())
	{
		cout << "Error occurs when opening file " << filename << endl;
		return;
	}
	cout << "正在执行分析任务......" << endl;

	currentLetter = sourceIn.get();                //get the first letter of source file
	while(currentLetter != EOF)                    //until end of file
	{
		for(int i = 0; i < 30; i ++)               //initialize currentToken.name
			currentToken.name[i] = '\0';
		currentToken.line = lineOfProc;
		if(currentLetter >= '0' && currentLetter <= '9') IsNumber(); //if first letter is digital
		else
		{
			if((currentLetter <= 'z' && currentLetter >= 'a') || 
				(currentLetter <='Z' && currentLetter >= 'A')) //if first letter is alpha
			IsAlpha();
			else
			{
				if(currentLetter == '/') IsNotes();       //represent other letters is notes in this line
				else
				{
					if(currentLetter == '\'') IsChar();  //constant letter
					else
						IsOther();                        //other letters
				}
			}
		}
	}
	/*ifstream in("tokenOut.txt");
	int a, b, c, d;
	char ch[30];
	for(i = 0; i < 30; i ++)
		ch[i] = '\0';
	
	while(!in.eof())
	{
		in >> a >> ch >> b >> c >> d;
		in.get();
		in.get();
		cout << a << "   " << ch<<"   " << b<< "   " << c << "    "<< d << endl;
		cout << in.tellg() << endl;
	}*/
	tokenOut.close(); 
	tokenOut.close();                                      //close the opening file
	symbleOut.close();
	keyIn.close();
	sourceIn.close();
}
//function IsNumber()
void IsNumber()
{
	int index = 0;
	while(currentLetter <= '9' && currentLetter >= '0') //until letter isn't digital
	{
		currentToken.name[index++] = currentLetter;
		currentLetter = sourceIn.get();
	}
	
	currentToken.code = 28;                  //整常数
	currentToken.addr = addr_count++;
	currentToken.label = label_count++;
	if(currentLetter == '.')                          //real constant
	{
		char oldLetter = currentLetter;
		currentLetter = sourceIn.get();
		if(currentLetter <= '9' && currentLetter >= '0')
		{

			currentToken.code = 29;                  //实常数
			currentToken.name[index++] = oldLetter;
			while(currentLetter <= '9' && currentLetter >= '0')
			{
				currentToken.name[index++] = currentLetter;
				currentLetter = sourceIn.get();
			}
		}
		if(currentLetter == '.' || 
			(currentLetter <= 'z'&& currentLetter >= 'a') || 
			(currentLetter <= 'Z' && currentLetter >= 'A'))
		{
			Error(2);
			currentLetter = sourceIn.get();
			while(currentLetter <= '9' && currentLetter >= '0')
				currentLetter = sourceIn.get();
		}
	}
	Output();
} // function IsNumber() end

/**
 * Scans an identifier or reserved word starting at currentLetter.
 *
 * Collects letters, digits and '_' into currentToken.name, then looks the
 * text up in key[]. A match yields a keyword token (the keyword's own code,
 * addr -1); otherwise the token is an identifier (code 27) with a freshly
 * allocated addr. The token is emitted through Output().
 *
 * On return currentLetter holds the first character after the word.
 */
void IsAlpha()
{
	// Keep one byte for the terminating '\0'; excess characters are consumed
	// but not stored, so a very long identifier cannot overrun the buffer.
	// NOTE(review): assumes currentToken.name holds 30 chars — confirm in
	// Lexical.h.
	const int maxNameLen = 29;

	int index = 0;
	while((currentLetter <= '9' && currentLetter >= '0') ||
		(currentLetter <= 'Z' && currentLetter >= 'A') ||
		(currentLetter <= 'z' && currentLetter >= 'a') ||
		currentLetter == '_')
	{
		if(index < maxNameLen)
			currentToken.name[index++] = currentLetter;
		currentLetter = sourceIn.get();
	}

	// Every word consumes exactly one label, keyword or not.
	currentToken.label = label_count++;

	for(int i = 0; i < numOfKeyWord; i++)    // reserved word?
	{
		if(strcmp(currentToken.name, key[i].name) == 0)
		{
			currentToken.addr = -1;          // keywords have no symbol address
			currentToken.code = key[i].code;
			Output();
			return;
		}
	}

	currentToken.code = 27;                  // identifier
	currentToken.addr = addr_count++;
	Output();
} //function IsAlpha() end

void IsNotes()
{
	char oldLetter = currentLetter;
	currentLetter = sourceIn.get();
	if(currentLetter == '*')
	{
		for(;;)
		{
			currentLetter = sourceIn.get();
			if(currentLetter == '*')
			{
				currentLetter = sourceIn.get();
				if(currentLetter == '/')
					break;
			}
			if(currentLetter == '\n' || currentLetter == EOF) 
			{
				Error(3);
				break;
			}
		}
	}
	else
	{
		currentToken.name[0] = '/';
		currentToken.addr = addr_count++;
		currentToken.code = 39;
		currentToken.label = label_count++;
		Output();
	}
}//function IsNotes() end

void IsChar()
{		
	int index = 0;
	for(;;)
	{
		currentLetter = sourceIn.get();
		if(currentLetter == '\'')
			break;
		currentToken.name[index++] = currentLetter;
	}

	currentToken.label = label_count++;
	currentToken.code = 30;                      //字符常数
	currentToken.addr = addr_count++;
	currentLetter = sourceIn.get();
	Output();
}// function IsChar() end

void IsOther()
{
	char oldLetter = currentLetter;
	switch(currentLetter)
	{
	case '(':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 32;
		currentLetter = sourceIn.get();
		Output();
		break;
	case ')':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 33;
		currentLetter = sourceIn.get();
		Output();
		break;
	case '*':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 34;
		currentLetter = sourceIn.get();
		Output();
		break;
	case '+':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 35;
		currentLetter = sourceIn.get();
		Output();
		break;
	case '-':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 36;
		currentLetter = sourceIn.get();
		Output();
		break;
	case ',':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 37;
		currentLetter = sourceIn.get();
		Output();
		break;
	case '.':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 38;
		currentLetter = sourceIn.get();
		Output();
		break;
	case ':':
		
		currentLetter = sourceIn.get();
		if(currentLetter == '=')
		{
			currentToken.name[0] = oldLetter;
			currentToken.name[1] = currentLetter;
			currentToken.addr = addr_count++;
			currentToken.code = 41;
			currentToken.label = label_count++;
			currentLetter = sourceIn.get();
		}
		else
		{
			currentToken.name[0] = oldLetter;
			currentToken.addr = addr_count++;
			currentToken.label = label_count++;
			currentToken.code = 40;
		}
		Output();
		break;
	case ';':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 42;
		currentLetter = sourceIn.get();
		Output();
		break;
	case '<':
		currentLetter = sourceIn.get();
		if(currentLetter == '=')
		{
			currentToken.name[0] = oldLetter;
			currentToken.name[1] = currentLetter;
			currentToken.addr = addr_count++;
			currentToken.code = 44;
			currentToken.label = label_count++;
			currentLetter = sourceIn.get();
		}
		if(currentLetter == '>')
		{
			currentToken.name[0] = oldLetter;
			currentToken.name[1] = currentLetter;
			currentToken.addr = addr_count++;
			currentToken.code = 45;
			currentToken.label = label_count++;
			currentLetter = sourceIn.get();
		}
		else
		{
			currentToken.name[0] = oldLetter;
			currentToken.addr = addr_count++;
			currentToken.label = label_count++;
			currentToken.code = 43;
		}
		Output();
		break;
	case '=':
		currentToken.name[0] = currentLetter;
		currentToken.addr = addr_count++;
		currentToken.label = label_count++;
		currentToken.code = 46;
		currentLetter = sourceIn.get();
		Output();
		break;
	case '>':
		currentLetter = sourceIn.get();
		if(currentLetter == '=')
		{
			currentToken.name[0] = oldLetter;
			currentToken.name[1] = currentLetter;
			currentToken.addr = addr_count++;
			currentToken.code = 48;
			currentToken.label = label_count++;
			currentLetter = sourceIn.get();
		}
		else
		{
			currentToken.name[0] = oldLetter;
			currentToken.addr = addr_count++;
			currentToken.label = label_count++;
			currentToken.code = 47;
		}
		Output();
		break;
	case 10:
	case 13:
		lineOfProc++;
		currentLetter = sourceIn.get();
		break;
	case ' ':
	case '\t':
		currentLetter = sourceIn.get();
		break;
	default:
		Error(1);
		currentLetter = sourceIn.get();
		break;
	}
}    //function IsOther() end

void Output()
{
	if(currentToken.code == 27 || currentToken.code == 28 ||
		currentToken.code == 29 || currentToken.code == 30)
	{
		bool flag = symbleExist();
		if(!flag)
		{
			symbleOut << setw(4) <<currentToken.addr;
			symbleOut << setw(6) << currentToken.code;
			symbleOut << "    " <<currentToken.name;
			symbleOut << endl;
		}
	}
	int length = 0;
	while(currentToken.name[length] != '\0')
	{
		length++;
	}
	tokenOut << setw(4) << currentToken.label;
	cout << setw(4) << currentToken.label;
	for(int i = length; i < 30; i++)
	{
		tokenOut << ' ';
		cout << ' ';
	}
	tokenOut << currentToken.name;
	cout << currentToken.name;
	tokenOut << setw(4) << currentToken.addr;
	tokenOut << setw(4) << currentToken.code;// << endl;
	tokenOut << setw(4) << currentToken.line << endl;

	cout << setw(4) << currentToken.addr;
	cout << setw(4) << currentToken.code;// << endl;
	cout << setw(4) << currentToken.line << endl;
	//cout << tokenOut.tellp() << endl;
}   //function Output() end

bool symbleExist()
{
	for(int i = 0; i < var_count; i++)
	{
		if(strcmp(currentToken.name, symbleList[i].name) == 0)
		{
			currentToken.addr = symbleList[i].addr;
			return true;
		}
	}
	strcpy(symbleList[var_count].name, currentToken.name);
	symbleList[var_count].code = currentToken.code;
	symbleList[var_count].addr = currentToken.addr;
	var_count++;
	return false;
} //function symbleExist() end

/**
 * Counts a lexical error and prints its diagnostic to cout.
 *
 * @param number Error kind: 1 = illegal character, 2 = malformed real
 *               constant, 3 = comment without a closing terminator. Any
 *               other value is counted but prints nothing.
 */
void Error(int number)
{
	++err_count;

	// Text printed before and after the line number for each known error.
	const char* beforeLine = 0;
	const char* afterLine = 0;
	if(number == 1)
	{
		beforeLine = "非法字符出现在第";
		afterLine = "行";
	}
	else if(number == 2)
	{
		beforeLine = "实常数出错于第";
		afterLine = "行";
	}
	else if(number == 3)
	{
		beforeLine = "第";
		afterLine = "行没有匹配注释符 */";
	}

	if(beforeLine == 0)
		return;                                  // unknown kind: counted only

	cout << "error " << err_count << ":";
	cout << beforeLine << lineOfProc << afterLine << endl;
}

// ⌨️ 快捷键说明
//
// 复制代码 Ctrl + C
// 搜索代码 Ctrl + F
// 全屏模式 F11
// 切换主题 Ctrl + Shift + D
// 显示快捷键 ?
// 增大字号 Ctrl + =
// 减小字号 Ctrl + -