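// File: lex09005426.cpp
//
// Project: C++-based generator for a compiler's lexical-analysis module [Lex]
// Language: C++
// (Code-viewer residue, not part of the program: "Font size:")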
// Lex09005426.cpp : Defines the entry point for the console application.
//
/*
我们伟大的Lex09005426
我们伟大的小组~

组长：09005426 杨梦冬
成员：09005423 陈龙
      09005424 孔祥龙
*/
#include "stdafx.h"
#include "lexdef.h"

// Count of '\n' characters consumed from the input so far. The section
// parsers below increment it; per the original comments it is meant for
// reporting the line on which an error occurs.
int lineno = 0;

void ParseHeader();		// Parse the file header (in practice this section is copied unchanged into the generated source file)
void ParseCharset();	// Parse the character-set definitions (builds the character sets), e.g. "number [0-9]"
void ParseRESegment();	// Parse the rule/action section
void ParseCustomFuncSegment();	// Scan the user-defined function section and write it straight to the output
int CheckSpecsign(char c);	// Classify the two-character marker that starts with '%' ("%%", "%{" or "%}")
void ParseCharsetDef(string &s, CHARSET *set);	// Parse one character-set definition string into *set (the caller adds it to the name -> charset map)
char GetTransMeanChar(char c);			// Map the letter following a backslash to the character it denotes

/*
 * Program entry point.
 *
 * Opens the lexer specification (argv[1], or "lex.l" when no argument is
 * given) on the stream `fin` and the generated source "lex.cpp" on the
 * stream `fout`, then runs the four section parsers in order: header,
 * character sets, rules/actions, user functions.
 * (`fin` and `fout` are not declared in this file; presumably they are
 * global file streams coming from lexdef.h -- confirm.)
 *
 * Returns 0 on success and 1 when either file cannot be opened, so that
 * scripts and build tools can detect the failure.
 */
int _tmain(int argc, _TCHAR* argv[])
{
	// Pick the input specification: the default file name or the first argument.
	if (argc == 1){
		fin.open("lex.l", ios::in);
	}
	else{
		fin.open(argv[1], ios::in);
	}
	if (!fin){
		cerr<<"Can't open source file!\n";
		return 1;
	}

	fout.open("lex.cpp", ios::out);
	if (!fout){
		cerr<<"Create out file error!\n";
		fin.close();
		return 1;
	}

	// The order matters: each parser consumes its own section of `fin`.
	ParseHeader();
	ParseCharset();
	ParseRESegment();
	ParseCustomFuncSegment();

	fin.close();
	fout.close();

	return 0;
}


void ParseHeader(){
	char c=fin.get();
	int state=CheckSpecsign(c);
	//判断开头是不是%{
	if(state!=CHARSTATE_HEADER_BEGIN)
	{
		////////////////////////////////////////////////////////////////
		//				Here an error reporting						////
		////////////////////////////////////////////////////////////////
		cout<<"The input file has no correct formation,Please try again!\n";
		exit(0);		//here we have a modification of the return value of main
	}
	//判断到%}或到文件尾为止进行扫描
	while(!fin.eof()&&state!=CHARSTATE_HEADER_END)
	{
		c=fin.get();
		//if(c=='\t') continue;//跳过\t字符不输出。		<--------不应该抹去TAB,有的人就是喜欢用TAB不用空格，比如我
		if(c=='%'){
			state=CheckSpecsign(c);
			continue;
		}//当接受到%时，注意判断是不是特殊符号
		if(c=='\n') lineno++;//让行号自增，用以判断错误行号。
		fout.put(c);
	}
}
/*
 * Parses the character-set section: a sequence of "name definition" pairs
 * (e.g. "number [0-9]") terminated by "%%".
 *
 * Each definition string is handed to ParseCharsetDef and the resulting
 * CHARSET is stored in the `charset` map under its name. `lineno` is
 * incremented for every '\n' skipped between definitions.
 *
 * The function returns when "%%" has been read or the input is exhausted.
 * It prints an error and exits with status 1 when a '%' is not followed by
 * one of the characters CheckSpecsign recognises, or when a name has no
 * definition after it.
 */
void ParseCharset(){
	int state = CHARSTATE_BEGIN;
	while(fin.good()&&(state!=CHARSTATE_LAYER_ID)){
		// Skip blanks and line breaks in front of the next item.
		char c = fin.get();
		while((c==' ')||(c=='\t')||(c=='\n')){
			if (c=='\n') lineno++;
			c = fin.get();
		}
		// The input ended while skipping: there is nothing left to parse, and
		// `c` does not hold a real character.
		if (!fin.good()) break;

		if (c=='%'){
			state = CheckSpecsign(c);
			if (state == CHARSTATE_ERROR){
				cerr<<"\nError: unexpected '%' in the charset section (after line "<<lineno<<")!\n";
				exit(1);
			}
			continue;
		}
		// Not a marker: give the character back so it is read as part of the name.
		fin.unget();

		// Read the charset name and its definition string.
		string name, charsetstr;
		if (!(fin>>name>>charsetstr)){
			cerr<<"\nError: charset \""<<name<<"\" has no definition (after line "<<lineno<<")!\n";
			exit(1);
		}

		// Parse the definition and register the name -> charset mapping.
		CHARSET CharSet;
		ParseCharsetDef(charsetstr, &CharSet);
		charset[name] = CharSet;
	}
}
void ParseRESegment(){
	int state = CHARSTATE_BEGIN;
	list<NFA> nfalist;

	DWORD start = GetTickCount();	//性能测试代码
	cout<<"\nBuilding NFA...\n";	//性能测试代码
	cout.flush();
	while(!fin.eof()&&(state!=CHARSTATE_LAYER_ID)){
		char c = fin.get();
		while((c==' ')||(c=='\t')||(c=='\n')){
			if (c=='\n') lineno++;
			c = fin.get();
		}
		if (c=='%'){
			state = CheckSpecsign(c);
			if (state == CHARSTATE_ERROR){
				////////////////////////////////////////////////////////////////
				//				Here an error reporting						////
				////////////////////////////////////////////////////////////////
				exit(0);
			}
			continue;
		}
		else{
			fin.unget();
		}
		string re, action;
		//读入正则表达式
		fin>>re;
		//读入动作
		getline(fin, action);

		/*
		看看读入的两个动作对不对，事实上对了
		cout<<re<<action<<endl;
		*/
		NFA nfa(re, action);
		nfalist.insert(nfalist.end(), nfa);
	}
	cout<<"Build complete, "<<GetTickCount()-start<<" ms\n";	//性能测试代码
	cout.flush();	//性能测试代码


	/*
	测试一下只合并两个NFA的代码
	NFA finalnfa = nfalist.front();
	nfalist.pop_front();
	finalnfa += nfalist.front();
	*/
	/*
	测试完成，该玩点真的了，把所有的NFA都合并，为了好写只做了两个NFA合并的
	运算符重载，用这个函数合并多个NFA也可以，不过合并后有点类似于这个结构：
	if{}
	else{
		if{}
		else{
			if{}
			else{
			}
		}
	}
	表面上看是if, else if, else if, else的平行结构，其实是嵌套多层的，这个
	巨大的NFA也是。没事，这个对性能影响不会很大，管他的……计算机就是为了计算，
	不多来点复杂的，那计算机存在的意义在哪里啊~
	*/
	start = GetTickCount();	//性能测试代码
	cout<<"\nCombining NFAs...\n";	//性能测试代码
	cout.flush();	//性能测试代码
	NFA finalnfa = nfalist.front();
	nfalist.pop_front();
	while (!nfalist.empty()){
		finalnfa += nfalist.front();
		nfalist.pop_front();
	}
	finalnfa.BuildStateMap();
	cout<<"Combination complete, "<<GetTickCount()-start<<" ms\n";	//性能测试代码
	cout.flush();	//性能测试代码

	start = GetTickCount();	//性能测试代码
	cout<<"\nBuilding DFA..."<<endl;	//性能测试代码
	cout.flush();	//性能测试代码
	DFA dfa(finalnfa);
	cout<<char(0x07);	//响一声，注意，这可不是吸费电话!
	cout<<"Build complete, "<<GetTickCount()-start<<" ms\n";	//性能测试代码
	cout.flush();	//性能测试代码

	start = GetTickCount();	//性能测试代码
	cout<<"\nMinimizing DFA..."<<endl;	//性能测试代码
	cout.flush();	//性能测试代码
	dfa.Minimize();
	cout<<"Minimization complete, "<<GetTickCount()-start<<" ms\n";	//性能测试代码
	cout.flush();	//性能测试代码

	//DFA进行编码存入二进制文件
	cout<<"\nWriting DFA into binary code file...\n";
	dfa.Output("dfa.bin", DFA::OUT_TO_FILE);
	//DFA相关的函数写入源代码文件
	cout<<"\nWriting DFA-dependant source code into output code file...\n";
	dfa.Output("DFA terminal state process function", DFA::OUT_TO_CODE);
}
/*
 * Copies the remainder of the input (the user-defined function section)
 * to the output file byte for byte, tabs included.
 *
 * The loop is driven by the success of the read itself rather than by
 * comparing the character with -1, so a genuine 0xFF byte is not mistaken
 * for end-of-file and a stream that has already failed cannot spin here.
 */
void ParseCustomFuncSegment(){
	char c;
	while (fin.get(c))
	{
		fout.put(c);
	}
}

/*
 * Classifies a two-character section marker.
 *
 * c is the character just read by the caller. When it is '%', the next
 * character is read from `fin` and the result is
 *   "%%" -> CHARSTATE_LAYER_ID
 *   "%{" -> CHARSTATE_HEADER_BEGIN
 *   "%}" -> CHARSTATE_HEADER_END
 * Any other following character is pushed back onto the stream and
 * CHARSTATE_ERROR is returned. CHARSTATE_ERROR is also returned when c is
 * not '%' or when the input ends right after the '%'.
 */
int CheckSpecsign(char c)
{
	if(c!='%') return CHARSTATE_ERROR;

	char next;
	if(!fin.get(next)){
		// Nothing was read, so there is nothing to push back. Calling
		// unget() here would clear eofbit and hide the end of the input
		// from callers that loop on fin.eof().
		return CHARSTATE_ERROR;
	}
	switch(next)
	{
	case '%': return CHARSTATE_LAYER_ID;
	case '{': return CHARSTATE_HEADER_BEGIN;
	case '}': return CHARSTATE_HEADER_END;
	default:
		fin.unget();	// not a marker: let the caller see this character again
		break;
	}
	return CHARSTATE_ERROR;
}

/* Reports a malformed charset definition and terminates the program with status 1. */
static void CharsetDefError(const string &s){
	cerr<<"\nError: malformed charset definition \""<<s<<"\"!\n";
	exit(1);
}

/*
 * Reads one range bound of definition s starting at s[pos] and advances pos
 * past it. A backslash introduces an escape that is resolved through
 * GetTransMeanChar. `last` is the index of the final character of s (the
 * closing bracket); a bound may not start at or run into that position.
 */
static char ReadCharsetBound(const string &s, int &pos, int last){
	if (pos >= last) CharsetDefError(s);
	char c = s[pos++];
	if (c == '\\'){
		if (pos >= last) CharsetDefError(s);
		c = GetTransMeanChar(s[pos++]);
	}
	return c;
}

/*
 * Parses one character-set definition such as "[0-9]" or "[a-zA-Z]".
 *
 * s   : the definition. The first and the last character are treated as the
 *       enclosing brackets; in between there must be a sequence of ranges
 *       "low<sep>high", where each bound is either a single character or a
 *       backslash escape understood by GetTransMeanChar.
 *       NOTE(review): as before, the separator is skipped without checking
 *       that it is '-', and stand-alone characters (e.g. the '_' in
 *       "[a-z_]") are not supported -- write "_-_" instead.
 * set : output. set->count receives the total number of characters and
 *       set->set a newly allocated array holding them; the ranges are
 *       stored from the last one in the definition to the first.
 *       Ownership of the array passes to the caller.
 *
 * A definition shorter than three characters yields an empty set. A range
 * that is cut short by the closing bracket, or whose high bound is smaller
 * than its low bound, prints an error and exits with status 1 instead of
 * requesting an array of negative size.
 */
void ParseCharsetDef(string &s, CHARSET *set){
	const int last = static_cast<int>(s.length()) - 1;	// index of the closing bracket
	int charcount = 0;
	stack<char> bound;	// low/high pairs, pushed in reading order

	int pos = 1;		// skip the opening bracket
	while (pos < last){
		const char down = ReadCharsetBound(s, pos, last);
		pos++;			// skip the separator between the two bounds
		const char up = ReadCharsetBound(s, pos, last);
		if (up < down) CharsetDefError(s);

		bound.push(down);
		bound.push(up);
		charcount += up-down+1;
	}

	set->count = charcount;
	set->set = new char[charcount];

	// Expand every range into the array. The stack hands the pairs back in
	// reverse order, high bound first.
	int location = 0;
	while (!bound.empty()){
		const char up = bound.top();
		bound.pop();
		const char down = bound.top();
		bound.pop();
		for (int ch = down; ch <= up; ch ++, location ++){
			set->set[location] = static_cast<char>(ch);
		}
	}
}

/*
 * Translates the letter that follows a backslash into the character the
 * escape stands for:
 *   t -> tab, s -> space, r -> carriage return (0x0d),
 *   n -> line feed (0x0a), \ -> backslash.
 * Any other letter is reported on cerr and terminates the program with
 * exit status 1.
 */
char GetTransMeanChar(char c){
	if (c == 't') return '\t';			// horizontal tab
	if (c == 's') return ' ';			// space
	if (c == 'r') return char(0x0d);	// carriage return '\r'
	if (c == 'n') return char(0x0a);	// line feed '\n'
	if (c == '\\') return '\\';			// literal backslash

	cerr<<"\nError: unrecognizable transmean char!\n";
	exit(1);
}
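// (Code-viewer residue, not part of the program) Keyboard shortcuts:
//
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full screen:        F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -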