⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexdef.h

📁 基于C++的编译器词法分析模块生成器[Lex]
💻 H
📖 第 1 页 / 共 2 页
字号:
						for (nfa_state_list::iterator p = this_map[i+states].begin();
							p != this_map[i+states].end(); p++){
								p->state += states;
						}
					}
					this_map[i+states] = mapping[i];
					for (nfa_state_list::iterator p = this_map[i+states].begin();
						p != this_map[i+states].end(); p++){
							p->state = i+states;
					}
					node.transition = CHARSTATE_EPSLONG;
					node.state = terminal;
					node.istrmnl = false;
					this_map[i+states].insert(this_map[i+states].end(), node);
				}
				*/
			}
			break;
		default:
			break;
	}
}
//获得这二个NFA所有的边上的字符的集合
void NFA::GetCharset(CHARSET *char_set){
	set<char> temp;
	char_set->set = new char[256];
	for (int i = 0; i < GetStateCount(); i ++){
		for (nfa_state_list::iterator p = this_map[i].begin();
			p!=this_map[i].end(); p++){
				if (p->transition!=CHARSTATE_EPSLONG){
					temp.insert(p->transition);
				}
			}
	}
	int count = 0;
	for (set<char>::iterator p = temp.begin(); 
		p!=temp.end(); p++, count++){
			char_set->set[count] = *p;
	}
	char_set->count = count;
	/*
	输出字符集看看对不对~
	cout<<"charcount: "<<count<<endl;
	for (int i = 0; i < count; i ++){
		cout<<char_set->set[i]<<',';
	}
	cout<<endl;
	*/
}
int NFA::GetStateCount(){
	return state_count;
}
void NFA::InitAttributes(){
	this->terminal = 0;
	this->state_count = 1;
}
state_set NFA::GetFirstStateset(){
	state_set tmpset;
	tmpset.insert(0);
	tmpset = GetEpslongTranStateset(tmpset);
	tmpset.insert(0);
	return tmpset;
}
state_set NFA::GetTranStateset(state_set &s_set, char transt){
	//本函数中注释为性能测试代码
	//DWORD start = GetTickCount();
	state_set set1 = GetNonepslongTranStateset(s_set, transt);
	state_set set2 = GetEpslongTranStateset(set1);
	//cout<<"\t\tGetTranStateset() "<<GetTickCount()-start<<" ms"<<endl;
	return set1+set2;
}
state_set NFA::GetNonepslongTranStateset(state_set &s_set, char transt){
	state_set tmpset;
	for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
		for (nfa_state_list::iterator pp = this_map[*p].begin();pp != this_map[*p].end();pp++){
			if (pp->transition==transt){
				tmpset.insert(pp->state);
			}
		}
	}

	/*
	测试一下这个转换对不对,把转换前后的集合输出
	cout<<"s_set:\n";
	for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
		cout<<*p<<',';
	}
	cout<<endl;
	cout<<"'"<<transt<<"'"<<endl;
	for (state_set::iterator p = tmpset.begin(); p!=tmpset.end(); p++){
		cout<<*p<<',';
	}
	cout<<endl;
	对头~
	*/

	return tmpset;
}
state_set NFA::GetEpslongTranStateset(state_set &s_set){
	state_set tmpset;
	for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
		for (nfa_state_list::iterator pp = this_map[*p].begin();pp != this_map[*p].end();pp++){
			if (pp->transition==CHARSTATE_EPSLONG){
				tmpset.insert(pp->state);
			}
		}
	}
	for (p = tmpset.begin(); p!=tmpset.end(); p++){
		for (nfa_state_list::iterator pp = this_map[*p].begin();pp != this_map[*p].end();pp++){
			if (pp->transition==CHARSTATE_EPSLONG){
				tmpset.insert(pp->state);
			}
		}
	}
	
	/*
	测试一下这个转换对不对,把转换前后的集合输出
	cout<<"s_set:\n";
	for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
		cout<<*p<<',';
	}
	cout<<endl;
	cout<<"'epslong'"<<endl;
	for (state_set::iterator p = tmpset.begin(); p!=tmpset.end(); p++){
		cout<<*p<<',';
	}
	cout<<endl;
	对头~
	*/

	return tmpset;
}
DFANODE NFA::GetFirstDFANode(){
	return GetDFANodeFromStateset
		(GetFirstStateset());
}
DFANODE NFA::GetNextDFANode(DFANODE &node, char transt){
	//此函数被DFA::DFA(NFA&)调用
	//此函数性能瓶颈在GetDFANodeFromStateset一步
	return GetDFANodeFromStateset
		(GetTranStateset(node.s_set, transt));
}
DFANODE NFA::GetDFANodeFromStateset(state_set &s_set){
	DFANODE node;
	node.s_set = s_set;
	node.istrmnl = false;
	node.iskeywd = false;

	//经过确定最终确定这个是性能瓶颈,此函数被
	//DFANODE NFA::GetNextDFANode(DFANODE &node, char transt)
	//调用
	//DWORD start = GetTickCount();
	//DWORD last = GetTickCount();
	for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
		//下面这个循环就是性能瓶颈
		//fuck, nfa的数据结构出的问题,每次都要遍历整个邻接表,太SB了,BS我自己
		//cout<<"\t\t\tstate_set::iterator "<<GetTickCount()-last<<" ms"<<endl;	//性能测试

		/*
		其实不用都看一遍的,按照我NFA表的数据结构,只看头尾应该就够了,注意,是应该
		我没有从数学上证明,如果出现了错误,要把这段代码用上的~
		///////////////////////////////////////////该死的没错的超慢的算法
		for (int i = 0; i < GetStateCount(); i ++){
			for (nfa_state_list::iterator pp = this_map[i].begin(); pp!=this_map[i].end(); pp++){
				if(pp->state==*p){
					//fuck!!!!! 为什么这里写if(pp->istrmnl)就不对!!啊!!!
					//一定要写if(pp->istrmnl==true),omg, have mercy!我承受能力很差的!!
					if (pp->istrmnl == true){
						if (node.istrmnl==false){
							node.istrmnl = true;
							node.iskeywd = pp->iskeywd;
							node.action = pp->action;
							break;
						}
						else{
							if ((node.iskeywd==false) && 
								(pp->iskeywd==true) && 
								(pp->istrmnl==true)){
								node.iskeywd = pp->iskeywd;
								node.action = pp->action;
								break;
							}
						}
					}
				}				
			}
		}
		*/

		//cout<<"\t\t\tfor_one_cycle "<<GetTickCount()-last<<" ms"<<endl;
		//last = GetTickCount();

		//改进后的代码,从散列映射表中直接查询状态对应的信息
		//再也不用每次都遍历该死的链表了,万岁~~~
		if (state_map[*p].istrmnl==true){
			if (node.istrmnl==false){
				node.istrmnl = true;
				node.iskeywd = state_map[*p].iskeywd;
				node.action = state_map[*p].action;
			}
			else{
				if ((node.iskeywd==false) && 
					(state_map[*p].iskeywd==true) && 
					(state_map[*p].istrmnl==true)){
					node.iskeywd = true;
					node.action = state_map[*p].action;
				}
			}
		}
	}
	//cout<<"\t\tGetDFANodeFromStateset() "<<GetTickCount()-start<<" ms"<<endl;

	/*
	输出状态集的状态
	cout<<endl;
	for (p = s_set.begin();p!=s_set.end(); p++){
		cout<<*p<<',';
	}
	cout<<endl;
	cout<<((node.istrmnl==true)?"terminal":"nonterminal")<<endl;
	if (node.istrmnl){
		cout<<((node.iskeywd)?"keyword":"nonkeyword")<<endl;
		cout<<"action:"<<node.action<<endl;
	}
	正确了~
	*/

	return node;
}
void NFA::Output(const string &title, NFA::OUTPUT_MODE mode){
	switch(mode){
		case NFA::OUT_TO_CONSOLE:
			{
				cout<<endl<<title<<endl;
				for (int i = 0; i < GetStateCount(); i ++){
					cout.width(5);cout.setf(ios::left);cout<<i;
					for (nfa_state_list::iterator p = this_map[i].begin(); 
						p != this_map[i].end(); p++){
						cout<<p->transition<<','<<p->state;
						if (p->istrmnl == true){
							cout<<",T,";
							if (p->iskeywd) cout<<"K,";
							else cout<<"NK,";
							//cout<<p->action;		//打印动作(动作好长……格式会乱)
						}
						cout<<' ';
					}
					cout<<endl;
				}
				cout<<"state count: "<<state_count<<endl;
				cout<<"terminal: "<<terminal<<endl;
			}
			break;
		case NFA::OUT_TO_FILE:
			break;
		default:
			break;
	}
}
void NFA::BuildStateMap(){
	for(int i = 0; i < GetStateCount(); i ++){
		for(nfa_state_list::iterator p = this_map[i].begin(); p!=this_map[i].end(); p++){
			state_map[p->state] = *p;
		}
	}
}
DFAStateMap::DFAStateMap(){
	this->map_count = 0;
	this->last_index = 0;
}
//直接调用成员变量的对应重载运算符实现,主要为了方便
DFANODE DFAStateMap::operator[](int index){
	return this_map[index];
}
void DFAStateMap::insert(DFANODE &node){
	this_map[++map_count] = node;
	last_index = map_count;
}
bool DFAStateMap::has(const DFANODE &node){
	int s_set_size = node.s_set.size();
	for (int i = 1; i <= map_count; i ++){
		if (this_map[i].s_set.size() == s_set_size){
			last_index = i;
			if (this_map[i].s_set==node.s_set) return true;
		}
	}
	return false;
}
int DFAStateMap::GetLastIndex(){
	return last_index;
}
int DFAStateMap::GetMapCount(){
	return map_count;
}
/*
根据NFA构造DFA,呵呵,写在DFA构造函数里可以让主程序更加容易读懂
具体怎么实现,DFA臃肿点也没关系的啦~!
*/
DFA::DFA(NFA &nfa){
	//NFA不能是空的(不会有人这么无聊拿空的NFA构造吧?)
	if (nfa.GetStateCount()>1){

		CHARSET char_set;
		nfa.GetCharset(&char_set);

		/*
		这段代码可以测试一下
		void DFAStateMap::insert(state_set &s_set) 和
		bool DFAStateMap::has(const state_set &s_set)

		state_set set1;
		set1.insert(set1.end(), 1);
		DFAStateMap map1;
		map1.insert(set1);

		state_set set2;

		state_set set3;
		set3.insert(set1.end(), 1);
		set3.insert(set1.end(), 2);

		cout<<((map1.has(set1)==true)?"true":"false")<<endl;
		cout<<((map1.has(set2)==true)?"true":"false")<<endl;
		cout<<((map1.has(set3)==true)?"true":"false")<<endl;
		结果正确
		*/

        DFANODE node;
		node = nfa.GetFirstDFANode();
		state_map.insert(node);
		int statecount = state_map.GetMapCount();
		for (int i = 1; i <= statecount; i++){
			//DWORD start1 = GetTickCount();
			for (int j = 0; j < char_set.count; j++){
				//DWORD start2 = GetTickCount();
				//经过测试下面这个调用是性能瓶颈,靠,果然是我自己算法磋
				node = nfa.GetNextDFANode(state_map[i], char_set.set[j]);
				//cout<<"\tGetNextDFANode "<<GetTickCount()-start2<<" ms"<<endl;
				//cout.flush();
				if (node.s_set.size()){
					if (state_map.has(node)){
						//DWORD start3 = GetTickCount();
						//cout<<"existing...now i = "<<i<<endl;		//这是无聊的测试代码
						//cout<<"added "<<i<<"->"<<char_set.set[j]<<","<<state_map.GetLastIndex()<<endl;
						this_map[i][char_set.set[j]] = state_map.GetLastIndex();
						//cout<<"\thas "<<GetTickCount()-start3<<" ms"<<endl;
						//cout.flush();
					}
					else{
						//DWORD start3 = GetTickCount();
						//cout<<"insertion! now i = "<<i<<endl;
						state_map.insert(node);
						//cout<<"added "<<i<<"->"<<char_set.set[j]<<","<<state_map.GetLastIndex()<<endl;
						this_map[i][char_set.set[j]] = state_map.GetLastIndex();
						//cout<<"\t!has "<<GetTickCount()-start3<<" ms"<<endl;
						//cout.flush();
					}
				}
			}
			statecount = state_map.GetMapCount();
			//cout<<"for "<<GetTickCount()-start1<<" ms"<<endl;
			//cout.flush();
			/*
			看看生成的DFA状态对不对
			cout<<i<<((state_map[i].istrmnl)?" terminal":" nonterminal")<<endl;
			if (state_map[i].istrmnl==true){
				cout<<state_map[i].action<<endl;
			}
			对头~
			*/
		}

		//Output("traditional output", OUT_TO_CONSOLE);		//输出DFA到控制台
	}
}
void DFA::Minimize(){
	;
}
/*
使用这个输出DFA到:
1.文件,这样把DFA信息和词法分析程序代码分离,避免过于冗长的代码
输出至dfainfo.bin, 其中第一行为title
2.控制台,用于程序调试
输出格式为:
title
DFA邻接表
*/
void DFA::Output(const string &title, DFA::OUTPUT_MODE mode){
	switch(mode){
		case DFA::OUT_TO_CONSOLE:
			{
				cout<<title<<endl;
				cout<<"state count: "<<this_map.size()<<endl;
				for (dfa_map::iterator p = this_map.begin();
					p!=this_map.end(); p++){
						cout.width(5);
						cout.setf(ios::left);
						cout<<p->first;

						int index = p->first;
						state_set tmpset = state_map[index].s_set;
						cout<<"states: "<<tmpset.size()<<endl;
						for (state_set::iterator ppp = tmpset.begin();
							ppp != tmpset.end(); ppp++){
								cout<<*ppp<<',';
							}
						cout<<endl;

						for (hash_map<char, int>::iterator pp = p->second.begin();
							pp!=p->second.end(); pp++){
								cout<<pp->first<<','<<pp->second;
								cout<<((state_map[pp->second].istrmnl)?"T":"NT");
								cout<<' ';
							}
						cout<<endl;
					}
				cout<<endl;
			}
			break;
		case DFA::OUT_TO_FILE:
			{
				ofstream fout;
				fout.open(title.c_str(), ios::binary);
				for (dfa_map::iterator p = this_map.begin(); p!=this_map.end();p++){
					for (hash_map<char, int>::iterator pp = this_map[p->first].begin();
						pp != this_map[p->first].end();pp++){
						fout<<BYTE(p->first)<<BYTE(pp->first)<<BYTE(pp->second);
					}
				}
				fout<<BYTE(0);
				fout<<BYTE(state_map.GetMapCount());
				for (int i = 1; i <= state_map.GetMapCount(); i ++){
					if (state_map[i].istrmnl == true)
						fout<<BYTE(1);
					else
						fout<<BYTE(0);
					fout<<state_map[i].action;
					fout<<endl;
				}
				fout.close();
			}
			break;
		case OUT_TO_CODE:
			{
				fout<<endl<<"//"<<title<<endl;
				fout<<"void ProcessEndState(int state){"<<endl;
				fout<<"\tswitch(state){\n";
				for (int i = 1; i <= state_map.GetMapCount(); i ++){
					if (state_map[i].istrmnl == true){
						fout<<"\t\tcase "<<i<<":"<<state_map[i].action<<"break;\n";
					}
				}
				fout<<"\t\tdefault:{ReportParseError();}break;\n";
				fout<<"\t}"<<endl;
				fout<<"}"<<endl;
			}
			break;
		default:
			break;
	}
}

state_set operator+(state_set &a, state_set &b){
	state_set retval;

	for (state_set::iterator p = a.begin(); p!=a.end(); p++){
		retval.insert(*p);
	}
	for (p = b.begin(); p!=b.end();p++){
		retval.insert(*p);
	}

	return retval;
}
bool operator==(state_set &a, state_set &b){
	int size_a = a.size();
	int size_b = b.size();
	if (size_a!=size_b) return false;
	for (state_set::iterator p = a.begin(); p!=a.end();p++){
		if (!(*b.find(*p)==*p)) return false;
	}
	return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -