📄 lexdef.h
字号:
for (nfa_state_list::iterator p = this_map[i+states].begin();
p != this_map[i+states].end(); p++){
p->state += states;
}
}
this_map[i+states] = mapping[i];
for (nfa_state_list::iterator p = this_map[i+states].begin();
p != this_map[i+states].end(); p++){
p->state = i+states;
}
node.transition = CHARSTATE_EPSLONG;
node.state = terminal;
node.istrmnl = false;
this_map[i+states].insert(this_map[i+states].end(), node);
}
*/
}
break;
default:
break;
}
}
//获得这二个NFA所有的边上的字符的集合
void NFA::GetCharset(CHARSET *char_set){
set<char> temp;
char_set->set = new char[256];
for (int i = 0; i < GetStateCount(); i ++){
for (nfa_state_list::iterator p = this_map[i].begin();
p!=this_map[i].end(); p++){
if (p->transition!=CHARSTATE_EPSLONG){
temp.insert(p->transition);
}
}
}
int count = 0;
for (set<char>::iterator p = temp.begin();
p!=temp.end(); p++, count++){
char_set->set[count] = *p;
}
char_set->count = count;
/*
输出字符集看看对不对~
cout<<"charcount: "<<count<<endl;
for (int i = 0; i < count; i ++){
cout<<char_set->set[i]<<',';
}
cout<<endl;
*/
}
int NFA::GetStateCount(){
return state_count;
}
void NFA::InitAttributes(){
this->terminal = 0;
this->state_count = 1;
}
state_set NFA::GetFirstStateset(){
state_set tmpset;
tmpset.insert(0);
tmpset = GetEpslongTranStateset(tmpset);
tmpset.insert(0);
return tmpset;
}
state_set NFA::GetTranStateset(state_set &s_set, char transt){
//本函数中注释为性能测试代码
//DWORD start = GetTickCount();
state_set set1 = GetNonepslongTranStateset(s_set, transt);
state_set set2 = GetEpslongTranStateset(set1);
//cout<<"\t\tGetTranStateset() "<<GetTickCount()-start<<" ms"<<endl;
return set1+set2;
}
state_set NFA::GetNonepslongTranStateset(state_set &s_set, char transt){
state_set tmpset;
for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
for (nfa_state_list::iterator pp = this_map[*p].begin();pp != this_map[*p].end();pp++){
if (pp->transition==transt){
tmpset.insert(pp->state);
}
}
}
/*
测试一下这个转换对不对,把转换前后的集合输出
cout<<"s_set:\n";
for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
cout<<*p<<',';
}
cout<<endl;
cout<<"'"<<transt<<"'"<<endl;
for (state_set::iterator p = tmpset.begin(); p!=tmpset.end(); p++){
cout<<*p<<',';
}
cout<<endl;
对头~
*/
return tmpset;
}
state_set NFA::GetEpslongTranStateset(state_set &s_set){
state_set tmpset;
for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
for (nfa_state_list::iterator pp = this_map[*p].begin();pp != this_map[*p].end();pp++){
if (pp->transition==CHARSTATE_EPSLONG){
tmpset.insert(pp->state);
}
}
}
for (p = tmpset.begin(); p!=tmpset.end(); p++){
for (nfa_state_list::iterator pp = this_map[*p].begin();pp != this_map[*p].end();pp++){
if (pp->transition==CHARSTATE_EPSLONG){
tmpset.insert(pp->state);
}
}
}
/*
测试一下这个转换对不对,把转换前后的集合输出
cout<<"s_set:\n";
for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
cout<<*p<<',';
}
cout<<endl;
cout<<"'epslong'"<<endl;
for (state_set::iterator p = tmpset.begin(); p!=tmpset.end(); p++){
cout<<*p<<',';
}
cout<<endl;
对头~
*/
return tmpset;
}
DFANODE NFA::GetFirstDFANode(){
return GetDFANodeFromStateset
(GetFirstStateset());
}
DFANODE NFA::GetNextDFANode(DFANODE &node, char transt){
//此函数被DFA::DFA(NFA&)调用
//此函数性能瓶颈在GetDFANodeFromStateset一步
return GetDFANodeFromStateset
(GetTranStateset(node.s_set, transt));
}
DFANODE NFA::GetDFANodeFromStateset(state_set &s_set){
DFANODE node;
node.s_set = s_set;
node.istrmnl = false;
node.iskeywd = false;
//经过确定最终确定这个是性能瓶颈,此函数被
//DFANODE NFA::GetNextDFANode(DFANODE &node, char transt)
//调用
//DWORD start = GetTickCount();
//DWORD last = GetTickCount();
for (state_set::iterator p = s_set.begin(); p!=s_set.end(); p++){
//下面这个循环就是性能瓶颈
//fuck, nfa的数据结构出的问题,每次都要遍历整个邻接表,太SB了,BS我自己
//cout<<"\t\t\tstate_set::iterator "<<GetTickCount()-last<<" ms"<<endl; //性能测试
/*
其实不用都看一遍的,按照我NFA表的数据结构,只看头尾应该就够了,注意,是应该
我没有从数学上证明,如果出现了错误,要把这段代码用上的~
///////////////////////////////////////////该死的没错的超慢的算法
for (int i = 0; i < GetStateCount(); i ++){
for (nfa_state_list::iterator pp = this_map[i].begin(); pp!=this_map[i].end(); pp++){
if(pp->state==*p){
//fuck!!!!! 为什么这里写if(pp->istrmnl)就不对!!啊!!!
//一定要写if(pp->istrmnl==true),omg, have mercy!我承受能力很差的!!
if (pp->istrmnl == true){
if (node.istrmnl==false){
node.istrmnl = true;
node.iskeywd = pp->iskeywd;
node.action = pp->action;
break;
}
else{
if ((node.iskeywd==false) &&
(pp->iskeywd==true) &&
(pp->istrmnl==true)){
node.iskeywd = pp->iskeywd;
node.action = pp->action;
break;
}
}
}
}
}
}
*/
//cout<<"\t\t\tfor_one_cycle "<<GetTickCount()-last<<" ms"<<endl;
//last = GetTickCount();
//改进后的代码,从散列映射表中直接查询状态对应的信息
//再也不用每次都遍历该死的链表了,万岁~~~
if (state_map[*p].istrmnl==true){
if (node.istrmnl==false){
node.istrmnl = true;
node.iskeywd = state_map[*p].iskeywd;
node.action = state_map[*p].action;
}
else{
if ((node.iskeywd==false) &&
(state_map[*p].iskeywd==true) &&
(state_map[*p].istrmnl==true)){
node.iskeywd = true;
node.action = state_map[*p].action;
}
}
}
}
//cout<<"\t\tGetDFANodeFromStateset() "<<GetTickCount()-start<<" ms"<<endl;
/*
输出状态集的状态
cout<<endl;
for (p = s_set.begin();p!=s_set.end(); p++){
cout<<*p<<',';
}
cout<<endl;
cout<<((node.istrmnl==true)?"terminal":"nonterminal")<<endl;
if (node.istrmnl){
cout<<((node.iskeywd)?"keyword":"nonkeyword")<<endl;
cout<<"action:"<<node.action<<endl;
}
正确了~
*/
return node;
}
void NFA::Output(const string &title, NFA::OUTPUT_MODE mode){
switch(mode){
case NFA::OUT_TO_CONSOLE:
{
cout<<endl<<title<<endl;
for (int i = 0; i < GetStateCount(); i ++){
cout.width(5);cout.setf(ios::left);cout<<i;
for (nfa_state_list::iterator p = this_map[i].begin();
p != this_map[i].end(); p++){
cout<<p->transition<<','<<p->state;
if (p->istrmnl == true){
cout<<",T,";
if (p->iskeywd) cout<<"K,";
else cout<<"NK,";
//cout<<p->action; //打印动作(动作好长……格式会乱)
}
cout<<' ';
}
cout<<endl;
}
cout<<"state count: "<<state_count<<endl;
cout<<"terminal: "<<terminal<<endl;
}
break;
case NFA::OUT_TO_FILE:
break;
default:
break;
}
}
void NFA::BuildStateMap(){
for(int i = 0; i < GetStateCount(); i ++){
for(nfa_state_list::iterator p = this_map[i].begin(); p!=this_map[i].end(); p++){
state_map[p->state] = *p;
}
}
}
DFAStateMap::DFAStateMap(){
this->map_count = 0;
this->last_index = 0;
}
//直接调用成员变量的对应重载运算符实现,主要为了方便
DFANODE DFAStateMap::operator[](int index){
return this_map[index];
}
void DFAStateMap::insert(DFANODE &node){
this_map[++map_count] = node;
last_index = map_count;
}
bool DFAStateMap::has(const DFANODE &node){
int s_set_size = node.s_set.size();
for (int i = 1; i <= map_count; i ++){
if (this_map[i].s_set.size() == s_set_size){
last_index = i;
if (this_map[i].s_set==node.s_set) return true;
}
}
return false;
}
int DFAStateMap::GetLastIndex(){
return last_index;
}
int DFAStateMap::GetMapCount(){
return map_count;
}
/*
根据NFA构造DFA,呵呵,写在DFA构造函数里可以让主程序更加容易读懂
具体怎么实现,DFA臃肿点也没关系的啦~!
*/
DFA::DFA(NFA &nfa){
//NFA不能是空的(不会有人这么无聊拿空的NFA构造吧?)
if (nfa.GetStateCount()>1){
CHARSET char_set;
nfa.GetCharset(&char_set);
/*
这段代码可以测试一下
void DFAStateMap::insert(state_set &s_set) 和
bool DFAStateMap::has(const state_set &s_set)
state_set set1;
set1.insert(set1.end(), 1);
DFAStateMap map1;
map1.insert(set1);
state_set set2;
state_set set3;
set3.insert(set1.end(), 1);
set3.insert(set1.end(), 2);
cout<<((map1.has(set1)==true)?"true":"false")<<endl;
cout<<((map1.has(set2)==true)?"true":"false")<<endl;
cout<<((map1.has(set3)==true)?"true":"false")<<endl;
结果正确
*/
DFANODE node;
node = nfa.GetFirstDFANode();
state_map.insert(node);
int statecount = state_map.GetMapCount();
for (int i = 1; i <= statecount; i++){
//DWORD start1 = GetTickCount();
for (int j = 0; j < char_set.count; j++){
//DWORD start2 = GetTickCount();
//经过测试下面这个调用是性能瓶颈,靠,果然是我自己算法磋
node = nfa.GetNextDFANode(state_map[i], char_set.set[j]);
//cout<<"\tGetNextDFANode "<<GetTickCount()-start2<<" ms"<<endl;
//cout.flush();
if (node.s_set.size()){
if (state_map.has(node)){
//DWORD start3 = GetTickCount();
//cout<<"existing...now i = "<<i<<endl; //这是无聊的测试代码
//cout<<"added "<<i<<"->"<<char_set.set[j]<<","<<state_map.GetLastIndex()<<endl;
this_map[i][char_set.set[j]] = state_map.GetLastIndex();
//cout<<"\thas "<<GetTickCount()-start3<<" ms"<<endl;
//cout.flush();
}
else{
//DWORD start3 = GetTickCount();
//cout<<"insertion! now i = "<<i<<endl;
state_map.insert(node);
//cout<<"added "<<i<<"->"<<char_set.set[j]<<","<<state_map.GetLastIndex()<<endl;
this_map[i][char_set.set[j]] = state_map.GetLastIndex();
//cout<<"\t!has "<<GetTickCount()-start3<<" ms"<<endl;
//cout.flush();
}
}
}
statecount = state_map.GetMapCount();
//cout<<"for "<<GetTickCount()-start1<<" ms"<<endl;
//cout.flush();
/*
看看生成的DFA状态对不对
cout<<i<<((state_map[i].istrmnl)?" terminal":" nonterminal")<<endl;
if (state_map[i].istrmnl==true){
cout<<state_map[i].action<<endl;
}
对头~
*/
}
//Output("traditional output", OUT_TO_CONSOLE); //输出DFA到控制台
}
}
void DFA::Minimize(){
;
}
/*
使用这个输出DFA到:
1.文件,这样把DFA信息和词法分析程序代码分离,避免过于冗长的代码
输出至dfainfo.bin, 其中第一行为title
2.控制台,用于程序调试
输出格式为:
title
DFA邻接表
*/
void DFA::Output(const string &title, DFA::OUTPUT_MODE mode){
switch(mode){
case DFA::OUT_TO_CONSOLE:
{
cout<<title<<endl;
cout<<"state count: "<<this_map.size()<<endl;
for (dfa_map::iterator p = this_map.begin();
p!=this_map.end(); p++){
cout.width(5);
cout.setf(ios::left);
cout<<p->first;
int index = p->first;
state_set tmpset = state_map[index].s_set;
cout<<"states: "<<tmpset.size()<<endl;
for (state_set::iterator ppp = tmpset.begin();
ppp != tmpset.end(); ppp++){
cout<<*ppp<<',';
}
cout<<endl;
for (hash_map<char, int>::iterator pp = p->second.begin();
pp!=p->second.end(); pp++){
cout<<pp->first<<','<<pp->second;
cout<<((state_map[pp->second].istrmnl)?"T":"NT");
cout<<' ';
}
cout<<endl;
}
cout<<endl;
}
break;
case DFA::OUT_TO_FILE:
{
ofstream fout;
fout.open(title.c_str(), ios::binary);
for (dfa_map::iterator p = this_map.begin(); p!=this_map.end();p++){
for (hash_map<char, int>::iterator pp = this_map[p->first].begin();
pp != this_map[p->first].end();pp++){
fout<<BYTE(p->first)<<BYTE(pp->first)<<BYTE(pp->second);
}
}
fout<<BYTE(0);
fout<<BYTE(state_map.GetMapCount());
for (int i = 1; i <= state_map.GetMapCount(); i ++){
if (state_map[i].istrmnl == true)
fout<<BYTE(1);
else
fout<<BYTE(0);
fout<<state_map[i].action;
fout<<endl;
}
fout.close();
}
break;
case OUT_TO_CODE:
{
fout<<endl<<"//"<<title<<endl;
fout<<"void ProcessEndState(int state){"<<endl;
fout<<"\tswitch(state){\n";
for (int i = 1; i <= state_map.GetMapCount(); i ++){
if (state_map[i].istrmnl == true){
fout<<"\t\tcase "<<i<<":"<<state_map[i].action<<"break;\n";
}
}
fout<<"\t\tdefault:{ReportParseError();}break;\n";
fout<<"\t}"<<endl;
fout<<"}"<<endl;
}
break;
default:
break;
}
}
state_set operator+(state_set &a, state_set &b){
state_set retval;
for (state_set::iterator p = a.begin(); p!=a.end(); p++){
retval.insert(*p);
}
for (p = b.begin(); p!=b.end();p++){
retval.insert(*p);
}
return retval;
}
bool operator==(state_set &a, state_set &b){
int size_a = a.size();
int size_b = b.size();
if (size_a!=size_b) return false;
for (state_set::iterator p = a.begin(); p!=a.end();p++){
if (!(*b.find(*p)==*p)) return false;
}
return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -