📄 myyacc.cpp
字号:
#include<iostream>
#include<iomanip>
#include<fstream>
#include<string>
#include<set>
#include<vector>
#include<stack>
#include<queue>
#include<hash_set>
#include<hash_map>
#include<map>
#include<conio.h>
#include"myyacc.h"
using namespace stdext;
using namespace std;
bool item::end(vector<producer*>& p)
{
if(pp>=p[index]->right.size())return true;
else return false;
}
int item::getcurpath(vector<producer*>& p)
{
return p[index]->right[pp];
}
int item::getnextSym(vector<producer*>& p)
{
return p[index]->right[pp+1];
}
int grammer::strtoint(string curstr)
{
unsigned int size=curstr.size();
if(size==0)return 0;//empty string
else if(size==1)return (int)(curstr[0]);
else return tokenmap[curstr];
}
bool grammer::begin(ifstream& input)
{
realstart=0;
numofleft=-1;
numofproducers=-1;
char cur;
string* bufferIn=new string[BUFSIZE];
string* bufferH=new string[HSIZE];
char* curline=new char[LSIZE];
int line=0;
int index=-1;
//string substr;
do
{
input.getline(curline,LSIZE);
if(curline[0]=='%'&&curline[1]=='%')break;
// cout<<curline<<endl;
++line;
if((curline[0])!='%'){
if(curline[0]=='/'&&curline[1]=='*')continue;
else {
erro(++line);
return false;
}
}
//substr='\0';//作局部处理时用到
if(curline[1]=='{')
{
do{
input.getline(curline,LSIZE);
//cout<<curline<<endl;
++line;
bufferIn[++index]=curline;
}while(curline[0]!='%');
}
else{
string substr;
int i=1;
while(curline[i]!=' ')substr.push_back(curline[i++]);
int r;
if(!substr.compare("union"))r=UNION;
else if(!substr.compare("token"))r=TOKEN;
else if(!substr.compare("type"))r=TYPE;
else if(!substr.compare("nonassoc"))r=NONASSOC;
else if(!substr.compare("left"))r=LEFT;
else if(!substr.compare("right"))r=RIGHT;
switch(r){
case UNION:dealunion(input);break;
case TOKEN:dealtoken(curline);break;
case TYPE:dealtype(curline);break;
case NONASSOC:dealnonassoc(curline);break;
case LEFT:dealleft(curline);break;
case RIGHT:dealright(curline);break;
default:cout<<"unexpressed reserved word"<<endl;
}
}
}while(curline[0]!='%'||curline[1]!='%');
getabh();//生成yy.tab.h文件
do{
input.getline(curline,LSIZE);
if(curline[0]=='%'&&curline[1]=='%')break;
++line;
if(curline[0]=='%'){
int j=0;
string substr;
while(curline[j]!=' ')substr.push_back(curline[++j]);
if(substr.compare("start")==0)realstart=-1;//表示有效,待会在deal中赋值,这样做变态了一点,呵呵,我喜欢这么干
else {
erro(++line);
return false;
}
}
int i=0;
if(curline[0]=='\0')continue;
while(curline[i]==' '||curline[i]=='\t')++i;
if(curline[i]==';')continue;
if(curline[i]=='|')//是右步
dealproducer(0,curline,i);
else dealproducer(1,curline,i);
}while(curline[0]!='%'||curline[1]!='%'||!input.eof());
//producers are all droped out now
//Begin our creating of table for driving the compiler
first();
creatLR();
creattable();
generatecode();
return true;
}
bool grammer::dealproducer(int type,string s,int start)//用type来表示是有无left的传进来
{
//string subs;
int i=start;
int size=s.size();
int curright;
producer* t=new producer();
++numofproducers;
if(type>0){//是由left的
string subs;
while(i<size&&s[i]!=' ')subs.push_back(s[i++]);
if(lefts.find(subs)==lefts.end()){
lefts[subs]=++numofleft;//就用值与下标相同的来表示NT的值
nonterminals.insert(numofleft);
isepsilon.push_back(false);
}
curleft=lefts[subs];
ptoindex[curleft]=numofproducers;
ptosize[curleft]=0;
i=i+2;//越过:和一个空格
if(realstart<0)realstart=curleft;//我的经典做法
}
while(s[i]==' '||s[i]=='\t'||s[i]=='|')++i;
if(s[i]==';'){
--numofproducers;
return true;
}
//++i;
t->left=curleft;
t->right=vector<int>(0);
if(s[i]=='\0'){
t->right.push_back(EPSILON);
isepsilon[curleft]=true;//这个left可以生成空串
goto myend;
}
while(i<size)
{
string subs;//stl中不知回收处理的如何,先试试,注意这里可能会出错
while(s[i]!=' '&&s[i]!='\0')subs.push_back(s[i++]);
if(subs[0]=='\''){
curright=(int)subs[1];//lex 中处理时也是这样,单个字符直接返回其ASCII值
terminals.insert((int)subs[1]);//termianls在处理token时已经有了一部分值,这里补充而已
}
else if(subs[0]<91&&subs[0]>64)
//tokens
curright=tokenmap[subs];
else{
if(lefts.find(subs)==lefts.end())//it is new
{
lefts[subs]=++numofleft;
nonterminals.insert(numofleft);
isepsilon.push_back(false);
}
curright=lefts[subs];
}
t->right.push_back(curright);
++i;
}
myend:
++ptosize[curleft];
producers.push_back(t);
return true;
}
bool grammer::creatLR()
{
int i,s;
producer* startp=new producer();
startp->left=++numofleft;
startp->right.push_back(realstart);
producers.push_back(startp);
item curitem(numofproducers,0);
curitem.index=producers.size()-1;//正好将开始态的唯一一个item对应的producer放在最后一个
curitem.lookhead.insert(ENDMARK);
set<item,mycompare> mystartset;
mystartset.insert(curitem);
closure(mystartset);
nextStateID=-1;
queue<ItemSet> Q;
Q.push(mystartset);
FAstate* fastate=new FAstate(++nextStateID);
finalstates.push_back(fastate);
stateSet.insert(mystartset);
ItemSet curset;
int curID;
queue<int> stateQ;//心寒阿,我忘记了用个隐射了,只好用两个同步队列,应该在时间效率上低了,但用hash时我担心默认的hashfunction会出错,还是原始的好啊
stateQ.push(nextStateID);
FAstate* temp;//When add path, it is used
while(!Q.empty()){
curset=Q.front();
curID=stateQ.front();
stateQ.pop();
temp=finalstates[curID];
Q.pop();
curset.begin()->stateID=curID;//我实在没办法啊,所以这么干,
stateSet.insert(curset);
finalStateSet.push_back(curset);
statetoset[curID]=curset;
//settostate[curset]=curID;
//cout<<curset.begin()->index<<endl;
hash_set<int>paths;//记录路径的临时变量,用的很BT不,嘿嘿,我的风格
hash_map<int,ItemSet> pathtoset;//采用临时变量的好处就在于我在while的每个循环都要用新的
s=curset.size();
ItemSet::iterator j=curset.begin();
for(;s>0;s--){
item temp=*j;
++j;
if(!temp.end(producers)){
int tpath=temp.getcurpath(producers);
item titem(temp.index,++temp.pp);
titem.lookhead=temp.lookhead;
if(paths.find(tpath)!=paths.end())//tpath不是新的路径,已经有目标集合存在,继续加入就可以
{
pathtoset[tpath].insert(titem);
}
else{
paths.insert(tpath);
ItemSet newitemset;
newitemset.insert(titem);
pathtoset[tpath]=newitemset;
}
}
else continue;
}
//检查生成的一些列itemset
hash_set<int>::iterator i;
for(i=paths.begin();i!=paths.end();++i)
{
closure(pathtoset[*i]);
set<ItemSet>::iterator myitr=stateSet.find(pathtoset[*i]);
if(myitr==stateSet.end()){
//stateSet.insert(pathtoset[*i]);
Q.push(pathtoset[*i]);//待会回来改一下,三次访问时都要下标几次,浪费时间?
FAstate* mystate=new FAstate(++nextStateID);
stateQ.push(nextStateID);
temp->trans[*i]=nextStateID;
//cout<<nextStateID<<endl;
//stateQ.push(nextStateID);
finalstates.push_back(mystate);
}
else //已经有了,加上边即可
{
temp->trans[*i]=myitr->begin()->stateID;
}
}
}
return true;
}
void grammer::closure(ItemSet &curSet)
{
queue<item> Q;
for(ItemSet::iterator i=curSet.begin();i!=curSet.end();++i)
{
if((!(*i).end(producers))&&nonterminals.find((*i).getcurpath(producers))!=nonterminals.end())Q.push(*i);
}
while(!Q.empty())
{
item curit=Q.front();
Q.pop();
hash_set<int> curlook;
findlook(curit,curlook);
int curp=curit.getcurpath(producers);
int num=ptosize[curp];
int index=ptoindex[curp];
for(int i=0;i<num;i++)//把找到的所有的新的p都构造新的item,进行加入操作
{
item newit(i+index,0);
newit.index=i+index;
newit.pp=0;
for(hash_set<int>::iterator j=curlook.begin();j!=curlook.end();j++)
{
newit.lookhead.insert(*j);
}
ItemSet::iterator ii=curSet.find(newit);
if(ii==curSet.end())//it is new
{
curSet.insert(newit);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -