📄 lexanalysis.cpp
字号:
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <queue>
#include <stack>
#include <algorithm>
#include <memory>
using namespace std;
//**************常量声明***********
// 类终极符结构体
struct Term {
string face;
vector<char> factT;
};
// Token 类型
struct Token {
int tokenKind;// Token 类型
string tokenKindName;// Token 类型名称
vector<pair<int,string> > turnTo;// 用来存放可能转向的状态集
bool isAcceptable;//是否可接受
};
// token名称记录
map<int,string> tokenName;
// NFA->DFA的的中间表示方式
struct DFA_State {
// 即sourceState通过terminal->desState;
vector<int> sourceState;
int terminal;
vector<int> desState;
};
// 重新编号DFA
struct DFANewState {
int source;//起始状态
int destination;//到达状态
int terminal;//通过的终极符
};
// 存放所有状态
vector<Token> allState;
//用来存放保留字的变量
map <string,int> reverseconst;
// 状态数量
int stateNum;
// 终极符数量
int terminalNum;
//假设最大能有的状态总数
const int STATENUMMAX = 400;
//假设最大能有的终极符个数
const int SIGNALMAX = 50;
//NFA状态转换图
vector<int> NFA[STATENUMMAX][SIGNALMAX];// NFA表
vector<int> NFAClosed[STATENUMMAX];//各个状态的*闭包
//正则文法的输入
vector<pair<string,int> > inputTokenKind;
vector<vector<string> > input;
//类终级符号
vector<Term> falseTerminal;
map<string,int> fterminal;
// 状态集合和终极字符集合,用对称形式,便于查找
map<string,int> state;
map<int,string> state2;
map<string,int> terminal;
map<int,string> terminal2;
vector<int> inner;
int PointToTerminal[256];
//输入和输出文件变量
ifstream in("C:\\学习\\作业\\Lex\\IN.txt");
ofstream out("C:\\学习\\作业\\Lex\\OUT.txt");
ifstream in1("C:\\学习\\作业\\Lex\\TEST.txt");
ofstream out1("C:\\学习\\作业\\Lex\\TOKEN.txt");
// 以下一些变量用以NFA->DFA中的使用
// 用来存放待转换的状态
queue<vector<int> > MulState;
//NFA->DFA后的状态
vector<vector<int> > StateForDFA;
vector<DFA_State> StateForDFA1;
// 重新编号后的DFA状态
vector<DFANewState> NewDFA;
map <vector<int>,int> IsAlreadyHave;
map<int,vector<int> > OpoHave;
// 标记DFA状态的情况
struct dfaAccepted {
bool IsAccepted;
string Name;
};
// 标记DFA是否为终态,p[i] = true,表示是终态,否则不是.
vector<dfaAccepted> DFAIsAccepted;
// 未最小化前的DFA状态总数
int DFAStateNum;
// 最小化DFA用到的结构
vector<vector<int> > allDFAState;
// DFAChange[i] 是 i+1到到达的状态
map<int,vector<int> > DFAChange;
int DFA[STATENUMMAX][SIGNALMAX];// DFA表
//************初始化***********************
// 初始化关键字表
int InitReverse() {
reverseconst["main"] = 1;
reverseconst["int"] = 2;
reverseconst["long"] = 3;
reverseconst["double"] = 4;
reverseconst["float"] = 5;
reverseconst["char"] = 6;
reverseconst["if"] = 7;
reverseconst["else"] = 8;
reverseconst["for"] = 9;
reverseconst["while"] = 10;
reverseconst["do"] = 11;
reverseconst["return"] = 12;
reverseconst["void"] = 13;
reverseconst["break"] = 14;
reverseconst["continue"] = 15;
reverseconst["goto"] = 16;
return 0;
}
int InitDFA() {
// 初始化DFA表,刚开始都有没联系,所以就均为0.
for(int i = 0; i< STATENUMMAX; ++i) {
for(int j = 0; j< SIGNALMAX; ++j) {
DFA[i][j] = 0;// 表示从i到j没有路径
}
}
return 0;
}
//********************函数声明*******************
// 一些函数的声明
void print(vector<int> );
void remarkDFA();
void sortDFA();
bool MyCmp(DFANewState a,DFANewState b);
void FillDFATable();
void Prase(string ,int);
//初始化常量和一些变量
void InitString()
{
InitReverse();//初始化保留字
InitDFA();//初始化DFA
}
//记录输入正则文法,以用来判断输入的文法是否正确
void InputGrammar()
{
string s1,s2,s3;
int Line = 0;
vector <string> temp;
string str; // Token种类
while(in >> str) {
if(str == "#") break;
inputTokenKind.push_back(make_pair(str,Line));
while(in >> s1 >> s2 >> s3) {
if(s1 == "%" && s2 == "%" && s3 == "%") break;
temp.push_back(s1);
temp.push_back(s2);
temp.push_back(s3);
input.push_back(temp);
temp.clear();
Line++;
}
inputTokenKind.push_back(make_pair(str,Line));
}
while(in >> s1 >> s2) {
Term temp;
temp.face = s1;
char T;
while(in >> T) {
if(T == '#') break;
temp.factT.push_back(T);
}
falseTerminal.push_back(temp);
fterminal[s1] = (int)fterminal.size();
}
}
// 搜索所有状态,并标记,存放在state里.
void GetState()
{
out << "\n标记所有状态:\n\n";
int t = 0;
int m = 0;
for(int i = 0; i< (int)input.size(); ++i) {
if(i>=inputTokenKind[m].second) m++;
if(state.find(input[i][0]) == state.end()) {
state[input[i][0]] = t;
tokenName[t] = inputTokenKind[m].first; //设置对应状态的tokenName
state2[t++] = input[i][0];
}
if(state.find(input[i][2]) == state.end()) {
state[input[i][2]] = t;
tokenName[t] = inputTokenKind[m].first; //设置对应状态的tokenName
state2[t++] = input[i][2];
}
}
stateNum = (int)state.size();
for(map<string,int>::iterator it = state.begin(); it!=state.end(); ++it) {
out << it->first << "\t" << it->second << endl;
}
return ;
}
// 获取终极符号
void GetTerminal()
{
out << "\n终极字符:\n";
int t = 0;
for(int i = 0; i< (int)input.size(); ++i) {
if(terminal.find(input[i][1]) == terminal.end()) {
terminal[input[i][1]] = t;
terminal2[t++] = input[i][1];
inner.push_back(-1);
}
}
terminalNum = (int)terminal.size();
for(int i = 0; i< (int)falseTerminal.size(); ++i) {
inner[terminal[falseTerminal[i].face]] = i;
}
for(map<string,int>::iterator it = terminal.begin(); it!=terminal.end(); ++it) {
out << it->first << '\t' << it->second << endl;
}
for(int i = 0; i< 256; ++i) PointToTerminal[i] = -1;
for(map<string,int>::iterator it = terminal.begin(); it!=terminal.end(); ++it) {
if(fterminal.find(it->first) == fterminal.end()) {
char c = (it->first)[0];
PointToTerminal[c] = it->second;
}
else {
int t = 0;
while(falseTerminal[t].face != it->first) t++;
for(int j = 0; j< (int)falseTerminal[t].factT.size(); ++j) {
PointToTerminal[falseTerminal[t].factT[j]] = it->second;
}
}
}
}
// 从正则文法转到NFA
int ChangeToNFA()
{
Token temp;
// 初始所有状态
for(int i = 0; i< stateNum; ++i) {
allState.push_back(temp);
allState[i].isAcceptable = false;
allState[i].tokenKind = i;
allState[i].tokenKindName = tokenName[i];
}
map<string,int> have;
// 记录状态
for(int i = 0; i<(int)input.size(); ++i) {
int m = state[input[i][0]];
int n = terminal[input[i][1]];
have[input[i][0]] = i+1;
NFA[m][n].push_back(state[input[i][2]]);
}
for(int i = 0; i<(int)input.size(); ++i) {
if(have.find(input[i][2]) == have.end()) allState[state[input[i][2]]].isAcceptable = true;
}
return 0;
}
//显示NFA表
void ShowNFA()
{
out << "\nNFA状态表: \n\n";
out <<"标号\t"<<"状态\t"<<"类型名\t"<<"是否接受\n";
for(int i = 0; i< (int)allState.size(); ++i) {
out << i <<"\t" << state2[i] << "\t" << allState[i].tokenKindName << "\t";
if(allState[i].isAcceptable == true) out << 1 << '\t';
else out << 0 << "\t";
for(int j = 0; j< (int)allState[i].turnTo.size(); ++j) {
out << allState[i].turnTo[j].first << "," << allState[i].turnTo[j].second << "\t";
}
out << endl;
}
}
// 求所有NFA状态的@闭包
void GetClosure() {
int T = terminal["@"];
queue<int> S;
for(int i = 0; i< stateNum; ++i) {
vector<int> temp;
NFAClosed[i].push_back(i);
if((int)NFA[i][T].size() == 0) {
continue;
}
bool *tag = new bool[stateNum];
memset(tag,false,sizeof(tag));
for(int j = 0; j< (int)NFA[i][T].size(); ++j) {
S.push(NFA[i][T][j]);
tag[NFA[i][T][j]] = true;
NFAClosed[i].push_back(NFA[i][T][j]);
}
while(!S.empty()) {
int t = S.front();
S.pop();
for(int j = 0; j< (int)NFA[t][T].size(); ++j) {
if(tag[NFA[t][T][j]] == true) continue;
S.push(NFA[t][T][j]);
tag[NFA[t][T][j]] = true;
NFAClosed[i].push_back(NFA[t][T][j]);
}
}
sort(NFAClosed[i].begin(),NFAClosed[i].end());
}
cout << "test begin\n";
for(int i = 0; i< stateNum; ++i) {
cout << i << '\t';
for(int j = 0; j< (int)NFAClosed[i].size(); ++j) {
cout << NFAClosed[i][j] << '\t';
}
cout << endl;
}
}
// 获取开始状态
int GetDFAStartState()
{
for(int i = 0; i< stateNum; ++i) {
if(state2[i] == "$") return i;// 因为文法中规定每个类型的开始符号均为$
}
return -1;
}
// 从NFA转到DFA
void NFAToDFA()
{
cout << "NFA->DFA begin:\n";
int start = GetDFAStartState();
vector<int> temp;
temp.push_back(start);
MulState.push(temp);
StateForDFA.push_back(temp);
DFA_State dfaState;
dfaState.sourceState.clear();
dfaState.terminal = -1;
dfaState.desState = temp;
StateForDFA1.push_back(dfaState);
// stateOrder新生成的状态数
int stateOrder = 1;
IsAlreadyHave[temp] = stateOrder;
OpoHave[stateOrder++] = temp;
int currentState = 0;
vector<int> product;
while(!MulState.empty()) {
temp = MulState.front();
// 排序后比较两个状态是否相同就比较简单
sort(temp.begin(),temp.end());
int *tag = new int[stateNum+1];
for(int k = 0; k<= stateNum; ++k) tag[k] = 0;
for(int j = 0; j< terminalNum; ++j) {
if(terminal2[j] == "@") continue;
product.clear();
for(int k = 0; k<= stateNum; ++k) tag[k] = 0;
for(int i = 0; i< (int)temp.size(); ++i) {
int t = temp[i];
if(NFA[t][j].size() == 0) continue;
for(int k = 0; k< (int)NFA[t][j].size(); ++k) {
int ts = NFA[t][j][k];
for(int h = 0; h< (int)NFAClosed[ts].size(); ++h) {
if(tag[NFAClosed[ts][h]] == 1) continue;
product.push_back(NFAClosed[ts][h]);
tag[NFAClosed[ts][h]] = 1;
}
}
}
if(product.size() == 0) continue;
sort(product.begin(),product.end());
// 若该状态未出现过,则加入
if(IsAlreadyHave.find(product)==IsAlreadyHave.end()) {
dfaState.terminal = j;
dfaState.sourceState = temp;
dfaState.desState = product;
StateForDFA1.push_back(dfaState);
StateForDFA.push_back(product);
MulState.push(product);
IsAlreadyHave[product] = stateOrder;
OpoHave[stateOrder++] = product;
}
else {
dfaState.terminal = j;
dfaState.sourceState = temp;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -