📄 dfascan.cpp
字号:
DFA::~DFA()
{
delete [] exp;
delete [] post;
delete [] edge;
delete [] AcceptStates;
NFATable->Clear();
DFATable->Clear();
}
// 获取输入
void DFA::GetRegExp()
{
cout << "在下面输入正则表达式" << endl;
cin >> exp;
for (int i=0; exp[i]!='\0'; i++)
{
if (exp[i] == '~')
{
cout << "\n字符'~'已经被程序禁用!" << endl;
getch();
exit(1);
}
}
cout << "\n------------------------" << endl;
}
// 加入cat-node作为连结点标志
void DFA::InsertCatNode()
{
int i = 0, j, len = strlen(exp);
while (exp[i+1] != '\0')
{
if (((exp[i] != '(' && exp[i] != '.' && exp[i] != '|')
|| exp[i] == ')'
|| exp[i] == '*')
&& (exp[i+1] != ')' && exp[i+1] != '.' && exp[i+1] != '|' && exp[i+1] != '*'))
{
for (j=len; j>i+1; j--)
{
exp[j] = exp[j-1];
}
exp[i+1] = '.';
len++;
exp[len] = '\0';
i++;
}
i++;
}
cout << "\n第一步: 加入连结点\n"
<< exp << "\n"
<< "字符串长度: " << len
<< "\n\n------------------------" << endl;
}
// 定义运算符的优先级
int DFA::Precedence(char symbol)
{
int priority;
switch (symbol)
{
case '|': priority = 1; break;
case '.': priority = 2; break;
case '*': priority = 3; break;
default: priority = 0; break;
}
return priority;
}
// 将正则式转为逆波兰式
void DFA::RegExpToPost()
{
int i = 0, j = 0;
char ch, cl;
strcpy(post, "\0");
LStack<char> *ls = new LStack<char>();
ls->Clear();
ls->Push('#');
ch = exp[i];
while (ch != '\0')
{
if (ch == '(')
{
ls->Push(ch);
ch = exp[++i];
}
else if (ch == ')')
{
while (ls->GetTop() != '(')
{
post[j++] = ls->Pop();
}
ls->Pop();
ch = exp[++i];
}
else if ((ch == '|') || (ch == '*') || (ch == '.'))
{
cl = ls->GetTop();
while (Precedence(cl) >= Precedence(ch))
{
post[j++] = cl;
ls->Pop();
cl = ls->GetTop();
}
ls->Push(ch);
ch = exp[++i];
}
else
{
post[j++] = ch;
ch = exp[++i];
}
}
ch = ls->Pop();
while ((ch == '|') || (ch == '*') || (ch == '.'))
{
post[j++] = ch;
ch = ls->Pop();
}
post[j] = '\0';
ls->Clear();
cout << "\n第二步: 转为后缀式\n"
<< post << "\n"
<< "字符串长度: " << strlen(post)
<< "\n\n------------------------" << endl;
}
// 扫描逆波兰式中除运算符以外的字符的数目
void DFA::GetEdgeNumber()
{
int i = 0, j;
edgeNumber = 0;
while (post[i] != '\0')
{
if (post[i] == '.' || post[i] == '|' || post[i] == '*')
{
i++;
continue;
}
for (j=0; j<edgeNumber; j++)
{
if (post[i] == edge[j])
{
break;
}
}
if (j == edgeNumber)
{
edge[edgeNumber] = post[i];
edgeNumber++;
}
i++;
}
edge[edgeNumber] = '\0';
cout << "\n第三步: 获取字符集\n";
for (i=0; i<edgeNumber; i++)
{
cout << edge[i] << ' ';
}
cout << "\n字符个数: " << edgeNumber
<< "\n\n------------------------" << endl;
}
// 用Thompson构造法构造NFA
void DFA::ThompsonConstruction()
{
int i, j;
char ch;
int s1, s2;
LStack<int> *states = new LStack<int>();
states->Clear();
if (strlen(post) < 1)
{
cout << "No Valid Regular Expression Found!" << endl;
getch();
exit(1);
}
NFATable->SetValue(0, 0);
i = 1;
j = 0;
ch = post[j];
while (ch != '\0')
{
if (ch == '.')
{
s2 = states->Pop();
int temp1 = states->Pop();
int temp2 = states->Pop();
s1 = states->Pop();
NFATable->InsertEdgeByValue(temp2, temp1, '~');
states->Push(s1);
states->Push(s2);
}
else if (ch == '|')
{
s2 = states->Pop();
int temp1 = states->Pop();
int temp2 = states->Pop();
s1 = states->Pop();
NFATable->InsertVertex(i);
NFATable->InsertVertex(i+1);
NFATable->InsertEdgeByValue(i, s1, '~');
NFATable->InsertEdgeByValue(i, temp1, '~');
NFATable->InsertEdgeByValue(temp2, i+1, '~');
NFATable->InsertEdgeByValue(s2, i+1, '~');
s1 = i;
s2 = i+1;
states->Push(s1);
states->Push(s2);
i += 2;
}
else if (ch == '*')
{
s2 = states->Pop();
s1 = states->Pop();
NFATable->InsertVertex(i);
NFATable->InsertVertex(i+1);
NFATable->InsertEdgeByValue(i, i+1, '~');
NFATable->InsertEdgeByValue(s2, s1, '~');
NFATable->InsertEdgeByValue(i, s1, '~');
NFATable->InsertEdgeByValue(s2, i+1, '~');
s1 = i;
s2 = i+1;
states->Push(s1);
states->Push(s2);
i += 2;
}
else
{
NFATable->InsertVertex(i);
NFATable->InsertVertex(i+1);
NFATable->InsertEdgeByValue(i, i+1, ch);
s1 = i;
s2 = i+1;
states->Push(s1);
states->Push(s2);
i += 2;
}
j++;
ch = post[j];
}
s2 = states->Pop();
s1 = states->Pop();
NFATable->InsertEdgeByValue(0, s1, '~');
if (! states->IsEmpty())
{
cout << "Some error in your input string!" << endl;
getch();
exit(1);
}
NFAStatesNumber = s2 + 1;
}
// 比较两个一维数组是否含有完全相同的元素
int DFA::CompArray(int *t1, int *t2)
{
int i = 0, j = 0, len1, len2;
while (t1[i] != -1)
{
i++;
}
len1 = i;
while (t2[j] != -1)
{
j++;
}
len2 = j;
if (len1 != len2)
{
return 0;
}
for (i=0; i<len1; i++)
{
for (j=0; j<len2; j++)
{
if (t1[i] == t2[j])
{
break;
}
}
if (j == len2)
{
return 0;
}
}
return 1;
}
// 最小化Dtran表
int DFA::MinimizeDFAStates(int **Dtran, int *AcceptStates, int DtranNumber, int edgeNumber)
{
int h, i, j, k, l;
for (i=0; i<DtranNumber-1; i++)
{
for (j=i+1; j<DtranNumber; j++)
{
if (AcceptStates[i] == AcceptStates[j])
{
for (k=0; k<edgeNumber; k++)
{
if (Dtran[i][k] != Dtran[j][k])
{
break;
}
}
if (k == edgeNumber)
{
for (l=j; l<DtranNumber-1; l++)
{
for (k=0; k<edgeNumber; k++)
{
Dtran[l][k] = Dtran[l+1][k];
}
AcceptStates[l] = AcceptStates[l+1];
}
for (l=0; l<DtranNumber-1; l++)
{
for (k=0; k<edgeNumber; k++)
{
if (Dtran[l][k] == j)
{
Dtran[l][k] = i;
}
}
}
for (h=j; h<DtranNumber; h++)
{
for (l=0; l<DtranNumber-1; l++)
{
for (k=0; k<edgeNumber; k++)
{
if (Dtran[l][k] == h+1)
{
Dtran[l][k] = h;
}
}
}
}
DtranNumber--;
j--;
}
}
}
}
return DtranNumber;
}
// 用子集构造法构造DFA
void DFA::SubsetConstruction()
{
int i, j, k;
DStatesNumber = 0;
DtranNumber = 0;
// 输出NFA状态表
cout << "\n第四步: 构造NFA\n\n";
NFATable->OutputNFA();
cout << "\n------------------------" << endl;
// 初始化DStates和Dtran两个构造表及AcceptStates数组
DStates = (int**)(new int*[NFAStatesNumber+1]);
for (i=0; i<NFAStatesNumber+1; i++)
{
DStates[i] = new int[NFAStatesNumber+1];
}
Dtran = (int**)(new int*[NFAStatesNumber+1]);
for (i=0; i<NFAStatesNumber+1; i++)
{
Dtran[i] = new int[edgeNumber+1];
}
for (i=0; i<NFAStatesNumber+1; i++)
{
for (j=0; j<edgeNumber+1; j++)
{
Dtran[i][j] = -1;
}
}
AcceptStates = new int[NFAStatesNumber+1];
for (i=0; i<NFAStatesNumber+1; i++)
{
AcceptStates[i] = 0;
}
// 调用闭包函数和移动函数构造DStates和Dtran两个表及AcceptStates数组
int *T = new int[NFAStatesNumber+1];
int *temp = new int[NFAStatesNumber+1];
T[0] = 0;
T[1] = -1;
T = NFATable->Closure(T);
DStates[DStatesNumber] = T;
DStatesNumber++;
k = 0;
while (k < DStatesNumber)
{
for (i=0; edge[i]!='\0'; i++)
{
temp = NFATable->Closure(NFATable->Move(T, edge[i]));
if (temp[0] != -1)
{
for (j=0; j<DStatesNumber; j++)
{
if (CompArray(temp, DStates[j]) == 1)
{
Dtran[k][i] = j;
break;
}
}
if (j == DStatesNumber)
{
DStates[DStatesNumber] = temp;
Dtran[k][i] = DStatesNumber;
DStatesNumber++;
}
}
}
k++;
T = DStates[k];
}
DtranNumber = k;
for (i=0; i<DStatesNumber; i++)
{
for (j=0; DStates[i][j]!= -1; j++)
{
if (DStates[i][j] == NFAStatesNumber - 1)
{
AcceptStates[i] = 1;
break;
}
}
}
// 输出DStates表项
cout << "\n第五步: 构造DStates表\n\n";
for (i=0; i<DStatesNumber; i++)
{
cout << "项目" << i <<": ";
j = 0;
while (DStates[i][j] != -1)
{
cout << DStates[i][j] << " ";
j++;
}
cout << endl;
}
cout << "\n------------------------" << endl;
// 输出Dtran表项
cout << "\n第六步: 构造Dtran表\n\n状态 ";
for (j=0; j<edgeNumber; j++)
{
cout << " " << edge[j];
}
cout << " 是否接收" << endl;
for (i=0; i<DtranNumber; i++)
{
if (i < 10) cout << " " << i << " ";
else if (i < 100) cout << " " << i << " ";
else if (i < 1000) cout << " " << i << " ";
else cout << i << " ";
for (j=0; j<edgeNumber; j++)
{
if (Dtran[i][j] < 0) cout << " ";
else if (Dtran[i][j] < 10) cout << " " << Dtran[i][j];
else if (Dtran[i][j] < 100) cout << " " << Dtran[i][j];
else if (Dtran[i][j] < 1000) cout << " " << Dtran[i][j];
else cout << " " << Dtran[i][j];
}
if (AcceptStates[i] == 1)
{
cout << " Acc";
}
cout << endl;
}
cout << "\n------------------------" << endl;
// 反复检测Dtran表中是否能化简,能则继续化简
int DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
while (DtranNumberAfterMinimization != DtranNumber)
{
DtranNumber = DtranNumberAfterMinimization;
DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
}
// 将Dtran表的内容拷贝到DFA表
DFATable = new TransitionTable(DtranNumber, edgeNumber);
for (i=0; i<DtranNumber; i++)
{
for (j=0; j<edgeNumber; j++)
{
DFATable->SetValue(i, j, Dtran[i][j]);
}
}
// 输出DFA状态表
cout << "\n第七步: 构造最小DFA\n\n状态 ";
for (j=0; j<edgeNumber; j++)
{
cout << " " << edge[j];
}
cout << " 是否接收" << endl;
for (i=0; i<DtranNumber; i++)
{
if (i < 10) cout << " " << i << " ";
else if (i < 100) cout << " " << i << " ";
else if (i < 1000) cout << " " << i << " ";
else cout << i << " ";
for (j=0; j<edgeNumber; j++)
{
if (DFATable->GetValue(i, j) < 0) cout << " ";
else if (DFATable->GetValue(i, j) < 10) cout << " " << DFATable->GetValue(i, j);
else if (DFATable->GetValue(i, j) < 100) cout << " " << DFATable->GetValue(i, j);
else if (DFATable->GetValue(i, j) < 1000) cout << " " << DFATable->GetValue(i, j);
else cout << " " << DFATable->GetValue(i, j);
}
if (AcceptStates[i] == 1)
{
cout << " Acc";
}
cout << endl;
}
cout << "\n------------------------" << endl;
// 析构DStates和Dtran表以及AcceptStates数组
for (i=0; i<NFAStatesNumber+1; i++)
{
delete [] DStates[i];
delete [] Dtran[i];
}
delete [] DStates;
delete [] Dtran;
}
// 从字符串中删除第一个字符
void DFA::RemoveFirstSymbol(char *buf, int &len)
{
for (int i=0; i<len-1; i++)
{
buf[i] = buf[i+1];
}
len--;
buf[len] = '\0';
}
// 从ASCII文件中找出所有的匹配式
void DFA::FindMatchingPatternInFile()
{
cout << "\n第八步: 查找匹配字符串\n\n";
// 打开一个只读文件和一个写文件
char filePath[128];
cout << "请输入要打开的文件" << endl;
cin >> filePath;
fstream infile;
infile.open(filePath, ios::in|ios::nocreate);
if (! infile)
{
cout << "\n打开文件" << filePath << "失败!" << endl;
getch();
exit(1);
}
cout << "请输入保存记录的文件" << endl;
cin >> filePath;
fstream outfile;
outfile.open(filePath, ios::out|ios::trunc);
if (! outfile)
{
cout << "\n建立文件" << filePath << "失败!" << endl;
getch();
exit(1);
}
// 开始验证正则表达式的匹配串
int ln = 1, col = 0, bufHead = 0;
int state = 0;
int count = 0;
char buf[256];
int curr = 0;
int len = 0;
char ch;
while (infile.get(ch))
{
if ((ch == '\n') || (ch == ' '))
{
while (curr < len)
{
state = 0;
while ((state != -1) && (curr < len))
{
state = DFATable->Transit(state, buf[curr], edge);
if (AcceptStates[state] == 1)
{
outfile << "\nLn " << ln << ", Col " << bufHead <<": ";
outfile.write(buf, curr+1);
count++;
}
curr++;
}
RemoveFirstSymbol(buf, len);
bufHead++;
curr = 0;
}
if (ch == '\n')
{
ln++;
col = 0;
}
if (ch == ' ')
{
col++;
}
}
else
{
col++;
if (len == 0)
{
bufHead = col;
}
buf[len++] = ch;
if (len >= 256)
{
cout << "读取字符串的长度超过最大限额!" << endl;
getch();
exit(1);
}
}
}
while (curr < len)
{
state = 0;
curr = 0;
while ((state != -1) && (curr < len))
{
state = DFATable->Transit(state, buf[curr], edge);
if (AcceptStates[state] == 1)
{
outfile << "\nLn " << ln << ", Col " << bufHead <<": ";
outfile.write(buf, curr+1);
count++;
}
curr++;
}
RemoveFirstSymbol(buf, len);
bufHead++;
}
if (count > 0)
{
outfile << "\n\n一共找到" << count << "匹配字符串";
}
else
{
outfile << "\n\n没有找到任何匹配字符串";
}
infile.close();
outfile.close();
cout << "\n查找结果已保存在" << filePath << "中\n"
<< "\n------------------------" << endl;
}
/////////////////////////////////////////////////
// 主函数
void main()
{
DFA dfa;
dfa.GetRegExp();
dfa.InsertCatNode();
dfa.RegExpToPost();
dfa.GetEdgeNumber();
dfa.ThompsonConstruction();
dfa.SubsetConstruction();
dfa.FindMatchingPatternInFile();
}
/////////////////////////////////////////////////
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -