⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dfascan.cpp

📁 正则式转化有穷自动机
💻 CPP
📖 第 1 页 / 共 2 页
字号:
DFA::~DFA()
{
	delete [] exp;
	delete [] post;
	delete [] edge;
	delete [] AcceptStates;
	NFATable->Clear();
	DFATable->Clear();
}
// 获取输入
void DFA::GetRegExp()
{
	cout << "在下面输入正则表达式" << endl;
	cin >> exp;
	for (int i=0; exp[i]!='\0'; i++)
	{
		if (exp[i] == '~')
		{
			cout << "\n字符'~'已经被程序禁用!" << endl;
			getch();
			exit(1);
		}
	}
	cout << "\n------------------------" << endl;
}
// 加入cat-node作为连结点标志
void DFA::InsertCatNode()
{
	int i = 0, j, len = strlen(exp);
	while (exp[i+1] != '\0')
	{
		if (((exp[i] != '(' && exp[i] != '.' && exp[i] != '|') 
			|| exp[i] == ')' 
			|| exp[i] == '*')
			&& (exp[i+1] != ')' && exp[i+1] != '.' && exp[i+1] != '|' && exp[i+1] != '*'))
		{
			for (j=len; j>i+1; j--)
			{
				exp[j] = exp[j-1];
			}
			exp[i+1] = '.';
			len++;
			exp[len] = '\0';
			i++;
		}
		i++;
	}
	cout << "\n第一步: 加入连结点\n"
		 << exp << "\n"
		 << "字符串长度: " << len 
		 << "\n\n------------------------" << endl;
}
// 定义运算符的优先级
int DFA::Precedence(char symbol)
{
	int priority;
	switch (symbol)
	{
	case '|': priority = 1; break;
	case '.': priority = 2; break;
	case '*': priority = 3; break;
	default:  priority = 0; break;
	}
	return priority;
}
// 将正则式转为逆波兰式
void DFA::RegExpToPost()
{
	int i = 0, j = 0;
	char ch, cl;
	strcpy(post, "\0");
	LStack<char> *ls = new LStack<char>();
	ls->Clear();
	ls->Push('#');
	ch = exp[i];
	while (ch != '\0')
	{
		if (ch == '(')
		{
			ls->Push(ch);
			ch = exp[++i];
		}
		else if (ch == ')')
		{
			while (ls->GetTop() != '(')
			{
				post[j++] = ls->Pop();
			}
            ls->Pop();
			ch = exp[++i];
		}
		else if ((ch == '|') || (ch == '*') || (ch == '.'))
		{
	    	cl = ls->GetTop();
		   	while (Precedence(cl) >= Precedence(ch))
			{
		    	post[j++] = cl;
		    	ls->Pop();
		    	cl = ls->GetTop();
			}
			ls->Push(ch);
			ch = exp[++i];
		}
		else
		{
			post[j++] = ch;
			ch = exp[++i];
		}
	}
	ch = ls->Pop();
	while ((ch == '|') || (ch == '*') || (ch == '.'))
	{
		post[j++] = ch;
		ch = ls->Pop();
	}
	post[j] = '\0';
	ls->Clear();
	cout << "\n第二步: 转为后缀式\n"
		 << post << "\n"
		 << "字符串长度: " << strlen(post) 
		 << "\n\n------------------------" << endl;
}
// 扫描逆波兰式中除运算符以外的字符的数目
void DFA::GetEdgeNumber()
{
	int i = 0, j;
	edgeNumber = 0;
	while (post[i] != '\0')
	{
		if (post[i] == '.' || post[i] == '|' || post[i] == '*')
		{
			i++;
			continue;
		}
		for (j=0; j<edgeNumber; j++)
		{
			if (post[i] == edge[j])
			{
				break;
			}
		}
		if (j == edgeNumber)
		{
			edge[edgeNumber] = post[i];
			edgeNumber++;
		}
		i++;
	}
	edge[edgeNumber] = '\0';
	cout << "\n第三步: 获取字符集\n";
	for (i=0; i<edgeNumber; i++)
	{
		cout << edge[i] << ' ';
	}
	cout << "\n字符个数: " << edgeNumber 
		 << "\n\n------------------------" << endl;
}
// 用Thompson构造法构造NFA
void DFA::ThompsonConstruction()
{
	int i, j;
	char ch;
	int s1, s2;
	LStack<int> *states = new LStack<int>();
	states->Clear();
	if (strlen(post) < 1)
	{
		cout << "No Valid Regular Expression Found!" << endl;
		getch();
		exit(1);
	}
	NFATable->SetValue(0, 0);
	i = 1;
	j = 0;
	ch = post[j];
    while (ch != '\0')
	{
		if (ch == '.')
		{
			s2 = states->Pop();
			int temp1 = states->Pop();
			int temp2 = states->Pop();
			s1 = states->Pop();
			NFATable->InsertEdgeByValue(temp2, temp1, '~');
			states->Push(s1);
			states->Push(s2);
		}
		else if (ch == '|')
		{
			s2 = states->Pop();
			int temp1 = states->Pop();
			int temp2 = states->Pop();
			s1 = states->Pop();
			NFATable->InsertVertex(i);
			NFATable->InsertVertex(i+1);
			NFATable->InsertEdgeByValue(i, s1, '~');
			NFATable->InsertEdgeByValue(i, temp1, '~');
			NFATable->InsertEdgeByValue(temp2, i+1, '~');
			NFATable->InsertEdgeByValue(s2, i+1, '~');
			s1 = i;
			s2 = i+1;
			states->Push(s1);
			states->Push(s2);
			i += 2;
		}
		else if (ch == '*')
		{
			s2 = states->Pop();
			s1 = states->Pop();
			NFATable->InsertVertex(i);
			NFATable->InsertVertex(i+1);
			NFATable->InsertEdgeByValue(i, i+1, '~');
			NFATable->InsertEdgeByValue(s2, s1, '~');
			NFATable->InsertEdgeByValue(i, s1, '~');
			NFATable->InsertEdgeByValue(s2, i+1, '~');
			s1 = i;
			s2 = i+1;
			states->Push(s1);
			states->Push(s2);
			i += 2;
		}
		else
		{
			NFATable->InsertVertex(i);
			NFATable->InsertVertex(i+1);
			NFATable->InsertEdgeByValue(i, i+1, ch);
			s1 = i;
			s2 = i+1;
			states->Push(s1);
			states->Push(s2);
			i += 2;
		}
		j++;
		ch = post[j];
	}
	s2 = states->Pop();
	s1 = states->Pop();
	NFATable->InsertEdgeByValue(0, s1, '~');
	if (! states->IsEmpty())
	{
		cout << "Some error in your input string!" << endl;
		getch();
		exit(1);
	}
	NFAStatesNumber = s2 + 1;
}
// 比较两个一维数组是否含有完全相同的元素
int DFA::CompArray(int *t1, int *t2)
{
	int i = 0, j = 0, len1, len2;
	while (t1[i] != -1)
	{
		i++;
	}
	len1 = i;
	while (t2[j] != -1)
	{
		j++;
	}
	len2 = j;
	if (len1 != len2)
	{
		return 0;
	}
	for (i=0; i<len1; i++)
	{
		for (j=0; j<len2; j++)
		{
			if (t1[i] == t2[j])
			{
				break;
			}
		}
		if (j == len2)
		{
			return 0;
		}
	}
	return 1;
}
// 最小化Dtran表
int DFA::MinimizeDFAStates(int **Dtran, int *AcceptStates, int DtranNumber, int edgeNumber)
{
	int h, i, j, k, l;
	for (i=0; i<DtranNumber-1; i++)
	{
		for (j=i+1; j<DtranNumber; j++)
		{
			if (AcceptStates[i] == AcceptStates[j])
			{
				for (k=0; k<edgeNumber; k++)
				{
					if (Dtran[i][k] != Dtran[j][k])
					{
						break;
					}
				}
				if (k == edgeNumber)
				{
					for (l=j; l<DtranNumber-1; l++)
					{
						for (k=0; k<edgeNumber; k++)
						{
							Dtran[l][k] = Dtran[l+1][k];
						}
						AcceptStates[l] = AcceptStates[l+1];
					}
					for (l=0; l<DtranNumber-1; l++)
					{
						for (k=0; k<edgeNumber; k++)
						{
							if (Dtran[l][k] == j)
							{
								Dtran[l][k] = i;
							}
						}
					}
					for (h=j; h<DtranNumber; h++)
					{
						for (l=0; l<DtranNumber-1; l++)
						{
							for (k=0; k<edgeNumber; k++)
							{
								if (Dtran[l][k] == h+1)
								{
									Dtran[l][k] = h;
								}
							}
						}
					}
					DtranNumber--;
					j--;
				}
			}
		}
	}
	return DtranNumber;
}
// 用子集构造法构造DFA
void DFA::SubsetConstruction()
{
	int i, j, k;
	DStatesNumber = 0;
	DtranNumber = 0;
	// 输出NFA状态表
	cout << "\n第四步: 构造NFA\n\n";
    NFATable->OutputNFA();
	cout << "\n------------------------" << endl;
	// 初始化DStates和Dtran两个构造表及AcceptStates数组
	DStates = (int**)(new int*[NFAStatesNumber+1]);
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		DStates[i] = new int[NFAStatesNumber+1];
	}
	Dtran = (int**)(new int*[NFAStatesNumber+1]);
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		Dtran[i] = new int[edgeNumber+1];
	}
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		for (j=0; j<edgeNumber+1; j++)
		{
			Dtran[i][j] = -1;
		}
	}
	AcceptStates = new int[NFAStatesNumber+1];
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		AcceptStates[i] = 0;
	}
	// 调用闭包函数和移动函数构造DStates和Dtran两个表及AcceptStates数组
    int *T = new int[NFAStatesNumber+1];
	int *temp = new int[NFAStatesNumber+1];
	T[0] = 0;
	T[1] = -1;
	T = NFATable->Closure(T);
	DStates[DStatesNumber] = T;
	DStatesNumber++;
	k = 0;
	while (k < DStatesNumber)
	{
		for (i=0; edge[i]!='\0'; i++)
		{
			temp = NFATable->Closure(NFATable->Move(T, edge[i]));
			if (temp[0] != -1)
			{
				for (j=0; j<DStatesNumber; j++)
				{
					if (CompArray(temp, DStates[j]) == 1)
					{
						Dtran[k][i] = j;
						break;
					}
				}
				if (j == DStatesNumber)
				{
					DStates[DStatesNumber] = temp;
					Dtran[k][i] = DStatesNumber;
					DStatesNumber++;
				}
			}
		}
		k++;
		T = DStates[k];
	}
	DtranNumber = k;
	for (i=0; i<DStatesNumber; i++)
	{
		for (j=0; DStates[i][j]!= -1; j++)
		{
			if (DStates[i][j] == NFAStatesNumber - 1)
			{
				AcceptStates[i] = 1;
				break;
			}
		}
	}
	// 输出DStates表项
	cout << "\n第五步: 构造DStates表\n\n";
	for (i=0; i<DStatesNumber; i++)
	{
		cout << "项目" << i <<":  ";
		j = 0;
		while (DStates[i][j] != -1)
		{
			cout << DStates[i][j] << " ";
			j++;
		}
		cout << endl;
	}
	cout << "\n------------------------" << endl;
	// 输出Dtran表项
	cout << "\n第六步: 构造Dtran表\n\n状态 ";
	for (j=0; j<edgeNumber; j++)
	{
		cout << "    " << edge[j];
	}
	cout << "    是否接收" << endl;
	for (i=0; i<DtranNumber; i++)
	{
		if (i < 10)  cout << "   " << i << " ";
		else if (i < 100)  cout << "  " << i << " ";
		else if (i < 1000)  cout << " " << i << " ";
		else  cout << i << " ";
		for (j=0; j<edgeNumber; j++)
		{
			if (Dtran[i][j] < 0)  cout << "     ";
			else if (Dtran[i][j] < 10)  cout << "    " << Dtran[i][j];
			else if (Dtran[i][j] < 100)  cout << "   " << Dtran[i][j];
			else if (Dtran[i][j] < 1000)  cout << "  " << Dtran[i][j];
			else  cout << " " << Dtran[i][j];
		}
		if (AcceptStates[i] == 1)
		{
			cout << "    Acc";
		}
		cout << endl;
	}
	cout << "\n------------------------" << endl;
	// 反复检测Dtran表中是否能化简,能则继续化简
	int DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
	while (DtranNumberAfterMinimization != DtranNumber)
	{
		DtranNumber = DtranNumberAfterMinimization;
		DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
	}
	// 将Dtran表的内容拷贝到DFA表
	DFATable = new TransitionTable(DtranNumber, edgeNumber);
	for (i=0; i<DtranNumber; i++)
	{
		for (j=0; j<edgeNumber; j++)
		{
			DFATable->SetValue(i, j, Dtran[i][j]);
		}
	}
	// 输出DFA状态表
	cout << "\n第七步: 构造最小DFA\n\n状态 ";
	for (j=0; j<edgeNumber; j++)
	{
		cout << "    " << edge[j];
	}
	cout << "    是否接收" << endl;
	for (i=0; i<DtranNumber; i++)
	{
		if (i < 10)  cout << "   " << i << " ";
		else if (i < 100)  cout << "  " << i << " ";
		else if (i < 1000)  cout << " " << i << " ";
		else  cout << i << " ";
		for (j=0; j<edgeNumber; j++)
		{
			if (DFATable->GetValue(i, j) < 0)  cout << "     ";
			else if (DFATable->GetValue(i, j) < 10)  cout << "    " << DFATable->GetValue(i, j);
			else if (DFATable->GetValue(i, j) < 100)  cout << "   " << DFATable->GetValue(i, j);
			else if (DFATable->GetValue(i, j) < 1000)  cout << "  " << DFATable->GetValue(i, j);
			else  cout << " " << DFATable->GetValue(i, j);
		}
		if (AcceptStates[i] == 1)
		{
			cout << "    Acc";
		}
		cout << endl;
	}
	cout << "\n------------------------" << endl;
	// 析构DStates和Dtran表以及AcceptStates数组
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		delete [] DStates[i];
		delete [] Dtran[i];
	}
	delete [] DStates;
	delete [] Dtran;
}
// 从字符串中删除第一个字符
void DFA::RemoveFirstSymbol(char *buf, int &len)
{
	for (int i=0; i<len-1; i++)
	{
		buf[i] = buf[i+1];
	}
	len--;
	buf[len] = '\0';
}
// 从ASCII文件中找出所有的匹配式
void DFA::FindMatchingPatternInFile()
{
	cout << "\n第八步: 查找匹配字符串\n\n";
	// 打开一个只读文件和一个写文件
	char filePath[128];
	cout << "请输入要打开的文件" << endl;
	cin >> filePath;
	fstream infile;
	infile.open(filePath, ios::in|ios::nocreate);
	if (! infile)
	{
		cout << "\n打开文件" << filePath << "失败!" << endl;
		getch();
		exit(1);
	}
	cout << "请输入保存记录的文件" << endl;
	cin >> filePath;
	fstream outfile;
	outfile.open(filePath, ios::out|ios::trunc);
	if (! outfile)
	{
		cout << "\n建立文件" << filePath << "失败!" << endl;
		getch();
		exit(1);
	}
	// 开始验证正则表达式的匹配串
	int ln = 1, col = 0, bufHead = 0;
	int state = 0;
	int count = 0;
	char buf[256];
	int curr = 0;
	int len = 0;
	char ch;
	while (infile.get(ch))
	{
		if ((ch == '\n') || (ch == ' '))
		{	
			while (curr < len)
			{
				state = 0;
				while ((state != -1) && (curr < len))
				{
					state = DFATable->Transit(state, buf[curr], edge);
			    	if (AcceptStates[state] == 1)
					{
						outfile << "\nLn " << ln << ", Col " << bufHead <<": ";
						outfile.write(buf, curr+1);
				    	count++;
					}
					curr++;
				}
				RemoveFirstSymbol(buf, len);
				bufHead++;
				curr = 0;
			}
			if (ch == '\n')
			{
				ln++;
				col = 0;
			}
			if (ch == ' ')
			{
				col++;
			}
		}
		else
		{
			col++;
			if (len == 0)
			{
				bufHead = col;
			}
			buf[len++] = ch;
			if (len >= 256)
			{
				cout << "读取字符串的长度超过最大限额!" << endl;
				getch();
				exit(1);
			}
		}
	}
	while (curr < len)
	{
		state = 0;
	  	curr = 0;
		while ((state != -1) && (curr < len))
		{
			state = DFATable->Transit(state, buf[curr], edge);
		   	if (AcceptStates[state] == 1)
			{
				outfile << "\nLn " << ln << ", Col " << bufHead <<": ";
				outfile.write(buf, curr+1);
			   	count++;
			}
			curr++;
		}
		RemoveFirstSymbol(buf, len);
		bufHead++;
	}
	if (count > 0)
	{
		outfile << "\n\n一共找到" << count << "匹配字符串";
	}
	else
	{
		outfile << "\n\n没有找到任何匹配字符串";
	}
	infile.close();
	outfile.close();
	cout << "\n查找结果已保存在" << filePath << "中\n"
	     << "\n------------------------" << endl;
}

/////////////////////////////////////////////////

// 主函数
void main()
{
	DFA dfa;
	dfa.GetRegExp();
	dfa.InsertCatNode();
	dfa.RegExpToPost();
	dfa.GetEdgeNumber();
	dfa.ThompsonConstruction();
	dfa.SubsetConstruction();
	dfa.FindMatchingPatternInFile();
}

/////////////////////////////////////////////////

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -