⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dfa.cpp

📁 输入正则表达式
💻 CPP
📖 第 1 页 / 共 2 页
字号:
};
// 构造函数
DFA::DFA()
{
	exp = new char[128];
	post = new char[128];
	edge = new char[128];
    edgeNumber = 0;
	NFAStatesNumber = 0;
	DFAStatesNumber = 0;
	DStatesNumber = 0;
	DtranNumber = 0;
	NFATable = new AdjacentTable();
}
// 析构函数
DFA::~DFA()
{
	delete [] exp;
	delete [] post;
	delete [] edge;
	delete [] AcceptStates;
	NFATable->Clear();
	DFATable->Clear();
}
// 获取输入
void DFA::GetRegExp()
{
	cout << "请输入正规式: " ;
	cin >> exp;
	for (int i=0; exp[i]!='\0'; i++)
	{
		if (!(exp[i]>'A'||exp[i]<'Z'||exp[i]>'a'||exp[i]<'z'
			||exp[i]>'0'||exp[i]<'9'||exp[i]!='|'||exp[i]!='('||exp[i]!=')'||exp[i]!='*'))
		{
			cout << "输入的正规式中含有非法字符!" << endl;
			getch();
			exit(1);
		}
	}
}
// 加入cat-node作为连结点标志
void DFA::InsertCatNode()
{
	int i = 0, j, len = strlen(exp);
	while (exp[i+1] != '\0')
	{
		if (((exp[i] != '(' && exp[i] != '.' && exp[i] != '|') 
			|| exp[i] == ')' 
			|| exp[i] == '*')
			&& (exp[i+1] != ')' && exp[i+1] != '.' && exp[i+1] != '|' && exp[i+1] != '*'))
		{
			for (j=len; j>i+1; j--)
			{
				exp[j] = exp[j-1];
			}
			exp[i+1] = '.';
			len++;
			exp[len] = '\0';
			i++;
		}
		i++;
	}
}
// 定义运算符的优先级
int DFA::Precedence(char symbol)
{
	int priority;
	switch (symbol)
	{
	case '|': priority = 1; break;
	case '.': priority = 2; break;
	case '*': priority = 3; break;
	default:  priority = 0; break;
	}
	return priority;
}
// 将正则式转为逆波兰式
void DFA::RegExpToPost()
{
	int i = 0, j = 0;
	char ch, cl;
	strcpy(post, "\0");
	LStack<char> *ls = new LStack<char>();
	ls->Clear();
	ls->Push('#');
	ch = exp[i];
	while (ch != '\0')
	{
		if (ch == '(')
		{
			ls->Push(ch);
			ch = exp[++i];
		}
		else if (ch == ')')
		{
			while (ls->GetTop() != '(')
			{
				post[j++] = ls->Pop();
			}
            ls->Pop();
			ch = exp[++i];
		}
		else if ((ch == '|') || (ch == '*') || (ch == '.'))
		{
	    	cl = ls->GetTop();
		   	while (Precedence(cl) >= Precedence(ch))
			{
		    	post[j++] = cl;
		    	ls->Pop();
		    	cl = ls->GetTop();
			}
			ls->Push(ch);
			ch = exp[++i];
		}
		else
		{
			post[j++] = ch;
			ch = exp[++i];
		}
	}
	ch = ls->Pop();
	while ((ch == '|') || (ch == '*') || (ch == '.'))
	{
		post[j++] = ch;
		ch = ls->Pop();
	}
	post[j] = '\0';
	ls->Clear();
}
// 扫描逆波兰式中除运算符以外的字符的数目
void DFA::GetEdgeNumber()
{
	int i = 0, j;
	edgeNumber = 0;
	while (post[i] != '\0')
	{
		if (post[i] == '.' || post[i] == '|' || post[i] == '*')
		{
			i++;
			continue;
		}
		for (j=0; j<edgeNumber; j++)
		{
			if (post[i] == edge[j])
			{
				break;
			}
		}
		if (j == edgeNumber)
		{
			edge[edgeNumber] = post[i];
			edgeNumber++;
		}
		i++;
	}
	edge[edgeNumber] = '\0';
}
// 用Thompson构造法构造NFA
void DFA::ThompsonConstruction()
{
	int i, j;
	char ch;
	int s1, s2;
	LStack<int> *states = new LStack<int>();
	states->Clear();
	if (strlen(post) < 1)
	{
		cout << "No Valid Regular Expression Found!" << endl;
		getch();
		exit(1);
	}
	NFATable->SetValue(0, 0);
	i = 1;
	j = 0;
	ch = post[j];
    while (ch != '\0')
	{
		if (ch == '.')
		{
			s2 = states->Pop();
			int temp1 = states->Pop();
			int temp2 = states->Pop();
			s1 = states->Pop();
			NFATable->InsertEdgeByValue(temp2, temp1, '~');
			states->Push(s1);
			states->Push(s2);
		}
		else if (ch == '|')
		{
			s2 = states->Pop();
			int temp1 = states->Pop();
			int temp2 = states->Pop();
			s1 = states->Pop();
			NFATable->InsertVertex(i);
			NFATable->InsertVertex(i+1);
			NFATable->InsertEdgeByValue(i, s1, '~');
			NFATable->InsertEdgeByValue(i, temp1, '~');
			NFATable->InsertEdgeByValue(temp2, i+1, '~');
			NFATable->InsertEdgeByValue(s2, i+1, '~');
			s1 = i;
			s2 = i+1;
			states->Push(s1);
			states->Push(s2);
			i += 2;
		}
		else if (ch == '*')
		{
			s2 = states->Pop();
			s1 = states->Pop();
			NFATable->InsertVertex(i);
			NFATable->InsertVertex(i+1);
			NFATable->InsertEdgeByValue(i, i+1, '~');
			NFATable->InsertEdgeByValue(s2, s1, '~');
			NFATable->InsertEdgeByValue(i, s1, '~');
			NFATable->InsertEdgeByValue(s2, i+1, '~');
			s1 = i;
			s2 = i+1;
			states->Push(s1);
			states->Push(s2);
			i += 2;
		}
		else
		{
			NFATable->InsertVertex(i);
			NFATable->InsertVertex(i+1);
			NFATable->InsertEdgeByValue(i, i+1, ch);
			s1 = i;
			s2 = i+1;
			states->Push(s1);
			states->Push(s2);
			i += 2;
		}
		j++;
		ch = post[j];
	}
	s2 = states->Pop();
	s1 = states->Pop();
	NFATable->InsertEdgeByValue(0, s1, '~');
	if (! states->IsEmpty())
	{
		cout << "Some error in your input string!" << endl;
		getch();
		exit(1);
	}
	NFAStatesNumber = s2 + 1;
}
// 比较两个一维数组是否含有完全相同的元素
int DFA::CompArray(int *t1, int *t2)
{
	int i = 0, j = 0, len1, len2;
	while (t1[i] != -1)
	{
		i++;
	}
	len1 = i;
	while (t2[j] != -1)
	{
		j++;
	}
	len2 = j;
	if (len1 != len2)
	{
		return 0;
	}
	for (i=0; i<len1; i++)
	{
		for (j=0; j<len2; j++)
		{
			if (t1[i] == t2[j])
			{
				break;
			}
		}
		if (j == len2)
		{
			return 0;
		}
	}
	return 1;
}
// 最小化Dtran表
int DFA::MinimizeDFAStates(int **Dtran, int *AcceptStates, int DtranNumber, int edgeNumber)
{
	int h, i, j, k, l;
	for (i=0; i<DtranNumber-1; i++)
	{
		for (j=i+1; j<DtranNumber; j++)
		{
			if (AcceptStates[i] == AcceptStates[j])
			{
				for (k=0; k<edgeNumber; k++)
				{
					if (Dtran[i][k] != Dtran[j][k])
					{
						break;
					}
				}
				if (k == edgeNumber)
				{
					for (l=j; l<DtranNumber-1; l++)
					{
						for (k=0; k<edgeNumber; k++)
						{
							Dtran[l][k] = Dtran[l+1][k];
						}
						AcceptStates[l] = AcceptStates[l+1];
					}
					for (l=0; l<DtranNumber-1; l++)
					{
						for (k=0; k<edgeNumber; k++)
						{
							if (Dtran[l][k] == j)
							{
								Dtran[l][k] = i;
							}
						}
					}
					for (h=j; h<DtranNumber; h++)
					{
						for (l=0; l<DtranNumber-1; l++)
						{
							for (k=0; k<edgeNumber; k++)
							{
								if (Dtran[l][k] == h+1)
								{
									Dtran[l][k] = h;
								}
							}
						}
					}
					DtranNumber--;
					j--;
				}
			}
		}
	}
	return DtranNumber;
}
// 用子集构造法构造DFA
void DFA::SubsetConstruction()
{
	int i, j, k;
	DStatesNumber = 0;
	DtranNumber = 0;
	
	DStates = (int**)(new int*[NFAStatesNumber+1]);
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		DStates[i] = new int[NFAStatesNumber+1];
	}
	Dtran = (int**)(new int*[NFAStatesNumber+1]);
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		Dtran[i] = new int[edgeNumber+1];
	}
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		for (j=0; j<edgeNumber+1; j++)
		{
			Dtran[i][j] = -1;
		}
	}
	AcceptStates = new int[NFAStatesNumber+1];
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		AcceptStates[i] = 0;
	}
	// 调用闭包函数和移动函数构造DStates和Dtran两个表及AcceptStates数组
    int *T = new int[NFAStatesNumber+1];
	int *temp = new int[NFAStatesNumber+1];
	T[0] = 0;
	T[1] = -1;
	T = NFATable->Closure(T);
	DStates[DStatesNumber] = T;
	DStatesNumber++;
	k = 0;
	while (k < DStatesNumber)
	{
		for (i=0; edge[i]!='\0'; i++)
		{
			temp = NFATable->Closure(NFATable->Move(T, edge[i]));
			if (temp[0] != -1)
			{
				for (j=0; j<DStatesNumber; j++)
				{
					if (CompArray(temp, DStates[j]) == 1)
					{
						Dtran[k][i] = j;
						break;
					}
				}
				if (j == DStatesNumber)
				{
					DStates[DStatesNumber] = temp;
					Dtran[k][i] = DStatesNumber;
					DStatesNumber++;
				}
			}
		}
		k++;
		T = DStates[k];
	}
	DtranNumber = k;
	for (i=0; i<DStatesNumber; i++)
	{
		for (j=0; DStates[i][j]!= -1; j++)
		{
			if (DStates[i][j] == NFAStatesNumber - 1)
			{
				AcceptStates[i] = 1;
				break;
			}
		}
	}
    // 反复检测Dtran表中是否能化简,能则继续化简
	int DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
	while (DtranNumberAfterMinimization != DtranNumber)
	{
		DtranNumber = DtranNumberAfterMinimization;
		DtranNumberAfterMinimization = MinimizeDFAStates(Dtran, AcceptStates, DtranNumber, edgeNumber);
	}
	// 将Dtran表的内容拷贝到DFA表
	DFATable = new TransitionTable(DtranNumber, edgeNumber);
	for (i=0; i<DtranNumber; i++)
	{
		for (j=0; j<edgeNumber; j++)
		{
			DFATable->SetValue(i, j, Dtran[i][j]);
		}
	}
	// 析构DStates和Dtran表以及AcceptStates数组
	for (i=0; i<NFAStatesNumber+1; i++)
	{
		delete [] DStates[i];
		delete [] Dtran[i];
	}
	delete [] DStates;
	delete [] Dtran;
}
// 从字符串中删除第一个字符
void DFA::RemoveFirstSymbol(char *buf, int &len)
{
	for (int i=0; i<len-1; i++)
	{
		buf[i] = buf[i+1];
	}
	len--;
	buf[len] = '\0';
}
// 从ASCII文件中找出所有的匹配式
void DFA::FindMatchingPatternInFile()
{
	
	char filePath[128];
	cout <<endl<< "请输入要查找的文件路径和文件名: ";
	cin >> filePath;
	fstream infile;
	infile.open(filePath, ios::in|ios::nocreate);
	if (! infile)
	{
		cout << "\n打开文件" << filePath << "失败!" << endl;
		getch();
		exit(1);
	}
	// 开始验证正则表达式的匹配串
	cout<<endl<<"查找结果如下:"<<endl;

	int ln = 1, col = 0, bufHead = 0;
	int state = 0;
	int count = 0;
	char buf[1024],buf2[1024];
	int curr = 0;
	int curr2=0;
	int cur=0;
	int len = 0;
	char ch;
	while (infile.get(ch))
	{
		if ((ch == '\n') || (ch == ' '))
		{	
			while (curr < len)
			{
				state = 0;
				while ((state != -1) && (curr < len))
				{
					state = DFATable->Transit(state, buf[curr], edge);
				
			    	if (AcceptStates[state] == 1)
					{
						
						if(curr>curr2)
						{
							curr2=curr;
							for(int i=0;i<=curr;i++)
								buf2[i]=buf[i];
						}
				    	
					}
					curr++;
					
				}
				if(curr2>0&&curr2>=cur)
				{
				cout << "\n行 " << ln << ", 列 " << bufHead <<": ";
				for(int wang=0;wang<curr2+1;wang++)
				{
					cout<<buf2[wang];
				}
				count++;
				cur=curr2;
				curr2=0;
				}
			
				RemoveFirstSymbol(buf, len);
				bufHead++;
			    curr = 0;
				
			}
			if (ch == '\n')
			{
				ln++;
				for(int i=0;i<cur;i++)
				{
			        buf[i]=' ';
		        	buf2[i]=' ';
				}
				len=curr=cur=curr2=col=0;
			}
			if (ch == ' ')
			{
				col++;
				for(int i=0;i<cur;i++)
				{
			        buf[i]=' ';
		        	buf2[i]=' ';
				}
				len=curr=curr2=cur=0;
			}
		}
		else
		{
			col++;
			if (len == 0)
			{
				bufHead = col;
			}
			buf[len++] = ch;
			if (len >= 1024)
			{
				cout << "读取字符串的长度超过最大限额!" << endl;
				getch();
				exit(1);
			}
		}
	}

	if (count > 0)
	{
		cout << "\n\n一共找到" << count << "个匹配的子串"<<endl;
	}
	else
	{
		cout<< "\n\n没有找到任何匹配的子串"<<endl;
	}
	infile.close();
	
	getch();
}

/////////////////////////////////////////////////

// 主函数
void main()
{
	DFA dfa;
	dfa.GetRegExp();
	dfa.InsertCatNode();
	dfa.RegExpToPost();
	dfa.GetEdgeNumber();
	dfa.ThompsonConstruction();
	dfa.SubsetConstruction();
	dfa.FindMatchingPatternInFile();
}

/////////////////////////////////////////////////

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -