📄 ag_regex.h

📁 正则文法匹配检测器
💻 H
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*! \file AG_RegEx.h
    \brief Regular Expression Parser
	\author Amer Gerzic

	Use it as following:					\n
											\n
	string strPattern;						\n
	int nPos = 0;							\n
											\n
	if(SetRegEx("....."))					\n
	{										\n
		if(FindFirst(...))					\n
		{									\n
			// Do something with result		\n
			while(FindNext(...))			\n
			{								\n
				// Process Result			\n
			}								\n
		}									\n
	}										\n
*/

#pragma once

// for debug STL exceeds 255 chars and MS compiler complains, so disable it
#pragma warning(disable:4786)
#pragma warning(disable:4503)

#include <string>
#include <stack>
#include <map>
#include <deque>
#include <vector>
#include <set>
#include <algorithm>
#include <list>

using namespace std;

class CAG_RegEx  
{
protected:
	//! State Class
	class CAG_State
	{
	protected:
		//! Transitions from this state to other 
		multimap<char, CAG_State*> m_Transition;

		//! State ID
		int m_nStateID;

		//! Set of NFA state from which this state is constructed
		set<CAG_State*> m_NFAStates;

	public:
		//! Default constructor
		CAG_State() : m_nStateID(-1), m_bAcceptingState(FALSE) {};

		//! parameterized constructor
		CAG_State(int nID) : m_nStateID(nID), m_bAcceptingState(FALSE) {};

		//! Constructs new state from the set of other states
		/*! This is necessary for subset construction algorithm
			because there a new DFA state is constructed from 
			one or more NFA states
		*/
		CAG_State(set<CAG_State*> NFAState, int nID)
		{
			m_NFAStates			= NFAState;
			m_nStateID			= nID;
			
			// DFA state is accepting state if it is constructed from 
			// an accepting NFA state
			m_bAcceptingState	= FALSE;
			set<CAG_State*>::iterator iter;
			for(iter=NFAState.begin(); iter!=NFAState.end(); ++iter)
				if((*iter)->m_bAcceptingState)
					m_bAcceptingState = TRUE;
		};

		//! Copy Constructor
		CAG_State(const CAG_State &other)
		{ *this = other; };

		//! Destructor
		virtual ~CAG_State() {};

		//! True if this state is accepting state
		BOOL m_bAcceptingState;

		//! Adds a transition from this state to the other
		void AddTransition(char chInput, CAG_State *pState)
		{
			ASSERT(pState != NULL);
			m_Transition.insert(make_pair(chInput, pState));
		};

		//! Removes all transition that go from this state to pState
		void RemoveTransition(CAG_State* pState)
		{
			multimap<char, CAG_State*>::iterator iter;
			for(iter=m_Transition.begin(); iter!=m_Transition.end();)
			{
				CAG_State *toState = iter->second;
				if(toState == pState)
					m_Transition.erase(iter++);
				else ++iter;
			}
		};

		//! Returns all transitions from this state on specific input
		void GetTransition(char chInput, vector<CAG_State*> &States)
		{
			// clear the array first
			States.clear();

			// Iterate through all values with the key chInput
			multimap<char, CAG_State*>::iterator iter;
			for(iter = m_Transition.lower_bound(chInput);
				iter!= m_Transition.upper_bound(chInput);
				++iter)
				{
					CAG_State *pState = iter->second;
					ASSERT(pState != NULL);
					States.push_back(pState);
				}
		};

		//! Returns the state id in form of string
		CString GetStateID()
		{
			CString strRes;
			if(m_bAcceptingState)
				strRes.Format(_T("{%d}"), m_nStateID);
			else strRes.Format(_T("%d"), m_nStateID);
			return strRes;
		};

		/*! Returns the set of NFA states from 
			which this DFA state was constructed
		*/
		set<CAG_State*>& GetNFAState()
		{ return m_NFAStates; };

		//! Returns true if this state is dead end
		/*! By dead end I mean that this state is not
			accepting state and there are no transitions
			leading away from this state. This function
			is used for reducing the DFA.
		*/
		BOOL IsDeadEnd()
		{
			if(m_bAcceptingState)
				return FALSE;
			if(m_Transition.empty())
				return TRUE;
			
			multimap<char, CAG_State*>::iterator iter;
			for(iter=m_Transition.begin(); iter!=m_Transition.end(); ++iter)
			{
				CAG_State *toState = iter->second;
				if(toState != this)
					return FALSE;
			}

			TRACE("State %d is dead end.\n", m_nStateID); 
			
			return TRUE;
		};	

		//! Override the assignment operator
		CAG_State& operator=(const CAG_State& other)
		{ 
			m_Transition	= other.m_Transition; 
			m_nStateID		= other.m_nStateID;
			m_NFAStates		= other.m_NFAStates;
		};

		//! Override the comparison operator
		BOOL operator==(const CAG_State& other)
		{
			if(m_NFAStates.empty())
				return(m_nStateID == other.m_nStateID);
			else return(m_NFAStates == other.m_NFAStates);
		};
	};

	//! Structure to track the pattern recognition
	class CAG_PatternState
	{
	public:
		//! Default Constructor
		CAG_PatternState() : m_pState(NULL), m_nStartIndex(-1) {};

		//! Copy Constructor
		CAG_PatternState(const CAG_PatternState &other)
		{ *this = other; };

		//! Destructor
		virtual ~CAG_PatternState() {};

		//! Pointer to current state in DFA
		CAG_State* m_pState;

		//! 0-Based index of the starting position
		int m_nStartIndex;

		//! Override the assignment operator
		CAG_PatternState& operator=(const CAG_PatternState& other)
		{
			ASSERT(other.m_pState != NULL);
			m_pState		= other.m_pState;
			m_nStartIndex	= other.m_nStartIndex;
			return *this;
		};
	};

	//! NFA Table
	typedef deque<CAG_State*> FSA_TABLE;

	/*! NFA Table is stored in a deque of CAG_States.
		Each CAG_State object has a multimap of 
		transitions where the key is the input
		character and values are the references to
		states to which it transfers.
	*/
	FSA_TABLE m_NFATable;

	//! DFA table is stores in same format as NFA table
	FSA_TABLE m_DFATable;

	//! Operand Stack
	/*! If we use the Thompson's Algorithm then we realize
		that each operand is a NFA automata on its own!
	*/
	stack<FSA_TABLE> m_OperandStack;

	//! Operator Stack
	/*! Operators are simple characters like "*" & "|" */
	stack<char> m_OperatorStack;

	//! Keeps track of state IDs
	int m_nNextStateID;

	//! Set of input characters
	set<char> m_InputSet;

	//! List of current partially found patterns
	list<CAG_PatternState*> m_PatternList;

	//! Contains the current patterns accessed
	int m_nPatternIndex;

	//! Contains the text to check
	string m_strText;

	//! Contains all found patterns
	vector<string> m_vecPattern;

	//! Contains all position where these pattern start in the text
	vector<int> m_vecPos;

	//! Constructs basic Thompson Construction Element
	/*! Constructs basic NFA for single character and
		pushes it onto the stack.
	*/
	void Push(char chInput);

	//! Pops an element from the operand stack
	/*! The return value is TRUE if an element 
		was poped successfully, otherwise it is
		FALSE (syntax error) 
	*/
	BOOL Pop(FSA_TABLE &NFATable);

	//! Checks is a specific character and operator
	BOOL IsOperator(char ch) { return((ch == 42) || (ch == 124) || (ch == 40) || (ch == 41) || (ch == 8)); };

	//! Returns operator presedence
	/*! Returns TRUE if presedence of opLeft <= opRight.
	
			Kleens Closure	- highest
			Concatenation	- middle
			Union			- lowest
	*/
	BOOL Presedence(char opLeft, char opRight)
	{
		if(opLeft == opRight)
			return TRUE;

		if(opLeft == '*')
			return FALSE;

		if(opRight == '*')
			return TRUE;

		if(opLeft == 8)
			return FALSE;

		if(opRight == 8)
			return TRUE;

		if(opLeft == '|')
			return FALSE;
		
		return TRUE;
	};

	//! Checks if the specific character is input character
	BOOL IsInput(char ch) { return(!IsOperator(ch)); };

	//! Checks is a character left parantheses
	BOOL IsLeftParanthesis(char ch) { return(ch == 40); };

	//! Checks is a character right parantheses
	BOOL IsRightParanthesis(char ch) { return(ch == 41); };

	//! Evaluates the next operator from the operator stack
	BOOL Eval();

	//! Evaluates the concatenation operator
	/*! This function pops two operands from the stack 
		and evaluates the concatenation on them, pushing
		the result back on the stack.
	*/
	BOOL Concat();

	//! Evaluates the Kleen's closure - star operator
	/*! Pops one operator from the stack and evaluates
		the star operator on it. It pushes the result
		on the operand stack again.
	*/
	BOOL Star();

	//! Evaluates the union operator
	/*! Pops 2 operands from the stack and evaluates
		the union operator pushing the result on the
		operand stack.
	*/
	BOOL Union();

	//! Inserts char 8 where the concatenation needs to occur
	/*! The method used to parse regular expression here is 
		similar to method of evaluating the arithmetic expressions.
		A difference here is that each arithmetic expression is
12 下一页
💿 文件大小 9 K
👤 上传用户 sy361
📂 所属分类编译器/解释器
🏷️ 相关标签

#正 #检测器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -