⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rexalgorithm.cpp

📁 Compiles a regular expression into a fast automaton 编译正规表达式成机器代码(源码)(11KB)
💻 CPP
📖 第 1 页 / 共 4 页
字号:
    vector< REXA_CharSet >    unionSets(256);
    REXA_CharSetSet           resultSet;
    REXA_CharSetSet::const_iterator it;
    for(it= rCharSets.begin();it!=rCharSets.end();++it){
        charSets[it->count()].insert(*it);
    }

//cumulative union
    for(int k= 1;k<256;k++){
        for(int j=0;j<k;j++){
            for(it= charSets[j].begin();it!= charSets[j].end();++it){
                unionSets[k]|= *it;
            }
        }
    }

//reduce charset into disjoint charsets
    for(int l=0;l<256;l++){
        for(it= charSets[l].begin();it!= charSets[l].end();++it){
            REXA_CharSet reducedSet= *it;
            REXA_CharSet unionSet= ~unionSets[l];
            reducedSet&= unionSet;
            resultSet.insert(reducedSet);
        }
    }

    return resultSet;
}
//-----------------------------------------------------------------------------
void REXA_TranslatingTable::CollectCharSets(const REXA_NDFAState* pCurState,
                     set<int>& rVisitedStates,
                     REXA_CharSetSet& rCharSets)
//-----------------------------------------------------------------------------
{
    if( rVisitedStates.find(pCurState->m_id) == rVisitedStates.end() ){
        rVisitedStates.insert(pCurState->m_id);
        for(int i=0;i<pCurState->m_transitions.size();i++){
            if( pCurState->m_transitions[i].m_transChars.count()!=0 ){
                rCharSets.insert(pCurState->m_transitions[i].m_transChars);
                CollectCharSets(
					pCurState->m_transitions[i].m_nextState,
					rVisitedStates,rCharSets);
            }
        }
    }
}



//# Generates a translation table from the NDFA-states present in 'vecpStates'
//example: NDFA having the States S,A,E.  S('a') -> A. S('a') -> E. A('b') -> A. A('b') -> E.
//TranslationTable: 
//  REXA_StateSet    'a'     'b'
//--------------------------
//    {S}      {A,E}    
//   {A,E}             {A,E}
// corresponding DFA: {S}('a') -> {A,E}. {A,E}('b') -> {A,E}
//-----------------------------------------------------------------------------
void REXA_TranslatingTable::GenerateTranslationTable(set<const REXA_NDFAState*>& rSetState)
//-----------------------------------------------------------------------------
{
    set< REXA_StateSet > setPendingStateSets;
	REXA_StateSet startStateSet;

	startStateSet.insert(m_pStartState);
    setPendingStateSets.insert(startStateSet);

    while( setPendingStateSets.size()>0 ){
        REXA_StateSet curStateSet= *setPendingStateSets.begin();
        setPendingStateSets.erase(curStateSet);
        if( m_mapTransitions.find(curStateSet)==m_mapTransitions.end() ){
            TransitionVector transVector(m_vecCharsets.size());
			for(int i=0;i<m_vecCharsets.size();i++){
				REXA_StateSet::const_iterator it;
				for(it= curStateSet.begin();it!=curStateSet.end();++it){
					for(int j=0;j<(*it)->m_transitions.size();j++){	
						const  REXA_NDFATransition& rTransition= (*it)->m_transitions[j];
						REXA_CharSet charSet= rTransition.m_transChars;
						charSet&= m_vecCharsets[i];
						if( charSet.any() ){
							transVector[i].insert(rTransition.m_nextState);
						}
					}
				}
			}
			m_mapTransitions.insert(make_pair(curStateSet,transVector));
			for(int l=0;l<transVector.size();l++){
				setPendingStateSets.insert(transVector[l]);
			}
        }
    }
}

static REXA_DFAState* GetIllegalState()
{
    static REXA_DFAState illegalState(REXA_DFAState::eNoAnswer,true);
    return &illegalState;
}
static REXA_DFAState* GetEosState()
{
    bool bFirstTime= true;
    static REXA_DFAState eosState(REXA_DFAState::eEos,true);
    if( bFirstTime ){
        bFirstTime= false;
        for(int i=0;i<sizeof(eosState.aNext)/sizeof(eosState.aNext[0]);i++){
            eosState.aNext[i]= &eosState;
        }
    }
    return &eosState;
}

REXA_DFAState* REXA_DFAState::ms_pIllegalState= ::GetIllegalState();
REXA_DFAState* REXA_DFAState::ms_pEosState= ::GetEosState();

//-------------------------------------------------------------------------------
REXA_DFAState::REXA_DFAState(int nAnwserId,bool bIsFinal)
		:m_nAnwserId(nAnwserId),
         m_bIsFinal(bIsFinal),
         m_bIsAccepting(false)	
//-------------------------------------------------------------------------------
{
    for(int i=0;i<256;i++)	aNext[i]=0;
}
//-------------------------------------------------------------------------------
void    REXA_DFAState::CollectStates(REXA_DFAState* pState,set<REXA_DFAState*>& rsetStates)
//-------------------------------------------------------------------------------
{
    if( !pState ) return;

    pair<set<REXA_DFAState*>::iterator,bool> itBool=    rsetStates.insert(pState);
    if( itBool.second ){
        for(int i=0;i<256;i++){
            CollectStates((*itBool.first)->aNext[i],rsetStates);
        }
    }
}

//-----------------------------------------------------------------------------
void    REXA_DFAState::DeleteAll()
//-----------------------------------------------------------------------------
{
    set<REXA_DFAState*> setStates;
    CollectStates(this,setStates);
    set<REXA_DFAState*>::iterator it;
    for(it= setStates.begin();it!=setStates.end();++it){
        if( *it!=ms_pIllegalState && *it!=ms_pEosState ){
            delete *it;
        }
    }
}



//-----------------------------------------------------------------------------
vector<REXA_CharSet> REXA_NDFAToTable::CharsetToVector(const REXA_CharSetSet& rCharSetSet)
//-----------------------------------------------------------------------------
{
	vector<REXA_CharSet> vecCharSet;
	REXA_CharSetSet::const_iterator it;
	for(it= rCharSetSet.begin();it!=rCharSetSet.end();++it){
		vecCharSet.push_back(*it);
	}
	return vecCharSet;
}
//-----------------------------------------------------------------------------
REXA_TranslatingTable::REXA_TranslatingTable    (const REXA_NDFAState* pStartState)
//-----------------------------------------------------------------------------
{
    set<int>                setVisitedStates;
    REXA_CharSetSet              charSets; 
	set<const REXA_NDFAState*>   setStates;
	
	REXA_NDFAState::CollectStates(pStartState,setStates);

	CollectCharSets(pStartState,setVisitedStates,charSets);
	m_vecCharsets=	REXA_NDFAToTable::CharsetToVector(
								ComputeDisjointCharSets(charSets));
    m_pStartState=	pStartState;
	GenerateTranslationTable(setStates);
	ComputeOccCounts();
}


//-----------------------------------------------------------------------------
REXA_NDFAToDfa::REXA_NDFAToDfa	(const REXA_TranslatingTable& rTable,char cEos)
						:m_rTable(rTable),m_cEos((unsigned char)cEos)
//-----------------------------------------------------------------------------
{
	
}
//-----------------------------------------------------------------------------
REXA_DFAState* REXA_NDFAToDfa::CreateDfaStates(
					const REXA_StateSet&				    rStateSet,
					map<REXA_StateSet,REXA_DFAState*>&	rMapStateSetDFAState)
//-----------------------------------------------------------------------------
{  
    pair<map<REXA_StateSet,REXA_DFAState*>::iterator,bool> pairItBool=
			rMapStateSetDFAState.insert(make_pair(rStateSet,(REXA_DFAState*)0));
	if( pairItBool.second ){
		REXA_DFAState* pDFAState= new REXA_DFAState();
		pairItBool.first->second= pDFAState;
		REXA_TranslatingTable::TranslationMap::const_iterator it=
			m_rTable.m_mapTransitions.find(rStateSet);
		if( it!= m_rTable.m_mapTransitions.end() ){
			const TransitionVector& rvecTrans= it->second;
			for(int i=0;i<rvecTrans.size();i++){
				if( rvecTrans[i].size()>0 ){
					REXA_DFAState* pTransState= 
						CreateDfaStates(rvecTrans[i],rMapStateSetDFAState);
					for(int j=0;j<256;j++){
                        if( m_rTable.m_vecCharsets[i][j] ){
                            pDFAState->aNext[j]= pTransState;
                        }
					}
				}
			}
			if( rStateSet.IsAccepting() ){
				int nAnswerCode= m_rTable.GetAnswerCode(rStateSet);
                pDFAState->m_bIsAccepting= true;
                pDFAState->m_nAnwserId= nAnswerCode;
				REXA_DFAState* pAcceptState= 
					new REXA_DFAState(nAnswerCode,true);
                pAcceptState->m_bIsAccepting= true;
				for(int j=0;j<256;j++){
					if( !pDFAState->aNext[j] ){
						pDFAState->aNext[j]= pAcceptState;
					}
				}
			}else{
				for(int j=0;j<256;j++){
					if( !pDFAState->aNext[j] ){
                        pDFAState->aNext[j]= REXA_DFAState::ms_pIllegalState;
					}
				}
                pDFAState->aNext[m_cEos]= REXA_DFAState::ms_pEosState;
			}
		}
        return pDFAState;
    }
	return  pairItBool.first->second;
}
//-----------------------------------------------------------------------------
REXA_DFAState* REXA_NDFAToDfa::TableToDfaStates()
//-----------------------------------------------------------------------------
{ 
	REXA_StateSet startStateSet;
	startStateSet.insert(m_rTable.m_pStartState);

	map<REXA_StateSet,REXA_DFAState*> mapStateSetDFAState;

    REXA_TranslatingTable::TranslationMap::const_iterator it;
	it= m_rTable.m_mapTransitions.find(startStateSet);
	if( it!=m_rTable.m_mapTransitions.end() ){
		return  CreateDfaStates(
					it->first,
					mapStateSetDFAState);
		
    }
    return 0;
}

//-----------------------------------------------------------------------------
REXA_Parser::REXA_Parser     (char cEos)
//-----------------------------------------------------------------------------
{
    m_pParserImp= new REXA_ParserImpl(cEos);

}
//-----------------------------------------------------------------------------
REXA_Parser::~REXA_Parser    ()
//-----------------------------------------------------------------------------
{
    delete m_pParserImp;
}
//-----------------------------------------------------------------------------
REXA_DFAState*      REXA_Parser::ParseRegExp     (string strRegExp)
//-----------------------------------------------------------------------------
{
    const REXA_NDFAState*    pNdfa=  m_pParserImp->ParseRegExp(strRegExp);
	REXA_TranslatingTable    table(pNdfa);
	REXA_NDFAToDfa           ndfaToDfa(table,m_pParserImp->m_cEos);
    return                   ndfaToDfa.TableToDfaStates();
}
//-----------------------------------------------------------------------------
void                REXA_Parser::AddRegDefinition (   string strDefName,
                                                string strDefExpression,
                                                int nAnswer)
//-----------------------------------------------------------------------------
{
    string str= strDefName+ "=" + strDefExpression;
    m_pParserImp->ParseRegExpDefinition(str,nAnswer);
}
//-----------------------------------------------------------------------------
void                REXA_Parser::RemoveRegDefinition (   string strDefName)
//-----------------------------------------------------------------------------
{
    m_pParserImp->m_mapNamedStates.erase(strDefName);
}
//-----------------------------------------------------------------------------
void                REXA_Parser::GetRegDefinitions   (vector<string>& vecDefinitions)
//-----------------------------------------------------------------------------
{
    map<string,REXA_NDFAState*>::iterator it;
    for(it= m_pParserImp->m_mapNamedStates.begin();
        it!=m_pParserImp->m_mapNamedStates.end();
        ++it){
        vecDefinitions.push_back(it->first);
    }
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -