⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rexalgorithm.cpp

📁 关于使用正则进行词法分析的程序
💻 CPP
📖 第 1 页 / 共 4 页
字号:
                rpFrom+= 2;
            }
            return c;
        default:    c= *rpFrom++; return c;
        }
    }
    return *rpFrom++;
}
//------------------------------------------------------------------------------
void    REXA_Scanner::AddRange(
                           unsigned char cFirst, 
                           unsigned char cLast, 
                           REXA_CharSet& rCharSet)
//------------------------------------------------------------------------------
{
    if( cFirst>cLast ){
        swap(cFirst,cLast);
    }
    for(unsigned char c= cFirst; c<cLast;c++){
        rCharSet[c]= true;
    }
    rCharSet[cLast]= true;
}
//------------------------------------------------------------------------------
REXA_ESymbols REXA_Scanner::operator()	()
//------------------------------------------------------------------------------
{
	m_pTokBeg= m_pTokEnd= SkipWhiteSpace();
    switch(*m_pTokEnd){
    case '\0':  return m_eLastSymbol=eEof;
    case '+':   m_pTokEnd++; return m_eLastSymbol=ePlus;
    case '?':   m_pTokEnd++; return m_eLastSymbol=eOpt;
    case '*':   m_pTokEnd++; return m_eLastSymbol=eStar;
    case '(':   m_pTokEnd++; return m_eLastSymbol=eLParen;
    case ')':   m_pTokEnd++; return m_eLastSymbol=eRParen;
	case '|':   m_pTokEnd++; return m_eLastSymbol=eAlternative;
    case '[': {
                m_pTokEnd++;
                REXA_CharSet cset;
                int c;
                bool bIsEscape;
				bool bIsFirstTime= true;
                bool bPrevWasRangeSeparator= false;
				bool bUseComplement= false;
                int  cPrev;
				cPrev=GetChar(m_pTokEnd,bIsEscape);
				if( cPrev=='^' && !bIsEscape ){
					bUseComplement= true;
                    cPrev=GetChar(m_pTokEnd,bIsEscape);
				}
				while(	cPrev!=-1	&&	!(cPrev==']' && !bIsEscape) ){
					c=GetChar(m_pTokEnd,bIsEscape);
					if( !bIsEscape && c=='-'  ){
						c=GetChar(m_pTokEnd,bIsEscape);
                        if(c==-1){
                            break;
                        }
						AddRange(
							(unsigned char)cPrev,(unsigned char)c,cset);
						cPrev= GetChar(m_pTokEnd,bIsEscape);
					}else{
                        cset[(unsigned char)cPrev]= true;
                        cPrev= c;
					}
				}
                if( cPrev==-1  || c==-1 ){
                    throw REXA_Exception(
                                m_pTokBeg,m_pcszToParse,"closing ] missing");
                }
                m_charSet= cset;
				if(bUseComplement){
					m_charSet.flip();
				}
                return m_eLastSymbol=eCharSet;
              }
    case '.':{
                m_pTokEnd++;
                m_charSet= REXA_CharSet();
                for(int i=0;i<256;i++){
                    m_charSet[i]= true;
                }
                return m_eLastSymbol=eCharSet;
             }
    case '$':{
                if(  isalpha(m_pTokEnd[1])||m_pTokEnd[1]=='_' ){
                    m_pTokEnd++;
                    m_strLiteral="";
                    while(  isalnum(*m_pTokEnd) ||  *m_pTokEnd=='_' ){
                        m_strLiteral+= *m_pTokEnd++;
                    }
                    return m_eLastSymbol=eIdent;
                 }
             }
    default:
        {
            if( *m_pTokEnd=='\\' && m_pTokEnd[1]=='i' ){
                m_pTokEnd++; 
                m_pTokEnd++;
                return m_eLastSymbol=eIgnoreCase;
            }else{
                bool bIsEscape;
                int c=GetChar(m_pTokEnd,bIsEscape);
                m_charSet= REXA_CharSet();
                m_charSet[(unsigned char)c]= true;
                return m_eLastSymbol=eCharSet;
            }
        }
    }
}

///////////////////////////////////////////////////////////////////////////////
// grammar related functions
///////////////////////////////////////////////////////////////////////////////
//-----------------------------------------------------------------------------
REXA_ParserImpl::~REXA_ParserImpl()
//-----------------------------------------------------------------------------
{
    for(int i=0;i<m_vecAllStates.size();++i){
        delete m_vecAllStates[i];
    }
}
//-----------------------------------------------------------------------------
REXA_ParserImpl::REXA_ParserImpl(const REXA_ParserImpl& parserToCopy)
    :m_nStateIdMax(parserToCopy.m_nStateIdMax)
//-----------------------------------------------------------------------------
{
    for(int i=0;i<parserToCopy.m_vecAllStates.size();i++){
        m_vecAllStates.push_back(parserToCopy.m_vecAllStates[i]->Clone());
    }
}
//-----------------------------------------------------------------------------
REXA_ParserImpl& REXA_ParserImpl::operator=   (const REXA_ParserImpl& parser)
//-----------------------------------------------------------------------------
{
    int i;
    for(i=0;i<m_vecAllStates.size();++i){
        delete m_vecAllStates[i];
    }
    m_nStateIdMax= parser.m_nStateIdMax;
    for(i=0;i<parser.m_vecAllStates.size();i++){
        m_vecAllStates.push_back(parser.m_vecAllStates[i]->Clone());
    }
    return *this;
}

//-----------------------------------------------------------------------------
void		REXA_ParserImpl::InsertNamedState(REXA_BegEnd name,
										   REXA_NDFAState* pState,int nAnswer)
//-----------------------------------------------------------------------------
{
	vector<REXA_NDFAState*>  vecAccepting= CollectAcceptingStates(pState);
	for(int i=0;i<vecAccepting.size();i++){
		vecAccepting[i]->m_nAnswer= nAnswer;
	}
    pair<map<string,REXA_NDFAState*>::iterator,bool> itBool=
        m_mapNamedStates.insert(make_pair(string(name.first,name.second),pState));
    if( !itBool.second ){
        itBool.first->second= pState;
    }
}
//-----------------------------------------------------------------------------
void REXA_ParserImpl::NamedRegExp		(int nAnswer)
//-----------------------------------------------------------------------------
//$1 NamedRegExp= Ident "=" RegExp.
{
	m_scanner.Match(eIdent);
	REXA_BegEnd name= m_scanner.GetToken();

	m_scanner.AdvanceAndMatch(eCharSet);
    if( m_scanner.m_pTokEnd[-1]!= '=' ){
            throw REXA_Exception(m_scanner.m_pTokBeg,m_scanner.m_pcszToParse,"'=' expected");
    }
    m_scanner();
	REXA_NDFAState* pState=RegExp();
    if( m_scanner.GetLastSymbol()!=eEof){
         throw REXA_Exception(
                        m_scanner.m_pTokBeg,
                        m_scanner.m_pcszToParse,
                        "unexpected symbol");
    }
	InsertNamedState(name,pState,nAnswer);
}
//-----------------------------------------------------------------------------
const REXA_NDFAState*  REXA_ParserImpl::ParseRegExp          (string strRegExp)
//-----------------------------------------------------------------------------
{
    m_scanner.SetSource(strRegExp.c_str());
    m_scanner();
    const REXA_NDFAState* cRegExp= RegExp();
    if( m_scanner.GetLastSymbol()!=eEof){
         throw REXA_Exception(
                        m_scanner.m_pTokBeg,
                        m_scanner.m_pcszToParse,
                        "unexpected symbol");
    }
    return cRegExp;
}
//-----------------------------------------------------------------------------
bool       REXA_ParserImpl::ParseRegExpDefinition(string strRegExpDefiniton,
											   int nAnswer)
//-----------------------------------------------------------------------------
{
    m_scanner.SetSource(strRegExpDefiniton.c_str());
    m_scanner();
    NamedRegExp(nAnswer);
    return true;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::RegExp			()
//-----------------------------------------------------------------------------
//$2 RegExp= Term {"|" Term}.
{
    REXA_NDFAState* pState= Term();
    while( m_scanner.GetLastSymbol()==eAlternative ){
        m_scanner();
        pState= Union(pState,Term());
    }
    return pState;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Term			()
//-----------------------------------------------------------------------------
//$3 Term= Factor {Factor}.
{
    REXA_NDFAState* pState= Factor();
    while( m_scanner.GetLastSymbol()&m_eAtomSet ){
        pState= Concat(pState,Factor());
    }
    return pState;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Factor			()
//-----------------------------------------------------------------------------
//$4 Factor= Atom ["*"|"+"|"?"|"\i"].
{
    REXA_NDFAState* pState= Atom();
    switch(m_scanner.GetLastSymbol()){
    case eStar:
        {
            pState= MakeStar(pState);
            m_scanner();
            return pState;
        }
    case ePlus:
        {
            pState= MakePlus(pState);
            m_scanner();
            return pState;
        }
    case eOpt:
        {
            pState= MakeOptional(pState);
            m_scanner();
            return pState;
        }
    case eIgnoreCase:
        {
            pState= MakeIgnoreCase(pState);
            m_scanner();
            return pState;
        }
    default:
        return pState;
    }
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Atom				()
//-----------------------------------------------------------------------------
//$5 Atom= REXA_CharSet |  Ident | "(" RegExp ").
{
    switch(m_scanner.GetLastSymbol()){
    case eCharSet:
        {
			REXA_NDFAState* pState= ParseCharSet(m_scanner.GetCharSet());
            m_scanner();
            return pState;
        }

    case eIdent:
        {
            REXA_NDFAState* pState= Name(m_scanner.GetToken());
			m_scanner();
            return pState;
        }
    case eLParen:
        {
			m_scanner();
            REXA_NDFAState* pState= RegExp();
            m_scanner.Match(eRParen);
            m_scanner();
            return pState;
        }
    default: throw REXA_Exception(
                        m_scanner.m_pTokBeg,
                        m_scanner.m_pcszToParse,
                        "[,',( or ident expected");
        
    }
    return 0;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Name             (REXA_BegEnd nameToFind)
//-----------------------------------------------------------------------------
{

	map<string,REXA_NDFAState*>::iterator it=
		m_mapNamedStates.find(string(nameToFind.first,nameToFind.second));
	if( it!= m_mapNamedStates.end() ){
		return CloneNDFA(it->second);;
	}
    throw REXA_Exception(   m_scanner.m_pTokBeg,
                            m_scanner.m_pcszToParse,
                            string(nameToFind.first,nameToFind.second-nameToFind.first)
                            + " not found in name dictionary");
    return 0;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::ParseCharSet   (REXA_CharSet& rCharSet)
//-----------------------------------------------------------------------------
{
    REXA_NDFAState* pStartState= GetNewNDFAState(NextStateId(),false);
	REXA_NDFAState* pFinalState= GetNewNDFAState(NextStateId(),true);
	pStartState->m_transitions.push_back(REXA_NDFATransition(rCharSet,pFinalState));
    return pStartState;
}

//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Concat         (REXA_NDFAState* pState0,REXA_NDFAState* pState1)
//-----------------------------------------------------------------------------
{ //Terms: F0 resp. F1: StateGraph reachable from 'pState0' resp. 'pState1'
	//1. for each transition in F0 leading from state q to an accepting state 
	//   add a transition leading from q to 'pState1'
	vector<pair<REXA_NDFAState*,REXA_NDFATransition> > vecTrans= 
								CollectAcceptingTransitions(pState0);
    vector<REXA_NDFAState*>  vecAccepting= CollectAcceptingStates(pState0);
    bool bMakeState0Accepting= false;
	for(int i=0;i<vecTrans.size();i++){
		vecTrans[i].first->m_transitions.push_back(
			REXA_NDFATransition(vecTrans[i].second.m_transChars,pState1));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -