📄 rexalgorithm.cpp
字号:
rpFrom+= 2;
}
return c;
default: c= *rpFrom++; return c;
}
}
return *rpFrom++;
}
//------------------------------------------------------------------------------
void REXA_Scanner::AddRange(
unsigned char cFirst,
unsigned char cLast,
REXA_CharSet& rCharSet)
//------------------------------------------------------------------------------
{
if( cFirst>cLast ){
swap(cFirst,cLast);
}
for(unsigned char c= cFirst; c<cLast;c++){
rCharSet[c]= true;
}
rCharSet[cLast]= true;
}
//------------------------------------------------------------------------------
REXA_ESymbols REXA_Scanner::operator() ()
//------------------------------------------------------------------------------
{
m_pTokBeg= m_pTokEnd= SkipWhiteSpace();
switch(*m_pTokEnd){
case '\0': return m_eLastSymbol=eEof;
case '+': m_pTokEnd++; return m_eLastSymbol=ePlus;
case '?': m_pTokEnd++; return m_eLastSymbol=eOpt;
case '*': m_pTokEnd++; return m_eLastSymbol=eStar;
case '(': m_pTokEnd++; return m_eLastSymbol=eLParen;
case ')': m_pTokEnd++; return m_eLastSymbol=eRParen;
case '|': m_pTokEnd++; return m_eLastSymbol=eAlternative;
case '[': {
m_pTokEnd++;
REXA_CharSet cset;
int c;
bool bIsEscape;
bool bIsFirstTime= true;
bool bPrevWasRangeSeparator= false;
bool bUseComplement= false;
int cPrev;
cPrev=GetChar(m_pTokEnd,bIsEscape);
if( cPrev=='^' && !bIsEscape ){
bUseComplement= true;
cPrev=GetChar(m_pTokEnd,bIsEscape);
}
while( cPrev!=-1 && !(cPrev==']' && !bIsEscape) ){
c=GetChar(m_pTokEnd,bIsEscape);
if( !bIsEscape && c=='-' ){
c=GetChar(m_pTokEnd,bIsEscape);
if(c==-1){
break;
}
AddRange(
(unsigned char)cPrev,(unsigned char)c,cset);
cPrev= GetChar(m_pTokEnd,bIsEscape);
}else{
cset[(unsigned char)cPrev]= true;
cPrev= c;
}
}
if( cPrev==-1 || c==-1 ){
throw REXA_Exception(
m_pTokBeg,m_pcszToParse,"closing ] missing");
}
m_charSet= cset;
if(bUseComplement){
m_charSet.flip();
}
return m_eLastSymbol=eCharSet;
}
case '.':{
m_pTokEnd++;
m_charSet= REXA_CharSet();
for(int i=0;i<256;i++){
m_charSet[i]= true;
}
return m_eLastSymbol=eCharSet;
}
case '$':{
if( isalpha(m_pTokEnd[1])||m_pTokEnd[1]=='_' ){
m_pTokEnd++;
m_strLiteral="";
while( isalnum(*m_pTokEnd) || *m_pTokEnd=='_' ){
m_strLiteral+= *m_pTokEnd++;
}
return m_eLastSymbol=eIdent;
}
}
default:
{
if( *m_pTokEnd=='\\' && m_pTokEnd[1]=='i' ){
m_pTokEnd++;
m_pTokEnd++;
return m_eLastSymbol=eIgnoreCase;
}else{
bool bIsEscape;
int c=GetChar(m_pTokEnd,bIsEscape);
m_charSet= REXA_CharSet();
m_charSet[(unsigned char)c]= true;
return m_eLastSymbol=eCharSet;
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
// grammar related functions
///////////////////////////////////////////////////////////////////////////////
//-----------------------------------------------------------------------------
REXA_ParserImpl::~REXA_ParserImpl()
//-----------------------------------------------------------------------------
{
for(int i=0;i<m_vecAllStates.size();++i){
delete m_vecAllStates[i];
}
}
//-----------------------------------------------------------------------------
REXA_ParserImpl::REXA_ParserImpl(const REXA_ParserImpl& parserToCopy)
:m_nStateIdMax(parserToCopy.m_nStateIdMax)
//-----------------------------------------------------------------------------
{
for(int i=0;i<parserToCopy.m_vecAllStates.size();i++){
m_vecAllStates.push_back(parserToCopy.m_vecAllStates[i]->Clone());
}
}
//-----------------------------------------------------------------------------
REXA_ParserImpl& REXA_ParserImpl::operator= (const REXA_ParserImpl& parser)
//-----------------------------------------------------------------------------
{
int i;
for(i=0;i<m_vecAllStates.size();++i){
delete m_vecAllStates[i];
}
m_nStateIdMax= parser.m_nStateIdMax;
for(i=0;i<parser.m_vecAllStates.size();i++){
m_vecAllStates.push_back(parser.m_vecAllStates[i]->Clone());
}
return *this;
}
//-----------------------------------------------------------------------------
void REXA_ParserImpl::InsertNamedState(REXA_BegEnd name,
REXA_NDFAState* pState,int nAnswer)
//-----------------------------------------------------------------------------
{
vector<REXA_NDFAState*> vecAccepting= CollectAcceptingStates(pState);
for(int i=0;i<vecAccepting.size();i++){
vecAccepting[i]->m_nAnswer= nAnswer;
}
pair<map<string,REXA_NDFAState*>::iterator,bool> itBool=
m_mapNamedStates.insert(make_pair(string(name.first,name.second),pState));
if( !itBool.second ){
itBool.first->second= pState;
}
}
//-----------------------------------------------------------------------------
void REXA_ParserImpl::NamedRegExp (int nAnswer)
//-----------------------------------------------------------------------------
//$1 NamedRegExp= Ident "=" RegExp.
{
m_scanner.Match(eIdent);
REXA_BegEnd name= m_scanner.GetToken();
m_scanner.AdvanceAndMatch(eCharSet);
if( m_scanner.m_pTokEnd[-1]!= '=' ){
throw REXA_Exception(m_scanner.m_pTokBeg,m_scanner.m_pcszToParse,"'=' expected");
}
m_scanner();
REXA_NDFAState* pState=RegExp();
if( m_scanner.GetLastSymbol()!=eEof){
throw REXA_Exception(
m_scanner.m_pTokBeg,
m_scanner.m_pcszToParse,
"unexpected symbol");
}
InsertNamedState(name,pState,nAnswer);
}
//-----------------------------------------------------------------------------
const REXA_NDFAState* REXA_ParserImpl::ParseRegExp (string strRegExp)
//-----------------------------------------------------------------------------
{
m_scanner.SetSource(strRegExp.c_str());
m_scanner();
const REXA_NDFAState* cRegExp= RegExp();
if( m_scanner.GetLastSymbol()!=eEof){
throw REXA_Exception(
m_scanner.m_pTokBeg,
m_scanner.m_pcszToParse,
"unexpected symbol");
}
return cRegExp;
}
//-----------------------------------------------------------------------------
bool REXA_ParserImpl::ParseRegExpDefinition(string strRegExpDefiniton,
int nAnswer)
//-----------------------------------------------------------------------------
{
m_scanner.SetSource(strRegExpDefiniton.c_str());
m_scanner();
NamedRegExp(nAnswer);
return true;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::RegExp ()
//-----------------------------------------------------------------------------
//$2 RegExp= Term {"|" Term}.
{
REXA_NDFAState* pState= Term();
while( m_scanner.GetLastSymbol()==eAlternative ){
m_scanner();
pState= Union(pState,Term());
}
return pState;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Term ()
//-----------------------------------------------------------------------------
//$3 Term= Factor {Factor}.
{
REXA_NDFAState* pState= Factor();
while( m_scanner.GetLastSymbol()&m_eAtomSet ){
pState= Concat(pState,Factor());
}
return pState;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Factor ()
//-----------------------------------------------------------------------------
//$4 Factor= Atom ["*"|"+"|"?"|"\i"].
{
REXA_NDFAState* pState= Atom();
switch(m_scanner.GetLastSymbol()){
case eStar:
{
pState= MakeStar(pState);
m_scanner();
return pState;
}
case ePlus:
{
pState= MakePlus(pState);
m_scanner();
return pState;
}
case eOpt:
{
pState= MakeOptional(pState);
m_scanner();
return pState;
}
case eIgnoreCase:
{
pState= MakeIgnoreCase(pState);
m_scanner();
return pState;
}
default:
return pState;
}
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Atom ()
//-----------------------------------------------------------------------------
//$5 Atom= REXA_CharSet | Ident | "(" RegExp ").
{
switch(m_scanner.GetLastSymbol()){
case eCharSet:
{
REXA_NDFAState* pState= ParseCharSet(m_scanner.GetCharSet());
m_scanner();
return pState;
}
case eIdent:
{
REXA_NDFAState* pState= Name(m_scanner.GetToken());
m_scanner();
return pState;
}
case eLParen:
{
m_scanner();
REXA_NDFAState* pState= RegExp();
m_scanner.Match(eRParen);
m_scanner();
return pState;
}
default: throw REXA_Exception(
m_scanner.m_pTokBeg,
m_scanner.m_pcszToParse,
"[,',( or ident expected");
}
return 0;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Name (REXA_BegEnd nameToFind)
//-----------------------------------------------------------------------------
{
map<string,REXA_NDFAState*>::iterator it=
m_mapNamedStates.find(string(nameToFind.first,nameToFind.second));
if( it!= m_mapNamedStates.end() ){
return CloneNDFA(it->second);;
}
throw REXA_Exception( m_scanner.m_pTokBeg,
m_scanner.m_pcszToParse,
string(nameToFind.first,nameToFind.second-nameToFind.first)
+ " not found in name dictionary");
return 0;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::ParseCharSet (REXA_CharSet& rCharSet)
//-----------------------------------------------------------------------------
{
REXA_NDFAState* pStartState= GetNewNDFAState(NextStateId(),false);
REXA_NDFAState* pFinalState= GetNewNDFAState(NextStateId(),true);
pStartState->m_transitions.push_back(REXA_NDFATransition(rCharSet,pFinalState));
return pStartState;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Concat (REXA_NDFAState* pState0,REXA_NDFAState* pState1)
//-----------------------------------------------------------------------------
{ //Terms: F0 resp. F1: StateGraph reachable from 'pState0' resp. 'pState1'
//1. for each transition in F0 leading from state q to an accepting state
// add a transition leading from q to 'pState1'
vector<pair<REXA_NDFAState*,REXA_NDFATransition> > vecTrans=
CollectAcceptingTransitions(pState0);
vector<REXA_NDFAState*> vecAccepting= CollectAcceptingStates(pState0);
bool bMakeState0Accepting= false;
for(int i=0;i<vecTrans.size();i++){
vecTrans[i].first->m_transitions.push_back(
REXA_NDFATransition(vecTrans[i].second.m_transChars,pState1));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -