📄 rexalgorithm.cpp
字号:
{
if( eSymbol!=operator()() ){
throw REXA_Exception(eSymbol,GetToken(),m_pcszToParse);
}
}
//------------------------------------------------------------------------------
char REXA_Scanner::GetChar(const char*& rpFrom,bool& rbIsEscape)
//------------------------------------------------------------------------------
{
rbIsEscape= false;
if( *rpFrom=='\0' ) return '\0';
char c;
if( *rpFrom=='\\' ){
rbIsEscape= true;
rpFrom++;
switch(*rpFrom){
case '0': return '\0';
case 'n': rpFrom++; return '\n';
case 't': rpFrom++; return '\t';
case 'v': rpFrom++; return '\v';
case 'a': rpFrom++; return '\a';
case 'b': rpFrom++; return '\b';
case 'r': rpFrom++; return '\r';
case 'x': c= *rpFrom++;
if( isxdigit(rpFrom[0]) && isxdigit(rpFrom[1]) ){
unsigned hexChar=0;
for(int i=0;i<2;i++){
hexChar*=16;
hexChar+= isdigit(rpFrom[i])?(rpFrom[i]-'0'):(toupper(rpFrom[i])-'A'+16);
}
c= (char)hexChar;
rpFrom+= 2;
}
return c;
default: c= *rpFrom++; return c;
}
}
return *rpFrom++;
}
//------------------------------------------------------------------------------
void REXA_Scanner::AddRange(
unsigned char cFirst,
unsigned char cLast,
REXA_CharSet& rCharSet)
//------------------------------------------------------------------------------
{
if( cFirst>cLast ){
swap(cFirst,cLast);
}
for(unsigned char c= cFirst; c<cLast;c++){
rCharSet[c]= true;
}
rCharSet[cLast]= true;
}
//------------------------------------------------------------------------------
REXA_ESymbols REXA_Scanner::operator() ()
//------------------------------------------------------------------------------
{
m_pTokBeg= m_pTokEnd= SkipWhiteSpace();
switch(*m_pTokEnd){
case '\0': return m_eLastSymbol=eEof;
case '+': m_pTokEnd++; return m_eLastSymbol=ePlus;
case '?': m_pTokEnd++; return m_eLastSymbol=eOpt;
case '*': m_pTokEnd++; return m_eLastSymbol=eStar;
case '(': m_pTokEnd++; return m_eLastSymbol=eLParen;
case ')': m_pTokEnd++; return m_eLastSymbol=eRParen;
case '=': m_pTokEnd++; return m_eLastSymbol=eAssign;
case '|': m_pTokEnd++; return m_eLastSymbol=eAlternative;
case '[': {
m_pTokEnd++;
REXA_CharSet cset;
char c;
bool bIsEscape;
bool bIsFirstTime= true;
bool bPrevWasRangeSeparator= false;
bool bUseComplement= false;
char cPrev;
cPrev=GetChar(m_pTokEnd,bIsEscape);
if( cPrev=='^' && !bIsEscape ){
bUseComplement= true;
cPrev=GetChar(m_pTokEnd,bIsEscape);
}
while( cPrev && !(cPrev==']' && !bIsEscape) ){
c=GetChar(m_pTokEnd,bIsEscape);
if( !bIsEscape && c=='-' ){
c=GetChar(m_pTokEnd,bIsEscape);
if(!c){
break;
}
AddRange(
(unsigned char)cPrev,(unsigned char)c,cset);
cPrev= GetChar(m_pTokEnd,bIsEscape);
}else{
cset[(unsigned char)cPrev]= true;
cPrev= c;
}
}
if( !cPrev || !c ){
throw REXA_Exception(
m_pTokBeg,m_pcszToParse,"closing ] missing");
}
m_charSet= cset;
if(bUseComplement){
m_charSet.flip();
}
return m_eLastSymbol=eCharSet;
}
case '.':{
m_pTokEnd++;
m_charSet= REXA_CharSet();
for(int i=0;i<256;i++){
m_charSet[i]= true;
}
return m_eLastSymbol=eCharSet;
}
case '$':{
if( isalpha(m_pTokEnd[1])||m_pTokEnd[1]=='_' ){
m_pTokEnd++;
m_strLiteral="";
while( isalpha(*m_pTokEnd)||isdigit(*m_pTokEnd)||m_pTokEnd[1]=='_' ){
m_strLiteral+= *m_pTokEnd++;
}
return m_eLastSymbol=eIdent;
}
}
default:
{
if( *m_pTokEnd=='\\' && m_pTokEnd[1]=='i' ){
m_pTokEnd++;
m_pTokEnd++;
return m_eLastSymbol=eIgnoreCase;
}else{
bool bIsEscape;
char c=GetChar(m_pTokEnd,bIsEscape);
m_charSet= REXA_CharSet();
m_charSet[(unsigned char)c]= true;
return m_eLastSymbol=eCharSet;
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
// grammar related functions
///////////////////////////////////////////////////////////////////////////////
//-----------------------------------------------------------------------------
REXA_ParserImpl::~REXA_ParserImpl()
//-----------------------------------------------------------------------------
{
for(int i=0;i<m_vecAllStates.size();++i){
delete m_vecAllStates[i];
}
}
//-----------------------------------------------------------------------------
void REXA_ParserImpl::InsertNamedState(REXA_BegEnd name,
REXA_NDFAState* pState,int nAnswer)
//-----------------------------------------------------------------------------
{
vector<REXA_NDFAState*> vecAccepting= CollectAcceptingStates(pState);
for(int i=0;i<vecAccepting.size();i++){
vecAccepting[i]->m_nAnswer= nAnswer;
}
pair<map<string,REXA_NDFAState*>::iterator,bool> itBool=
m_mapNamedStates.insert(make_pair(string(name.first,name.second),pState));
if( !itBool.second ){
itBool.first->second= pState;
}
}
//-----------------------------------------------------------------------------
void REXA_ParserImpl::NamedRegExp (int nAnswer)
//-----------------------------------------------------------------------------
//$1 NamedRegExp= Ident "=" RegExp.
{
m_scanner.Match(eIdent);
REXA_BegEnd name= m_scanner.GetToken();
m_scanner.AdvanceAndMatch(eAssign);
m_scanner();
REXA_NDFAState* pState=RegExp();
if( m_scanner.GetLastSymbol()!=eEof){
throw REXA_Exception(
m_scanner.m_pTokBeg,
m_scanner.m_pcszToParse,
"unexpected symbol");
}
InsertNamedState(name,pState,nAnswer);
}
//-----------------------------------------------------------------------------
const REXA_NDFAState* REXA_ParserImpl::ParseRegExp (string strRegExp)
//-----------------------------------------------------------------------------
{
m_scanner.SetSource(strRegExp.c_str());
m_scanner();
const REXA_NDFAState* cRegExp= RegExp();
if( m_scanner.GetLastSymbol()!=eEof){
throw REXA_Exception(
m_scanner.m_pTokBeg,
m_scanner.m_pcszToParse,
"unexpected symbol");
}
return cRegExp;
}
//-----------------------------------------------------------------------------
bool REXA_ParserImpl::ParseRegExpDefinition(string strRegExpDefiniton,
int nAnswer)
//-----------------------------------------------------------------------------
{
m_scanner.SetSource(strRegExpDefiniton.c_str());
m_scanner();
NamedRegExp(nAnswer);
return true;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::RegExp ()
//-----------------------------------------------------------------------------
//$2 RegExp= Term {"|" Term}.
{
REXA_NDFAState* pState= Term();
while( m_scanner.GetLastSymbol()==eAlternative ){
m_scanner();
pState= Union(pState,Term());
}
return pState;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Term ()
//-----------------------------------------------------------------------------
//$3 Term= Factor {Factor}.
{
REXA_NDFAState* pState= Factor();
while( m_scanner.GetLastSymbol()&m_eAtomSet ){
pState= Concat(pState,Factor());
}
return pState;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Factor ()
//-----------------------------------------------------------------------------
//$4 Factor= Atom ["*"|"+"|"?"|"\i"].
{
REXA_NDFAState* pState= Atom();
switch(m_scanner.GetLastSymbol()){
case eStar:
{
pState= MakeStar(pState);
m_scanner();
return pState;
}
case ePlus:
{
pState= MakePlus(pState);
m_scanner();
return pState;
}
case eOpt:
{
pState= MakeOptional(pState);
m_scanner();
return pState;
}
case eIgnoreCase:
{
pState= MakeIgnoreCase(pState);
m_scanner();
return pState;
}
default:
return pState;
}
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Atom ()
//-----------------------------------------------------------------------------
//$5 Atom= REXA_CharSet | Ident | "(" RegExp ").
{
switch(m_scanner.GetLastSymbol()){
case eCharSet:
{
REXA_NDFAState* pState= ParseCharSet(m_scanner.GetCharSet());
m_scanner();
return pState;
}
case eIdent:
{
REXA_NDFAState* pState= Name(m_scanner.GetToken());
m_scanner();
return pState;
}
case eLParen:
{
m_scanner();
REXA_NDFAState* pState= RegExp();
m_scanner.Match(eRParen);
m_scanner();
return pState;
}
default: throw REXA_Exception(
m_scanner.m_pTokBeg,
m_scanner.m_pcszToParse,
"[,',( or ident expected");
}
return 0;
}
//-----------------------------------------------------------------------------
REXA_NDFAState* REXA_ParserImpl::Name (REXA_BegEnd nameToFind)
//-----------------------------------------------------------------------------
{
map<string,REXA_NDFAState*>::iterator it=
m_mapNamedStates.find(string(nameToFind.first,nameToFind.second));
if( it!= m_mapNamedStates.end() ){
return CloneNDFA(it->second);;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -