📄 rexalgorithm.cpp
字号:
vector< REXA_CharSet > unionSets(256);
REXA_CharSetSet resultSet;
REXA_CharSetSet::const_iterator it;
for(it= rCharSets.begin();it!=rCharSets.end();++it){
charSets[it->count()].insert(*it);
}
//cumulative union
for(int k= 1;k<256;k++){
for(int j=0;j<k;j++){
for(it= charSets[j].begin();it!= charSets[j].end();++it){
unionSets[k]|= *it;
}
}
}
//reduce charset into disjoint charsets
for(int l=0;l<256;l++){
for(it= charSets[l].begin();it!= charSets[l].end();++it){
REXA_CharSet reducedSet= *it;
REXA_CharSet unionSet= ~unionSets[l];
reducedSet&= unionSet;
resultSet.insert(reducedSet);
}
}
return resultSet;
}
//-----------------------------------------------------------------------------
void REXA_TranslatingTable::CollectCharSets(const REXA_NDFAState* pCurState,
set<int>& rVisitedStates,
REXA_CharSetSet& rCharSets)
//-----------------------------------------------------------------------------
{
if( rVisitedStates.find(pCurState->m_id) == rVisitedStates.end() ){
rVisitedStates.insert(pCurState->m_id);
for(int i=0;i<pCurState->m_transitions.size();i++){
if( pCurState->m_transitions[i].m_transChars.count()!=0 ){
rCharSets.insert(pCurState->m_transitions[i].m_transChars);
CollectCharSets(
pCurState->m_transitions[i].m_nextState,
rVisitedStates,rCharSets);
}
}
}
}
//# Generates a translation table from the NDFA-states present in 'vecpStates'
//example: NDFA having the States S,A,E. S('a') -> A. S('a') -> E. A('b') -> A. A('b') -> E.
//TranslationTable:
// REXA_StateSet 'a' 'b'
//--------------------------
// {S} {A,E}
// {A,E} {A,E}
// corresponding DFA: {S}('a') -> {A,E}. {A,E}('b') -> {A,E}
//-----------------------------------------------------------------------------
void REXA_TranslatingTable::GenerateTranslationTable(set<const REXA_NDFAState*>& rSetState)
//-----------------------------------------------------------------------------
{
set< REXA_StateSet > setPendingStateSets;
REXA_StateSet startStateSet;
startStateSet.insert(m_pStartState);
setPendingStateSets.insert(startStateSet);
while( setPendingStateSets.size()>0 ){
REXA_StateSet curStateSet= *setPendingStateSets.begin();
setPendingStateSets.erase(curStateSet);
if( m_mapTransitions.find(curStateSet)==m_mapTransitions.end() ){
TransitionVector transVector(m_vecCharsets.size());
for(int i=0;i<m_vecCharsets.size();i++){
REXA_StateSet::const_iterator it;
for(it= curStateSet.begin();it!=curStateSet.end();++it){
for(int j=0;j<(*it)->m_transitions.size();j++){
const REXA_NDFATransition& rTransition= (*it)->m_transitions[j];
REXA_CharSet charSet= rTransition.m_transChars;
charSet&= m_vecCharsets[i];
if( charSet.any() ){
transVector[i].insert(rTransition.m_nextState);
}
}
}
}
m_mapTransitions.insert(make_pair(curStateSet,transVector));
for(int l=0;l<transVector.size();l++){
setPendingStateSets.insert(transVector[l]);
}
}
}
}
static REXA_DFAState* GetIllegalState()
{
static REXA_DFAState illegalState(REXA_DFAState::eNoAnswer,true);
return &illegalState;
}
static REXA_DFAState* GetEosState()
{
bool bFirstTime= true;
static REXA_DFAState eosState(REXA_DFAState::eEos,true);
if( bFirstTime ){
bFirstTime= false;
for(int i=0;i<sizeof(eosState.aNext)/sizeof(eosState.aNext[0]);i++){
eosState.aNext[i]= &eosState;
}
}
return &eosState;
}
REXA_DFAState* REXA_DFAState::ms_pIllegalState= ::GetIllegalState();
REXA_DFAState* REXA_DFAState::ms_pEosState= ::GetEosState();
//-------------------------------------------------------------------------------
REXA_DFAState::REXA_DFAState(int nAnwserId,bool bIsFinal)
:m_nAnwserId(nAnwserId),
m_bIsFinal(bIsFinal),
m_bIsAccepting(false)
//-------------------------------------------------------------------------------
{
for(int i=0;i<256;i++) aNext[i]=0;
}
//-------------------------------------------------------------------------------
void REXA_DFAState::CollectStates(REXA_DFAState* pState,set<REXA_DFAState*>& rsetStates)
//-------------------------------------------------------------------------------
{
if( !pState ) return;
pair<set<REXA_DFAState*>::iterator,bool> itBool= rsetStates.insert(pState);
if( itBool.second ){
for(int i=0;i<256;i++){
CollectStates((*itBool.first)->aNext[i],rsetStates);
}
}
}
//-----------------------------------------------------------------------------
void REXA_DFAState::DeleteAll()
//-----------------------------------------------------------------------------
{
set<REXA_DFAState*> setStates;
CollectStates(this,setStates);
set<REXA_DFAState*>::iterator it;
for(it= setStates.begin();it!=setStates.end();++it){
if( *it!=ms_pIllegalState && *it!=ms_pEosState ){
delete *it;
}
}
}
//-----------------------------------------------------------------------------
vector<REXA_CharSet> REXA_NDFAToTable::CharsetToVector(const REXA_CharSetSet& rCharSetSet)
//-----------------------------------------------------------------------------
{
vector<REXA_CharSet> vecCharSet;
REXA_CharSetSet::const_iterator it;
for(it= rCharSetSet.begin();it!=rCharSetSet.end();++it){
vecCharSet.push_back(*it);
}
return vecCharSet;
}
//-----------------------------------------------------------------------------
REXA_TranslatingTable::REXA_TranslatingTable (const REXA_NDFAState* pStartState)
//-----------------------------------------------------------------------------
{
set<int> setVisitedStates;
REXA_CharSetSet charSets;
set<const REXA_NDFAState*> setStates;
REXA_NDFAState::CollectStates(pStartState,setStates);
CollectCharSets(pStartState,setVisitedStates,charSets);
m_vecCharsets= REXA_NDFAToTable::CharsetToVector(
ComputeDisjointCharSets(charSets));
m_pStartState= pStartState;
GenerateTranslationTable(setStates);
ComputeOccCounts();
}
//-----------------------------------------------------------------------------
REXA_NDFAToDfa::REXA_NDFAToDfa (const REXA_TranslatingTable& rTable,char cEos)
:m_rTable(rTable),m_cEos((unsigned char)cEos)
//-----------------------------------------------------------------------------
{
}
//-----------------------------------------------------------------------------
REXA_DFAState* REXA_NDFAToDfa::CreateDfaStates(
const REXA_StateSet& rStateSet,
map<REXA_StateSet,REXA_DFAState*>& rMapStateSetDFAState)
//-----------------------------------------------------------------------------
{
pair<map<REXA_StateSet,REXA_DFAState*>::iterator,bool> pairItBool=
rMapStateSetDFAState.insert(make_pair(rStateSet,(REXA_DFAState*)0));
if( pairItBool.second ){
REXA_DFAState* pDFAState= new REXA_DFAState();
pairItBool.first->second= pDFAState;
REXA_TranslatingTable::TranslationMap::const_iterator it=
m_rTable.m_mapTransitions.find(rStateSet);
if( it!= m_rTable.m_mapTransitions.end() ){
const TransitionVector& rvecTrans= it->second;
for(int i=0;i<rvecTrans.size();i++){
if( rvecTrans[i].size()>0 ){
REXA_DFAState* pTransState=
CreateDfaStates(rvecTrans[i],rMapStateSetDFAState);
for(int j=0;j<256;j++){
if( m_rTable.m_vecCharsets[i][j] ){
pDFAState->aNext[j]= pTransState;
}
}
}
}
if( rStateSet.IsAccepting() ){
int nAnswerCode= m_rTable.GetAnswerCode(rStateSet);
pDFAState->m_bIsAccepting= true;
pDFAState->m_nAnwserId= nAnswerCode;
REXA_DFAState* pAcceptState=
new REXA_DFAState(nAnswerCode,true);
pAcceptState->m_bIsAccepting= true;
for(int j=0;j<256;j++){
if( !pDFAState->aNext[j] ){
pDFAState->aNext[j]= pAcceptState;
}
}
}else{
for(int j=0;j<256;j++){
if( !pDFAState->aNext[j] ){
pDFAState->aNext[j]= REXA_DFAState::ms_pIllegalState;
}
}
pDFAState->aNext[m_cEos]= REXA_DFAState::ms_pEosState;
}
}
return pDFAState;
}
return pairItBool.first->second;
}
//-----------------------------------------------------------------------------
REXA_DFAState* REXA_NDFAToDfa::TableToDfaStates()
//-----------------------------------------------------------------------------
{
REXA_StateSet startStateSet;
startStateSet.insert(m_rTable.m_pStartState);
map<REXA_StateSet,REXA_DFAState*> mapStateSetDFAState;
REXA_TranslatingTable::TranslationMap::const_iterator it;
it= m_rTable.m_mapTransitions.find(startStateSet);
if( it!=m_rTable.m_mapTransitions.end() ){
return CreateDfaStates(
it->first,
mapStateSetDFAState);
}
return 0;
}
//-----------------------------------------------------------------------------
REXA_Parser::REXA_Parser (char cEos)
//-----------------------------------------------------------------------------
{
m_pParserImp= new REXA_ParserImpl(cEos);
}
//-----------------------------------------------------------------------------
REXA_Parser::~REXA_Parser ()
//-----------------------------------------------------------------------------
{
delete m_pParserImp;
}
//-----------------------------------------------------------------------------
REXA_DFAState* REXA_Parser::ParseRegExp (string strRegExp)
//-----------------------------------------------------------------------------
{
const REXA_NDFAState* pNdfa= m_pParserImp->ParseRegExp(strRegExp);
REXA_TranslatingTable table(pNdfa);
REXA_NDFAToDfa ndfaToDfa(table,m_pParserImp->m_cEos);
return ndfaToDfa.TableToDfaStates();
}
//-----------------------------------------------------------------------------
void REXA_Parser::AddRegDefinition ( string strDefName,
string strDefExpression,
int nAnswer)
//-----------------------------------------------------------------------------
{
string str= strDefName+ "=" + strDefExpression;
m_pParserImp->ParseRegExpDefinition(str,nAnswer);
}
//-----------------------------------------------------------------------------
void REXA_Parser::RemoveRegDefinition ( string strDefName)
//-----------------------------------------------------------------------------
{
m_pParserImp->m_mapNamedStates.erase(strDefName);
}
//-----------------------------------------------------------------------------
void REXA_Parser::GetRegDefinitions (vector<string>& vecDefinitions)
//-----------------------------------------------------------------------------
{
map<string,REXA_NDFAState*>::iterator it;
for(it= m_pParserImp->m_mapNamedStates.begin();
it!=m_pParserImp->m_mapNamedStates.end();
++it){
vecDefinitions.push_back(it->first);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -