⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nfa-to-dfa.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
📖 第 1 页 / 共 2 页
字号:

#define NFA_TO_DFA_VERSION "6"

// NFA to DFA conversion library.
// (C) Alexander Okhotin <okhotin@cs.queensu.ca>, 1999-2002.
// (C) Vladimir Prus <ghost@cs.msu.su>, 2000.

#ifdef __BORLANDC__
#define _RWSTD_CONTAINER_BUFFER_SIZE 1024
#endif

static const int _bubble_threshold = 6;

#include <cstddef>
#include <cstdlib>

using namespace std;

#include "dolphin.h"
#include "nfa-to-dfa.h"
#include "nfa-to-dfa-stl.h"
#include "stl.h"

#include <stack>
#include <queue>
#include <fstream>

//#define DFA_DEBUG_EPSILON_CLOSURE
//#define DFA_DEBUG_SUBSET_CONSTRUCTION
//#define DFA_DEBUG_SUBSET_INDEX_LOOKUP
//#define DFA_DEBUG_DEAD_STATES
//#define DFA_THOROUGHLY_DEBUG_DEAD_STATES
//#define DEBUG_DFA_MINIMIZATION


// (AO) The following comment belongs to VP. I don't understand a single word
// in it.

/*	Internal data structures used to speed up convert_nfa_to_dfa
	I belive that the biggest problem is that 1-2 transitions on
	one symbol exists, thus if we proceess each symbol
	individually, STL algorithm call themself can take more time
	then actuall processing
	*/

struct internal_nfa_state
{
	int first_transition, last_transition;
	int first_epsilon_transition, last_epsilon_transition;
	
	internal_nfa_state(int first_transition, int last_transition) :
		first_transition(first_transition),
		last_transition(last_transition)
	{
	}	
};

struct internal_dfa_state
{
	vector<int> nfa_states;
	
	internal_dfa_state() : nfa_states(10) { nfa_states.clear(); }
	static int hash(const internal_dfa_state &ds)
	{
		return accumulate(ds.nfa_states, 0);
	}
	friend bool operator<(const internal_dfa_state &s1, const internal_dfa_state &s2)
	{
		return s1.nfa_states < s2.nfa_states;
	}
	friend bool operator==(const internal_dfa_state &s1, const internal_dfa_state &s2)
	{
		return s1.nfa_states == s2.nfa_states;
	}
};

template<class lookup_structure> struct internal_nfa_dfa
{
	NFA &nfa;
	
//	vector<int> epsilon_transitions;
	lookup_structure cache;
	typedef typename lookup_structure::iterator cache_iterator;
	vector<cache_iterator> dfa_state_aux;
	vector<bool> dfa_accepting;
	
	typedef vector<pair<int, int> > one_state_data;
	typedef vector<pair<int, int> >::iterator one_state_data_iterator;
	
	vector<vector<int> > pred;
	
	vector<bool> accepting_state_is_reachable;
	
	int rejected_states;
	
	internal_nfa_dfa(NFA &nfa);
	void make_epsilon_transitions(NFA &nfa);
	void follow_epsilon(const vector<int> &nfa_states, vector<int> &closure);
	
	int add_dfa_state(vector<int> &nfa_states)
	{
		bool dfa_state_is_accepting(false);
		for(vector<int>::iterator i=nfa_states.begin(); i!=nfa_states.end(); i++)
			if(nfa.states[*i].is_accepting)
			{
				dfa_state_is_accepting = true;
				break;
			}
		
		internal_dfa_state ds;
		ds.nfa_states=nfa_states;
		cache_iterator i=cache.find(ds);
		if(i!=cache.end())
			return i->second;
		else
		{
			pair<cache_iterator, bool> p=cache.insert(make_pair(ds, dfa_state_aux.size()));
			dfa_state_aux.push_back(p.first);
			dfa_accepting.push_back(dfa_state_is_accepting);
			pred.push_back(vector<int>(8));
			pred.back().clear();
			return dfa_state_aux.size()-1;
		}
	}
	const vector<int>& nfa_states_for_dfa_state(int dfa_state)
	{
		return dfa_state_aux[dfa_state]->first.nfa_states;
	}
	void get_dfa_state_data(int dfa_state, one_state_data &osd)
	{
		cache_iterator i=dfa_state_aux[dfa_state];
		for(vector<int>::const_iterator j=i->first.nfa_states.begin(); j!=i->first.nfa_states.end(); j++)
			copy(nfa.states[*j].transitions.begin(), nfa.states[*j].transitions.end(), back_inserter(osd));
		//sort(osd.begin(), osd.end(), greater<pair<int, int> >());
		sort(osd, greater< pair<int, int> >());
		unique_and_erase(osd);
	}
	int get_transitions_for_one_symbol(one_state_data &osd, vector<int> &nstates)
	{
		if(osd.empty()) return -1;
		one_state_data_iterator i=osd.end()-1;
		int sym=i->first;
		while(i->first==sym && i>=osd.begin())
			nstates.push_back((i--)->second);
		i++;
		osd.erase(i, osd.end());
		make_set(nstates);
		return sym;		   
	}
	void mark_accessibility(int dfa_state, int target_state)
	{
		pred[target_state].push_back(dfa_state);
	}
	void find_dead_states()
	{
		// Tells us we have seen this node
		vector<bool> stacked(dfa_accepting);
		// Initial nodes
		vector<int> acc=find_all_indices(stacked, true);
		// Nodes that we've seen but haven't processed yet
		stack<int, vector<int> > s(acc);
		
		while(!s.empty())
		{
			int t=s.top(); s.pop();
			vector<int> &p=pred[t];
			for(vector<int>::iterator i=p.begin(); i!=p.end(); i++)
				if(!stacked[*i])
				{
					stacked[*i]=true;
					s.push(*i);
				}
		}
		accepting_state_is_reachable.swap(stacked);
	}

	// Just the same as partition from 'nfa-to-dfa.h' but uses vectors
	// everywhere. I wanted to retain the original version for
	// testing purposes and redeclared the struct here
	// See comments below for field explanation
	struct Partition
	{
		std::vector<int> state_to_group;
		std::vector<std::vector<int> > groups;
		std::vector<int> group_to_original_group;
		int group_containing_initial_state;

		Partition(const DFA &dfa) : groups(2)
		{
			// 3 initial groups - acc, nonacc and dead(ignored)
			
			vector<bool>::iterator a = dfa.accepting_state_is_reachable.begin();
			for(vector<DFA::State>::const_iterator i=dfa.states.begin(); i!=dfa.states.end(); i++, a++)
			{
				int group_no=(i->is_accepting ? 1 : ((*a) ? 0 : -1));
				// AO: What does _THAT_ mean?
				dfa.state_to_group.push_back;
				if(group_no!=-1)
					dfa.groups[group_no].push_back(distance(dfa.states.begin(), i));
			}		
		}
	};
	void final_fixup(DFA &dfa)
	{
		for(unsigned int i=0; i<dfa_state_aux.size(); i++)
		{
			vector<int> &v=const_cast<vector<int> &>(nfa_states_for_dfa_state(i));
			dfa.states[i].nfa_states.swap(v);
			dfa.states[i].is_accepting=dfa_accepting[i];
		}
		dfa.state_is_live=accepting_state_is_reachable;
	}
	void print_dfa_statistics(DFA &dfa, ostream &os)
	{
		os << "DFA has " << dfa.states.size() << " states\n";
		int count=0;
		for (int i=0; i<pred.size(); i++)
			count+=pred[i].size();
		os << "State has " << fixed << float(count)/pred.size() 
			<< " predecessors on average\n";
	}
};

template<class lookup_structure> internal_nfa_dfa<lookup_structure>::internal_nfa_dfa(NFA &nfa) : nfa(nfa), rejected_states(0)
{
}

template<class lookup_structure> void internal_nfa_dfa<lookup_structure>::make_epsilon_transitions(NFA &nfa)
{
	for(unsigned int s=0; s<nfa.states.size(); s++)
	{
		// Weed out non-important states	
		vector<int>::iterator cur = nfa.states[s].epsilon_transitions.begin(), ins = cur;
		while(cur!=nfa.states[s].epsilon_transitions.end())
		{
			if(nfa.states[*cur].is_important) 
				*ins++=*cur;
			++cur;
		}
		
		if(ins!=nfa.states[s].epsilon_transitions.end())
			nfa.states[s].epsilon_transitions.erase(ins, nfa.states[s].epsilon_transitions.end());
	}
}

template<class lookup_structure> void internal_nfa_dfa<lookup_structure>::follow_epsilon(const vector<int> &current_nfa_states, vector<int> &closure)
{
	for(vector<int>::const_iterator i=current_nfa_states.begin(); i!=current_nfa_states.end(); i++)
	{
		copy(nfa.states[*i].epsilon_transitions.begin(),
			nfa.states[*i].epsilon_transitions.end(),
			back_inserter(closure));
	}
	
	make_set(closure);	  
}

void epsilon_closure(NFA &nfa)
{
	for(unsigned int i=0; i<nfa.states.size(); i++)
	{
		nfa.states[i].epsilon_transitions.push_back(i);
		make_set(nfa.states[i].epsilon_transitions);
	}
	
	bool once_more=true;
	vector<int> more_transitions;
	vector<int> tmp;
	while(once_more)
	{
		once_more=false;
		
		for(unsigned int i=0; i<nfa.states.size(); i++)
		{
			NFA::State &state=nfa.states[i];
			
			more_transitions.clear();
			
			for(vector<int>::iterator p = state.epsilon_transitions.begin(); p != state.epsilon_transitions.end(); ++p)
				copy(nfa.states[*p].epsilon_transitions.begin(), 
					 nfa.states[*p].epsilon_transitions.end(),
					 back_inserter(more_transitions));
			
			make_set(more_transitions);
			
			unsigned int cs=state.epsilon_transitions.size();				
			
			tmp.clear();
			set_union(state.epsilon_transitions.begin(), state.epsilon_transitions.begin(),
				more_transitions.begin(), more_transitions.end(),
				back_inserter(tmp));
			
			state.epsilon_transitions.swap(tmp);
			
			once_more|=(state.epsilon_transitions.size()>cs);
		}
	}

}

void DFA::align_transition_table()
{
	for(int i=0; i<states.size(); i++)
	{
		vector<int> &transitions=states[i].transitions;
		
		while(transitions.size()<number_of_symbols)
			transitions.push_back(-1);
	}
}

#if 0
struct is_not_important : public binary_function<NFA, int, bool>
{
	bool accepting;
	is_not_important() : accepting(false)
	{
	}
	bool operator()(const NFA &nfa, int state)
	{
		accepting|=nfa.states[state].is_accepting;
		return nfa.states[state].is_important==false;
	}
};
#endif

void convert_nfa_to_dfa(NFA &nfa, DFA &dfa)
{
	if(!nfa.states.size()) return;
	
	epsilon_closure(nfa);
	
	internal_nfa_dfa<map<internal_dfa_state, int> > internal(nfa);
	internal.make_epsilon_transitions(nfa);
	
	vector<int> epsilon_scratch;
	
	dfa.number_of_symbols=nfa.number_of_symbols;
	
	DFA::State dstate0;
	
	epsilon_scratch.clear();	
	internal.follow_epsilon(vector<int>(1, 0), epsilon_scratch);
	internal.add_dfa_state(epsilon_scratch);
	
	dfa.states.push_back(dstate0);
	
	map< vector<int>, int > nfa_states_to_dfa_state;
	vector<int> target_nfa_states;
	internal_nfa_dfa< map<internal_dfa_state, int> > :: one_state_data osd;
		
	for(int i=0; i<dfa.states.size(); i++)
	{
		osd.clear();
		internal.get_dfa_state_data(i, osd);

		nfa_states_to_dfa_state.clear();

		while(true)
		{
			target_nfa_states.clear();
			int sym=internal.get_transitions_for_one_symbol(osd, target_nfa_states);
			
			if(sym==0) continue;
			if(sym==-1) break;
			if(target_nfa_states.empty()) continue;
			
			map<vector<int>, int>::iterator s=nfa_states_to_dfa_state.find(target_nfa_states);
			if(s!=nfa_states_to_dfa_state.end())
			{
				dfa.add_transition(i, s->second, sym);
				continue;
			}
			
			epsilon_scratch.clear();
			internal.follow_epsilon(target_nfa_states, epsilon_scratch);
			
			int cs = dfa.states.size();
			int target_state_number=internal.add_dfa_state(epsilon_scratch /*new_index_entry.nfa_states*/);
			if(target_state_number>=cs)
				dfa.states.push_back(DFA::State());
			internal.mark_accessibility(i, target_state_number);
			
			nfa_states_to_dfa_state[target_nfa_states]=target_state_number;
			
			dfa.add_transition(i, target_state_number, sym);
		}	
	}
	
	internal.find_dead_states();
	internal.final_fixup(dfa);
	
	dfa.align_transition_table();
}

// a state is considered dead if either it is unreachable from the initial
// state, or no final state can be reached from it.
// state_is_live[i]==false <==> i-th state is dead.
void DFA::determine_dead_states()
{
	if(state_is_live.size()) state_is_live.clear();
	
#ifdef DFA_DEBUG_DEAD_STATES
	cout << "DFA::determine_dead_states()\n";
#endif
	int n=states.size();
	
	vector<bool> reachable_from_initial, can_lead_to_final;
#ifdef DFA_THOROUGHLY_DEBUG_DEAD_STATES
	cout << "0 is reachable from initial because it IS the initial state.\n";
#endif
	for(int i=0; i<n; i++)
	{
		reachable_from_initial.push_back(i==0);
		can_lead_to_final.push_back(states[i].is_accepting);
	#ifdef DFA_THOROUGHLY_DEBUG_DEAD_STATES
		if(states[i].is_accepting)
			cout << i << " can lead to final because it IS final.\n";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -