⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dfa.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
字号:

#include "dolphin.h"
#include "dfa.h"
using namespace std;
using namespace Whale;

#include <fstream>
#include <stdio.h>
#include "stl.h"
#include "time.h"
#include "process.h"

//#define DEBUG_STATE_GROUP_CREATION
//#define PRINT_SETS_OF_NFA_STATES_CORRESPONDING_TO_DFA_STATES
//#define PRINT_DFA_PARTITION
//#define PRINT_TRANSITION_TABLE

bool analyze_accepting_states_and_report_conflicts()
{
	for(int scn=0; scn<data.start_conditions.size(); scn++)
	{
		StartConditionData &sc=data.start_conditions[scn];
		
		if(find(sc.dfa.state_is_live.begin(), sc.dfa.state_is_live.end(), true)==sc.dfa.state_is_live.end())
		{
			if(data.variables.start_conditions_enabled)
			{
				cout << "Nothing is recognized in start condition " << sc.name << ": all states in the corresponding DFA are dead.\n";
			}
			else
				cout << "All states in the DFA are dead.\n";
			
			continue;
		}
		
		// a set of actions is in conflicts_found if in some state of local
		// dfa there was a conflict between all actions contained in s1.
		set<set<int> > conflicts_found;
		
		for(int i=0; i<sc.dfa.states.size(); i++)
		{
			DFA::State &state=sc.dfa.states[i];
			
			if(sc.dfa.state_is_live[i])
			{
				// this is the only case where conflicts can arise
				
				set<int> expressions_recognized_here;
				set<int> expressions_that_need_lookahead_here;
				for(int ren=0; ren<data.recognized_expressions.size(); ren++)
				{
					RecognizedExpressionData &re=data.recognized_expressions[ren];
					if(re.is_special) continue;
					if(re.offset_of_our_nfa_states_in_nfa_for_start_conditions[scn]<0)
						continue; // if this expression has nothing to do with this sc.
					
					// re.nfa has been incorporated in sc.dfa starting from
					// offset re.offset_of_our_nfa_states_in_nfa_for_start_conditions[scn]
					
					int intermediate_state_number_in_nfa_for_sc=re.offset_of_our_nfa_states_in_nfa_for_start_conditions[scn]+re.intermediate_local_nfa_state;
					int accepting_state_number_in_nfa_for_sc=re.offset_of_our_nfa_states_in_nfa_for_start_conditions[scn]+re.accepting_local_nfa_state;
//					cout << "sc " << scn << ", state " << i << ": re " << ren << " is recognized in nfa-for-sc state " << re.offset_of_our_nfa_states_in_nfa_for_start_conditions[scn] << "+" << re.accepting_local_nfa_state << "=" << accepting_state_number_in_nfa_for_sc << "\n";
					
					if(find(state.nfa_states.begin(), state.nfa_states.end(), accepting_state_number_in_nfa_for_sc)!=state.nfa_states.end())
						expressions_recognized_here.insert(ren);
					
					if(re.intermediate_local_nfa_state>=0 && find(state.nfa_states.begin(), state.nfa_states.end(), intermediate_state_number_in_nfa_for_sc)!=state.nfa_states.end())
						expressions_that_need_lookahead_here.insert(ren);
				}
				
				if(state.is_accepting)
					assert(expressions_recognized_here.size()>0);
				
				if(expressions_recognized_here.size()>=2)
				{
					cout << "Ambiguity between ";
					
					print_a_number_of_expressions(cout, expressions_recognized_here);
					
					if(data.variables.start_conditions_enabled)
						cout << " in start condition " << data.start_conditions[scn].name;
					
					cout << "; conflicting actions defined ";
					vector<Terminal *> action_locations;
					for(set<int>::iterator p=expressions_recognized_here.begin(); p!=expressions_recognized_here.end(); p++)
						action_locations.push_back(data.actions[data.recognized_expressions[*p].start_condition_to_action_number[scn]].declaration->arrow);
					print_a_number_of_terminal_locations(cout, action_locations, "at");
					cout << ".\n";
				}
				if(expressions_recognized_here.size()>0)
					sc.expressions_accepted_in_local_dfa_states.push_back(*expressions_recognized_here.begin());
				else
					sc.expressions_accepted_in_local_dfa_states.push_back(-1);
//				cout << "sc " << scn << ", state " << i << ": we accept " << *expressions_recognized_here.begin() << "\n";
				
				sc.expressions_that_need_lookahead_in_local_dfa_states.push_back(expressions_that_need_lookahead_here);
			}
			else
			{
				sc.expressions_accepted_in_local_dfa_states.push_back(-1);
				sc.expressions_that_need_lookahead_in_local_dfa_states.push_back(set<int>());
			}
		}
	}
	
	return true;
}

void build_final_automaton()
{
	// determining the number of states in the big DFA and the mapping between
	// the states of the big DFA and the states of the DFAs for individual start
	// conditions.
	for(int i=0; i<data.start_conditions.size(); i++)
	{
		StartConditionData &sc=data.start_conditions[i];
		
		sc.offset_of_local_states_in_main_dfa=data.dfa.states.size();
		for(int k=0; k<sc.dfa.states.size(); k++)
		{
			DolphinData::DataOnAutomatonState new_state_data;
			
			new_state_data.number_of_start_condition=i;
			new_state_data.number_of_state_in_dfa_for_sc=k;
			
			int what_is_accepted_in_local_dfa_state=sc.expressions_accepted_in_local_dfa_states[k];
			if(what_is_accepted_in_local_dfa_state>=0)
			{
				RecognizedExpressionData &re=data.recognized_expressions[sc.expressions_accepted_in_local_dfa_states[k]];
//				cout << "accepting state: looking up re " << sc.expressions_accepted_in_local_dfa_states[k] << " in sc " << i << "\n";
				
				new_state_data.action_accepted_here=re.start_condition_to_action_number[i];
			}
			else
				new_state_data.action_accepted_here=-1;
			
			set<int> &what_needs_lookahead_in_local_dfa_state=sc.expressions_that_need_lookahead_in_local_dfa_states[k];
			for(set<int>::iterator p=what_needs_lookahead_in_local_dfa_state.begin(); p!=what_needs_lookahead_in_local_dfa_state.end(); p++)
			{
				RecognizedExpressionData &re=data.recognized_expressions[*p];
//				cout << "lookahead state: looking up re " << *p << " in sc " << i << "\n";
				new_state_data.actions_that_need_lookahead_here.insert(re.start_condition_to_action_number[i]);
			}
			
//			cout << "(sc " << i << ", dfa state " << k << ") -> " << data.final_automaton.size() << "; we accept " << new_state.action_accepted_here << "\n";
			
			data.data_on_dfa_states.push_back(new_state_data);
			data.dfa.add_state();
		}
		
		for(int j=0; j<sc.dfa.states.size(); j++)
			data.dfa.state_is_live.push_back(sc.dfa.state_is_live[j]);
	}
	
	// building the big dfa.
	data.dfa.number_of_symbols=data.variables.alphabet_cardinality;
	data.dfa.align_transition_table();
	for(int i=0; i<data.dfa.states.size(); i++)
	{
		DolphinData::DataOnAutomatonState &state_data=data.data_on_dfa_states[i];
		
		data.dfa.states[i].is_accepting=(state_data.action_accepted_here!=-1);
		
		int local_number_of_state=state_data.number_of_state_in_dfa_for_sc;
		StartConditionData &sc=data.start_conditions[state_data.number_of_start_condition];
		vector<int> &transitions_from_old_state=sc.dfa.states[local_number_of_state].transitions;
		
		for(int j=0; j<data.variables.alphabet_cardinality; j++)
			if(transitions_from_old_state[j]>=0)
			{
				int target_state=sc.offset_of_local_states_in_main_dfa+transitions_from_old_state[j];
				data.dfa.add_transition(i, target_state, j);
			}
	}
	cout << data.dfa.states.size() << " DFA states.\n";
	
	// creating the initial partition.
	int number_of_types_of_states=0;
	map<pair<int, set<int> >, int> types_of_states;
	int number_of_dead_states=0;
	for(int i=0; i<data.dfa.states.size(); i++)
		if(!data.dfa.state_is_live[i])
		{
			data.dfa_partition.state_to_group.push_back(-1);
			number_of_dead_states++;
		}
		else
		{
			DolphinData::DataOnAutomatonState &state_data=data.data_on_dfa_states[i];
			pair<int, set<int> > type_of_this_state=make_pair(state_data.action_accepted_here, state_data.actions_that_need_lookahead_here);
			
			if(types_of_states.count(type_of_this_state))
				data.dfa_partition.state_to_group.push_back(types_of_states[type_of_this_state]);
			else
			{
				int number_of_type_of_this_state=number_of_types_of_states++;
				types_of_states[type_of_this_state]=number_of_type_of_this_state;
				data.dfa_partition.state_to_group.push_back(number_of_type_of_this_state);
			}
		}
	cout << (data.dfa.states.size()-number_of_dead_states) << " live states.\n";
	
	// getting the final partition
	minimize_dfa(data.dfa, data.dfa_partition);
	cout << data.dfa_partition.groups.size() << " states in the minimal DFA.\n";
	
	data.number_of_symbol_classes=0;
	assert(!data.symbol_to_symbol_class.size());
	for(int j=0; j<data.variables.alphabet_cardinality; j++)
		data.symbol_to_symbol_class.push_back(-1);
	vector<int> symbol_class_to_symbol;
	
	for(int j=0; j<data.variables.alphabet_cardinality; j++)
		if(data.symbol_to_symbol_class[j]==-1)
		{
			data.symbol_to_symbol_class[j]=data.number_of_symbol_classes;
			
			for(int k=j+1; k<data.variables.alphabet_cardinality; k++)
			{
				bool flag=true;
				for(int i=0; i<data.dfa.states.size(); i++)
					if(data.dfa.states[i].transitions[k]!=data.dfa.states[i].transitions[j])
					{
						flag=false;
						break;
					}
				
				if(flag)
					data.symbol_to_symbol_class[k]=data.number_of_symbol_classes;
			}
			
			symbol_class_to_symbol.push_back(j);
			data.number_of_symbol_classes++;
		}
	cout << data.number_of_symbol_classes << " symbol classes.\n";
	
	// writing the final transition table to the appropriate place.
	for(int i=0; i<data.dfa_partition.groups.size(); i++)
	{
		int representative_state=*data.dfa_partition.groups[i].begin();
		DolphinData::DataOnAutomatonState &state_data=data.data_on_dfa_states[representative_state];
		
		DolphinData::StateOfFinalAutomaton new_state;
		new_state.action_accepted_here=state_data.action_accepted_here;
		new_state.actions_that_need_lookahead_here=state_data.actions_that_need_lookahead_here;
		
		for(int j=0; j<data.number_of_symbol_classes; j++)
		{
			int target_state_in_old_dfa=data.dfa.states[representative_state].transitions[symbol_class_to_symbol[j]];
			if(target_state_in_old_dfa>=0)
				new_state.transitions.push_back(data.dfa_partition.state_to_group[target_state_in_old_dfa]);
			else
				new_state.transitions.push_back(-1);
		}
		data.final_automaton.push_back(new_state);
	}
}

// checking the final transition table for obvious errors, such as
// i. transitions to non-existent states.
// ii. states that no transition leads to (those besides the initial state).
void dfa_sanity_check()
{
	int number_of_states=data.final_automaton.size();
	int number_of_symbol_classes=data.number_of_symbol_classes;
	
	vector<bool> used_somewhere(number_of_states, false);
	for(int i=0; i<data.start_conditions.size(); i++)
	{
		// the initial state of each local dfa has internal number 0.
		used_somewhere[data.start_conditions[i].offset_of_local_states_in_main_dfa+0]=true;
	}
	for(int i=0; i<number_of_states; i++)
		for(int j=0; j<number_of_symbol_classes; j++)
		{
			if(data.final_automaton[i].transitions[j]<-1 || data.final_automaton[i].transitions[j]>=number_of_states)
			{
				cout << "\n\n\tdfa_sanity_check():\n";
				cout << "Something went while constructing the DFA.\n";
				cout << "State " << i << " has a transition upon symbol class " << j << " to a\n";
				cout << "non-existent state " << data.final_automaton[i].transitions[j] << "\n\n";
				assert(false);
			}
			if(data.final_automaton[i].transitions[j]!=-1)
				used_somewhere[data.final_automaton[i].transitions[j]]=true;
		}
	
	set<int> unused_states;
	for(int i=0; i<number_of_states; i++)
		if(!used_somewhere[i])
			unused_states.insert(i);
	if(unused_states.size())
	{
		cout << "\n\n\tdfa_sanity_check():\n";
		cout << "Something went wrong while constructing the DFA.\n";
		cout << "State group" << (unused_states.size()==1 ? " " : "s ");
		cout << unused_states << " cannot be reached at all.\n\n";
		assert(false);
	}
}

bool construct_dfa()
{
//	cout << "construct_dfa()\n";
	
	// making a separate minimal dfa for each start condition.
	for(int i=0; i<data.start_conditions.size(); i++)
	{
		StartConditionData &sc=data.start_conditions[i];
		
		NFA &nfa=sc.nfa;
		DFA &dfa=sc.dfa;
		
		convert_nfa_to_dfa(nfa, dfa);
		assert(dfa.states.size());
		
		dfa.determine_dead_states();
		
		if(data.variables.dump_dfas_for_start_conditions)
		{
			ofstream log;
			char log_file_name_without_extension[100];
			sprintf(log_file_name_without_extension, "%s.%s.dfa", data.file_name.c_str(), sc.name);

			char log_file_name[100];
			sprintf(log_file_name, "%s.dot", log_file_name_without_extension);
			log.open(log_file_name);

		//	log << "DFA for start condition " << sc.name << "\n\n";
		//	dfa.print(log);

			log << "\n";
			log << "/* A DFA created by Dolphin. */\n\n";
			print_dot_message(log_file_name_without_extension, log);
			log << "\n// DFA for start condition " << sc.name << "\n\n";
			dfa.print_in_dot_format(log);
		}
	}
	
	if(!analyze_accepting_states_and_report_conflicts())
		return false;
	
	build_final_automaton();
	
	if(data.variables.dump_dfa_to_file)
	{
		ofstream log;
		char log_file_name_without_extension[100];
		sprintf(log_file_name_without_extension, "%s.dfa", data.file_name.c_str());
		
		char log_file_name[100];
		sprintf(log_file_name, "%s.dot", log_file_name_without_extension);
		log.open(log_file_name);
		
		log << "\n";
		log << "/* A DFA created by Dolphin. */\n\n";
		print_dot_message(log_file_name_without_extension, log);
		data.dfa.print_in_dot_format(log);
	}
	
//	dfa_sanity_check(); // a postcondition
	
	return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -