⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nfa.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
📖 第 1 页 / 共 2 页
字号:
		nfa.add_epsilon_transition(local_initial_state, new_state_i);
		nfa.add_epsilon_transition(new_state_i, new_state_ii);
		nfa.add_epsilon_transition(new_state_iii, new_state_iv);
		nfa.add_epsilon_transition(new_state_iv, local_final_state);
		for(int j=0; j<nfa.number_of_symbols; j++)
		{
			nfa.add_transition(new_state_i, new_state_i, j);
			nfa.add_transition(new_state_iv, new_state_iv, j);
		}
		
	#ifdef DEBUG_CONTAINS
		cout << "contains: " << local_initial_state << " -> "
			<< new_state_i << " -> " << new_state_ii << " -> "
			<< new_state_iii << " -> " << new_state_iv << " -> "
			<< local_final_state << "\n";
	#endif
		construct_nfa_proc(nfa, expr_cont.expr, new_state_ii, new_state_iii, recursive_path);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionEpsilon))
	{
	#ifdef DEBUG_EPSILON
		cout << "epsilon: " << local_initial_state << " -> "
			<< local_final_state << "\n";
	#endif
		nfa.add_epsilon_transition(local_initial_state, local_final_state);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionSharpSign))
	{
	#ifdef DEBUG_SHARP
		cout << "sharp: " << local_initial_state << " -> "
			<< local_final_state << "\n";
	#endif
		for(int i=0; i<=data.variables.alphabet_cardinality; i++)
			nfa.add_transition(local_initial_state, local_final_state, i);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionSymbol))
	{
		NonterminalExpressionSymbol &expr_s=*dynamic_cast<NonterminalExpressionSymbol *>(expr);
		if(expr_s.expr->is_nts)
		{
			int nn=expr_s.expr->nn;
			assert(nn>=0);
			
			if(data.nonterminals[nn].expanded)
			{
				set<int> actual_symbols=set<int>(*(data.nonterminals[nn].expanded) & UnionOfIntervals<int>(0, data.variables.alphabet_cardinality-1));
				for(set<int>::const_iterator p=actual_symbols.begin(); p!=actual_symbols.end(); p++)
					nfa.add_transition(local_initial_state, local_final_state, *p);
			}
			else if(recursive_path.count(nn))
				nfa.add_epsilon_transition(local_initial_state, recursive_path[nn]);
			else
			{
				// if this usage is not recursive, then
				// i. if it starts a recursion, then mark it as
				//   recursive for the descendants.
				// ii. insert its whole body of rules.
				// iii. if it has been marked, then unmark it.
				
				bool here_starts_a_recursion=data.derivation_paths[nn][nn].v.size();
				int state_used_as_initial;
				
				if(here_starts_a_recursion)
				{
					// introduction of an ancillary state
					// helps to avoid a really bad error.
					state_used_as_initial=nfa.add_state();
					nfa.add_epsilon_transition(local_initial_state, state_used_as_initial);
					recursive_path[nn]=state_used_as_initial;
				}
				else
					state_used_as_initial=local_initial_state;
				
				NonterminalData &nonterminal=data.nonterminals[nn];
				assert(nonterminal.rules.size()==1);
				construct_nfa_proc(nfa, nonterminal.rules[0]->right,
					state_used_as_initial, local_final_state,
					recursive_path);
				
				if(here_starts_a_recursion)
				{
					assert(recursive_path.count(nn) && recursive_path[nn]==state_used_as_initial);
					recursive_path.erase(nn);
				}
			}
		}
		else
		{
		#ifdef DEBUG_TERMINAL_SYMBOL
			cout << "symbol: ";
		#endif
			vector<int> &v=expr_s.expr->s;
			int previous_state=local_initial_state;
			for(int i=0; i<v.size(); i++)
			{
				int next_state=(i<v.size()-1 ? nfa.add_state() : local_final_state);
			#ifdef DEBUG_TERMINAL_SYMBOL
				cout << "(from " << previous_state << " to " << next_state << " on " << v[i] << ")";
			#endif
				nfa.add_transition(previous_state, next_state, v[i]);
				
				if(data.variables.case_insensitive && isalpha(v[i]))
				{
					// the most straightforward method to
					// implement case insensitivity.
					
					if(v[i]!=tolower(v[i]))
						nfa.add_transition(previous_state, next_state, tolower(v[i]));
					if(v[i]!=toupper(v[i]))
						nfa.add_transition(previous_state, next_state, toupper(v[i]));
				}
				
				previous_state=next_state;
			}
		#ifdef DEBUG_TERMINAL_SYMBOL
			cout << "\n";
		#endif
		}
	}
	else
		assert(false);
}

bool construct_nfa()
{
//	cout << "construct_nfa()\n";
//	TimeKeeper tk("nfa");
	
//	data.nfa=NFA(data.variables.alphabet_cardinality);
//	NFA &nfa=data.nfa;
//	int initial_state=nfa.add_state();
//	assert(!initial_state);
	
	// making a separate nfa for each recognized expression
	int number_of_states_in_nfas_for_expressions=0;
	for(int i=0; i<data.recognized_expressions.size(); i++)
	{
		RecognizedExpressionData &re=data.recognized_expressions[i];
		if(re.is_special) continue;
	#ifdef PRINT_WHERE_EXPRESSIONS_ARE_RECOGNIZED
		cout << "Processing " << re.in_serialized_form << "\n";
	#endif
		
		re.nfa=NFA(data.variables.alphabet_cardinality);
		NFA &nfa=re.nfa;
		int initial_state=nfa.add_state();
		
		map<int, int> empty_recursive_path;
		if(re.lookahead) re.intermediate_local_nfa_state=nfa.add_state();
		re.accepting_local_nfa_state=nfa.add_state();
		
		int state_after_expr=(re.lookahead ? re.intermediate_local_nfa_state : re.accepting_local_nfa_state);
		construct_nfa_proc(nfa, re.expr, initial_state, state_after_expr, empty_recursive_path);
		assert(empty_recursive_path.size()==0);
		
		if(re.lookahead)
		{
			int additional_state=nfa.add_state();
			nfa.add_epsilon_transition(re.intermediate_local_nfa_state, additional_state);
			construct_nfa_proc(nfa, re.lookahead,
				additional_state, re.accepting_local_nfa_state,
				empty_recursive_path);
			assert(empty_recursive_path.size()==0);
			
			if(re.lookahead_length==-1)
				nfa.mark_as_important(re.intermediate_local_nfa_state);
		}
		
		nfa.mark_as_accepting(re.accepting_local_nfa_state);
		
	#ifdef PRINT_WHERE_EXPRESSIONS_ARE_RECOGNIZED
		if(re.lookahead)
			cout << "Intermediate local NFA state " << re.intermediate_local_nfa_state
				<< "; recognized ";
		else
			cout << "Recognized ";
		cout << "in local NFA state " << re.accepting_local_nfa_state << "\n";
	#endif
		if(data.variables.dump_nfas_for_expressions)
		{
			ofstream log;
			char log_file_name_without_extension[100];
			sprintf(log_file_name_without_extension, "%s.%u.nfa", data.file_name.c_str(), i);
			
			char log_file_name[100];
			sprintf(log_file_name, "%s.dot", log_file_name_without_extension);
			log.open(log_file_name);
			
			log << "\n";
			log << "/* An NFA created by Dolphin. */\n\n";
			print_dot_message(log_file_name_without_extension, log);
			log << "\n// NFA for expression " << re.in_serialized_form << "\n\n";
			nfa.print_in_dot_format(log);
		}
		number_of_states_in_nfas_for_expressions+=nfa.states.size();
	}
	cout << number_of_states_in_nfas_for_expressions << " states in " << data.recognized_expressions.size() << " NFAs for expressions.\n";
 	
	// Making an NFA for each start condition.
	for(int i=0; i<data.start_conditions.size(); i++)
	{
		StartConditionData &sc=data.start_conditions[i];
		sc.nfa=NFA(data.variables.alphabet_cardinality);
		
		// making a dedicated initial state.
		int initial_state=sc.nfa.add_state();
		assert(initial_state==0);
	}
	
	for(int i=0; i<data.actions.size(); i++)
	{
		ActionData &action=data.actions[i];
		if(action.is_special) continue;
		
		RecognizedExpressionData &re=data.recognized_expressions[action.recognized_expression_number];
		
		for(int j=0; j<data.start_conditions.size(); j++)
			re.offset_of_our_nfa_states_in_nfa_for_start_conditions.push_back(-1);
		
		for(set<int>::iterator p=action.start_condition_numbers.begin(); p!=action.start_condition_numbers.end(); p++)
		{
			StartConditionData &sc=data.start_conditions[*p];
			
			int offset=sc.nfa.incorporate_nfa(re.nfa);
		//	cout << "incorporating nfa for re " << re.in_serialized_form << " in sc " << *p << " at offset " << offset << "\n";
			sc.nfa.add_epsilon_transition(0, offset+0);
			re.offset_of_our_nfa_states_in_nfa_for_start_conditions[*p]=offset;
		}
	}
	
	int number_of_states_in_nfas_for_start_conditions=0;
	for(int i=0; i<data.start_conditions.size(); i++)
	{
		StartConditionData &sc=data.start_conditions[i];
		number_of_states_in_nfas_for_start_conditions+=sc.nfa.states.size();
		
		if(data.variables.dump_nfas_for_start_conditions || (data.variables.dump_nfa_to_file && data.start_conditions.size()>1))
		{
			ofstream log;
			char log_file_name_without_extension[100];
			sprintf(log_file_name_without_extension, "%s.%s.nfa", data.file_name.c_str(), sc.name);
			
			char log_file_name[100];
			sprintf(log_file_name, "%s.dot", log_file_name_without_extension);
			log.open(log_file_name);
			
		//	log << "NFA for start condition " << sc.name << "\n\n";
		//	sc.nfa.print(log);
			
			log << "\n";
			log << "/* An NFA created by Dolphin. */\n\n";
			print_dot_message(log_file_name_without_extension, log);
			log << "\n// NFA for start condition " << sc.name << "\n\n";
			sc.nfa.print_in_dot_format(log);
		}
		if(i==0 && data.variables.dump_nfa_to_file && data.start_conditions.size()==1)
		{
			ofstream log;
			char log_file_name_without_extension[100];
			sprintf(log_file_name_without_extension, "%s.nfa", data.file_name.c_str());
			
			char log_file_name[100];
			sprintf(log_file_name, "%s.dot", log_file_name_without_extension);
			log.open(log_file_name);
			
		//	log << "NFA for start condition " << sc.name << "\n\n";
		//	sc.nfa.print(log);
			
			log << "\n";
			log << "/* An NFA created by Dolphin. */\n\n";
			print_dot_message(log_file_name_without_extension, log);
			sc.nfa.print_in_dot_format(log);
		}
	}
	
	cout << number_of_states_in_nfas_for_start_conditions << " states in ";
	if(data.variables.start_conditions_enabled)
		cout << data.start_conditions.size() << " NFAs for start conditions.\n";
	else
		cout << "NFA.\n";
	
	return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -