📄 tables.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
字号:

// Experimental table compression routines.

#include "dolphin.h"
#include "tables.h"
#include "stl.h"
#include <fstream>
using namespace std;

//#define DUMP_SIMILARITY_RELATION

struct DifferenceBetweenLines
{
	bool identical;
	int region_begin;
	int region_end;
};

void compress_transition_table();
void compress_tables_layer_1();
void compress_tables_layer_2();
DifferenceBetweenLines find_difference_between_layer1_lines(int line1, int line2);

void make_table_of_lookahead_states();
void make_start_condition_to_dfa_state_table();

void make_tables()
{
	if(data.variables.compress_tables)
		compress_transition_table();
	if(data.variables.generate_arbitrary_lookahead_support)
		make_table_of_lookahead_states();
	make_start_condition_to_dfa_state_table();
}

void compress_transition_table()
{
	int m=data.number_of_symbol_classes;
	
	cout << "Compressing transition table: ";
	
	if(data.variables.using_layer2)
	{
		cout << "layer 1";
		cout.flush();
	}
	
	compress_tables_layer_1();
	
	if(data.variables.using_layer2) cout << " (";
	cout << data.tables.layer1_to_state.size() << " lines";
	if(data.variables.using_layer2)
		cout << ")";
	else
		cout << ".\n";
        
        if(data.variables.using_layer2)
	{
		cout << ", layer 2";
		cout.flush();
		
		compress_tables_layer_2();
		
		cout << " (" << data.tables.layer2_to_layer1.size() << " lines).\n";
	}
	
	TableMaker tm(data.tables.compressed_table_of_lines);
	if(!data.variables.using_layer2)
		for(int i=0; i<data.tables.layer1_to_state.size(); i++)
		{
			int i_state=data.tables.layer1_to_state[i];
			std::vector<int> v;
			
			/* INCREMENT IS HERE! */
			for(int j=0; j<m; j++)
				v.push_back(data.final_automaton[i_state].transitions[j]+1);
			
			int offset=tm.add(v);
			data.tables.line_to_offset_in_table_of_lines.push_back(offset);
		}
	else
		for(int i=0; i<data.tables.layer2_to_layer1.size(); i++)
		{
			int i_layer1=data.tables.layer2_to_layer1[i];
			int i_state=data.tables.layer1_to_state[i_layer1];
			std::vector<int> v;
			
			/* INCREMENT IS HERE! */
			for(int j=0; j<m; j++)
				v.push_back(data.final_automaton[i_state].transitions[j]+1);
			
			int offset=tm.add(v);
			data.tables.line_to_offset_in_table_of_lines.push_back(offset);
		}
}

void compress_tables_layer_1()
{
	int n=data.final_automaton.size();
	int m=data.number_of_symbol_classes;
	
	// It could be done in n*log(n) time, but I'm too lazy to do that.
	
	data.tables.state_to_layer1=vector<int>(n, -2);
	
	for(int i=0; i<n; i++)
		if(data.tables.state_to_layer1[i]==-2)
		{
			int this_line_number=data.tables.layer1_to_state.size();
			data.tables.layer1_to_state.push_back(i);
			data.tables.state_to_layer1[i]=this_line_number;
			
			bool it_is_null=true;
			for(int j=0; j<m; j++)
				if(data.final_automaton[i].transitions[j]!=-1)
				{
					it_is_null=false;
					break;
				}
			if(it_is_null)
				data.tables.null_line_number=i;
			
			for(int k=i+1; k<n; k++)
				if(data.tables.state_to_layer1[k]==-2)
				{
					bool all_equal=true;
					for(int j=0; j<m; j++)
						if(data.final_automaton[i].transitions[j]!=data.final_automaton[k].transitions[j])
						{
							all_equal=false;
							break;
						}
					if(all_equal)
						data.tables.state_to_layer1[k]=this_line_number;
				}
		}
}

void compress_tables_layer_2()
{
	int n=data.tables.layer1_to_state.size();
	
	/* similarity_relation[i] is a sorted vector of numbers of those    */
	/* lines that are similar to i-th line. Lines should be distinct.   */
	vector<vector<int> > similarity_relation(n, vector<int>());
	
	for(int i=0; i<n; i++)
		for(int k=0; k<n; k++)
		{
			if(i==k) continue;
			
			DifferenceBetweenLines difference=find_difference_between_layer1_lines(i, k);
			assert(!difference.identical);
			int width=difference.region_end-difference.region_begin;
			assert(width>0);
			
			if(width<=data.variables.table_compression_exception_width)
			{
				similarity_relation[i].push_back(k);
				continue;
			}
		}
	
#ifdef DUMP_SIMILARITY_RELATION
	ofstream a("similarity_relation");
	
	for(int i=0; i<n; i++)
	{
		a << i << ": " << similarity_relation[i] << "\n";
	}
	
	a << "\n";
#endif
	
	set<int> lines_left;
	for(int i=0; i<n; i++) // how to write it in C*n time?
	{
		lines_left.insert(i);
		data.tables.layer1_to_layer2.push_back(-1);
		data.tables.layer1_to_exception_location.push_back(-1);
		data.tables.layer1_to_exception_data.push_back(vector<int>());
	}
	
	while(lines_left.size())
	{
		// The line that has the greatest number of neighbours.
		int the_line=*lines_left.begin();
		for(set<int>::const_iterator p=lines_left.begin(); p!=lines_left.end(); p++)
			if(similarity_relation[*p].size()>similarity_relation[the_line].size())
				the_line=*p;
		
		int layer_2_line=data.tables.layer2_to_layer1.size();
		data.tables.layer2_to_layer1.push_back(the_line);
		
	#ifdef DUMP_SIMILARITY_RELATION
		a << "Processing line " << the_line << " (" << similarity_relation[the_line].size() << " neighbours).\n";
	#endif
		
		// Store information about "exceptions" - those fragments in
		// neighbouring lines that differ from the current line.
		
		for(int i=0; i<similarity_relation[the_line].size(); i++)
		{
			int current_line=similarity_relation[the_line][i];
			int cl_state=data.tables.layer1_to_state[current_line];
			
			DifferenceBetweenLines d=find_difference_between_layer1_lines(the_line, current_line);
			assert(!d.identical);
			assert(d.region_end-d.region_begin<=data.variables.table_compression_exception_width);
			
			data.tables.layer1_to_exception_location[current_line]=d.region_begin;
		//	data.tables.layer1_to_exception_data[current_line]=vector<int>(data.final_transition_table.at(cl_state, d.region_begin), data.final_transition_table.at(cl_state, d.region_end));
			
			// INCREMENT HERE. Ought to bring a uniform enumeration.
			for(int i=d.region_begin; i<d.region_end; i++)
				data.tables.layer1_to_exception_data[current_line].push_back(data.final_automaton[cl_state].transitions[i] + 1);
		}
		
		// Now we shall
		// i. Remove this line together with its neighbours from the
		//	list of lines to process.
		// ii. Remove all references to all these lines from the vectors.
		
		set<int> lines_to_erase(similarity_relation[the_line].begin(), similarity_relation[the_line].end());
		lines_to_erase.insert(the_line);
		assert(lines_to_erase.size()==similarity_relation[the_line].size()+1);
		
		for(set<int>::const_iterator p=lines_to_erase.begin(); p!=lines_to_erase.end(); p++)
		{
			data.tables.layer1_to_layer2[*p]=layer_2_line;
			lines_left.erase(*p);
		}
		
		for(int i=0; i<n; i++)
		{
			vector<int> temp;
			set_difference(similarity_relation[i].begin(), similarity_relation[i].end(),
				lines_to_erase.begin(), lines_to_erase.end(), back_inserter(temp));
			similarity_relation[i]=temp;
		}
	}
}

DifferenceBetweenLines find_difference_between_layer1_lines(int line1, int line2)
{
	DifferenceBetweenLines difference;
	
	int m=data.number_of_symbol_classes;
	
	// Using the simpliest method: two vectors are similar if there exist
	// p1 and p2, such that
	//	p1 < p2,
	//	p2-p1 < some predefined d,
	//	v1 and v2 are identical everywhere except the region between
	//		p1 and p2.
	// It is possible to use any other relation of similarity instead. It
	// should not necessarily be reflexive.
	
	int state1=data.tables.layer1_to_state[line1];
	int state2=data.tables.layer1_to_state[line2];
	
	bool before_region=true;
	difference.region_begin=0;
	difference.region_end=m;
	for(int j=0; j<m; j++)
	{
		if(before_region &&
			data.final_automaton[state1].transitions[j]==
			data.final_automaton[state2].transitions[j])
		{
			difference.region_begin=j+1;
		}
		if(data.final_automaton[state1].transitions[j]!=
			data.final_automaton[state2].transitions[j])
		{
			before_region=false;
			difference.region_end=j+1;
		}
	}
	
//	cout << "[" << difference.region_begin << ", " << difference.region_end << "]\n";
	difference.identical=before_region;
	
	return difference;
}

void make_start_condition_to_dfa_state_table()
{
	for(int i=0; i<data.start_conditions.size(); i++)
	{
		int initial_state_in_uncompressed_dfa=data.start_conditions[i].offset_of_local_states_in_main_dfa;
		int initial_state_in_minimized_dfa=data.dfa_partition.state_to_group[initial_state_in_uncompressed_dfa];
		
		// they are goddamn incremented by one (dactylic pentameter)
		data.tables.start_condition_to_dfa_state_table.push_back(initial_state_in_minimized_dfa+1);
	}
}

void make_table_of_lookahead_states()
{
	cout << "make_table_of_lookahead_states()\n";
	
	TableMaker tm(data.tables.compressed_table_of_lookahead_states);
	for(int i=0; i<data.actions.size(); i++)
	{
		ActionData &action=data.actions[i];
		RecognizedExpressionData &re=data.recognized_expressions[action.recognized_expression_number];
		if(re.lookahead_length!=-1)
		{
			data.tables.offset_in_table_of_lookahead_states.push_back(0);
			data.tables.number_of_lookahead_states.push_back(0);
			continue;
		}
		
		vector<int> states;
		for(int j=0; j<data.final_automaton.size(); j++)
			if(data.final_automaton[j].actions_that_need_lookahead_here.count(i))
				states.push_back(j+1); // damn this increment!
		
		int offset=tm.add(states);
		
		data.tables.offset_in_table_of_lookahead_states.push_back(offset);
		data.tables.number_of_lookahead_states.push_back(states.size());
	}
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -