⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 codegen.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
📖 第 1 页 / 共 3 页
字号:

#include "dolphin.h"

#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include "codegen.h"
#include "tables.h"
#include "stl.h"
#include "utilities.h"
using namespace std;
using namespace Whale;

// Forward declarations of the helper routines used by the code generator.
// Routines taking 'FILE *a' write generated C++ source text to that stream.
// NOTE(review): only generate_dolphin_class() is defined in the visible part
// of this file; the remaining definitions are presumably further down or in
// another translation unit - confirm.
void generate_dolphin_class(FILE *a);
void generate_scalar_constants(FILE *a, char *indent, bool it_is_cpp);
void generate_get_token_function(FILE *a);
void generate_table_of_states(FILE *a);
void generate_table_of_lines(FILE *a);
void generate_table_of_symbol_classes(FILE *a);
void generate_table_of_lookahead_states(FILE *a);
void generate_table_of_actions(FILE *a);
void generate_table_of_initial_states(FILE *a);
void generate_whale_emulator(FILE *a, int mode);
void print_parametrized_string(FILE *a, char *s, char *parameters, char *values[]);
void generate_action(FILE *a, const char *indent, ActionData &action, bool break_after);
void generate_actions_for_special_expression(FILE *a, const char *indent, RecognizedExpressionData &re, bool break_after);
string expression_for_initial_state();
bool type_is_a_pointer(char *s);
bool type_should_be_printed_without_space(char *s);
//void insert_cpp_code_in_file(FILE *a, const char *indent, Terminal *code);

// the generated code will have all state and action numbers incremented by
// one - to make 0 rather than -1 an error marker.

// Writes the generated lexical analyzer into two files named after
// data.file_name: "<file_name>.h" (class declaration, wrapped in an inclusion
// guard) and "<file_name>.cpp" (constants, get_token(), clear_lexeme(),
// insert_characters() and the tables).
//
// Optional user-supplied code fragments (data.variables.code_in_*) are
// copied verbatim at the corresponding places when they are non-null.
//
// If either output file cannot be opened for writing, the reason is reported
// on stderr via perror() and nothing further is generated.
void generate_code()
{
	string h_file_name=data.file_name+string(".h");
	string cpp_file_name=data.file_name+string(".cpp");
	
	// Both streams are written to unconditionally below, so a failed
	// fopen() must be caught here rather than passed on to fprintf().
	FILE *h_file=fopen(h_file_name.c_str(), "w");
	if(!h_file)
	{
		perror(h_file_name.c_str());
		return;
	}
	FILE *cpp_file=fopen(cpp_file_name.c_str(), "w");
	if(!cpp_file)
	{
		perror(cpp_file_name.c_str());
		fclose(h_file);
		return;
	}
	
	// Name of the macro used as the inclusion guard of the generated header.
	string inclusion_indicator="__LEXICAL_ANALYZER_GENERATED_BY_DOLPHIN__"+convert_file_name_to_identifier(h_file_name, 1);
	
	fprintf(h_file, "%s", FIRST_LINE_FOR_GENERATED_FILES);
	fprintf(cpp_file, "%s", FIRST_LINE_FOR_GENERATED_FILES);
	
	
	/* .h */
	
	if(data.variables.code_in_h_before_all)
		fprintf(h_file, "\n%s\n", data.variables.code_in_h_before_all);
	
	fprintf(h_file, "\n#ifndef %s\n", inclusion_indicator.c_str());
	fprintf(h_file, "\n#define %s\n", inclusion_indicator.c_str());
	
	// Standard headers needed by the generated class declaration.
	if(!data.variables.input_stream_is_FILE_asterisk)
		fprintf(h_file, "\n#include <iostream>\n");
	else
		fprintf(h_file, "\n#include <stdio.h>\n");
	fprintf(h_file, "#include <vector>\n");
	fprintf(h_file, "#include <typeinfo>\n");
	if(data.variables.generate_sanity_checks)
		fprintf(h_file, "#include <stdexcept>\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(h_file, "#include <string>\n");
	
	// Forward declaration of the analyzer class, emitted before user code.
	fprintf(h_file, "\nclass %s;\n", data.variables.dolphin_class_name);
	
	if(data.variables.code_in_h)
		fprintf(h_file, "\n%s\n", data.variables.code_in_h);
	
	// Make the parser's Terminal type known to the header: either emit the
	// emulator, include the parser header, or forward-declare the class.
	if(data.variables.using_whale)
	{
		if(data.variables.whale_emulation_mode)
			generate_whale_emulator(h_file, 1);
		else if(data.variables.allow_inclusion_cycle_between_whale_and_dolphin)
			fprintf(h_file, "\n#include \x22%s\x22\n", data.variables.whale_file);
		else
			fprintf(h_file, "\nnamespace %s { class Terminal; }\n", data.variables.whale_namespace);
	}
	
	generate_dolphin_class(h_file);
	
	if(data.variables.whale_emulation_mode)
		generate_whale_emulator(h_file, 2);
	
	if(data.variables.code_in_h_after_all)
		fprintf(h_file, "\n%s\n", data.variables.code_in_h_after_all);
	
	fprintf(h_file, "\n#endif\n");
	fclose(h_file);
	
	
	/* .cpp */
	
	if(data.variables.code_in_cpp_before_all)
		fprintf(cpp_file, "\n%s\n", data.variables.code_in_cpp_before_all);
	
	fprintf(cpp_file, "\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(cpp_file, "#include <utility>\n"); // std::pair is used.
	fprintf(cpp_file, "#include \x22%s\x22\n", h_file_name.c_str());
	// NOTE(review): unlike the header part above, this include is not guarded
	// by using_whale / whale_emulation_mode - confirm that whale_file is
	// always a valid string when this branch is taken.
	if(!data.variables.allow_inclusion_cycle_between_whale_and_dolphin)
		fprintf(cpp_file, "#include \x22%s\x22\n", data.variables.whale_file);
	fprintf(cpp_file, "using namespace std;\n");
	
	fprintf(cpp_file, "\nconst char *%s::dolphin_copyright_notice=\n%s;\n",
		data.variables.dolphin_class_name, COPYRIGHT_NOTICE_FOR_GENERATED_FILES);
	
	if(data.variables.code_in_cpp)
		fprintf(cpp_file, "\n%s\n", data.variables.code_in_cpp);
	
	fprintf(cpp_file, "\n");
	generate_scalar_constants(cpp_file, "", true);
	
	fprintf(cpp_file, "\n");
	generate_get_token_function(cpp_file);
	
	// clear_lexeme(): advance the position counters over the characters of
	// the current lexeme, drop them from the buffer and release the lexeme.
	fprintf(cpp_file, "\n");
	fprintf(cpp_file, "void %s::clear_lexeme()\n", data.variables.dolphin_class_name);
	fprintf(cpp_file, "{\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(cpp_file, "\tfor(int i=0; i<lexeme.size(); i++)\n");
	else
		fprintf(cpp_file, "\tfor(int i=0; i<number_of_characters_in_lexeme; i++)\n");
	fprintf(cpp_file, "\t\tinternal_position_counter(buffer[i]);\n");
	fprintf(cpp_file, "\t\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(cpp_file, "\tbuffer.erase(buffer.begin(), buffer.begin()+lexeme.size());\n");
	else
		fprintf(cpp_file, "\tbuffer.erase(buffer.begin(), buffer.begin()+number_of_characters_in_lexeme);\n");
	fprintf(cpp_file, "\t\n");
	if(data.variables.store_lexeme_in_string)
	{
	//	fprintf(cpp_file, "\tlexeme.clear();\n");
		fprintf(cpp_file, "\tlexeme.resize(0);\n");
	}
	else
	{
		fprintf(cpp_file, "\tif(lexeme)\n");
		fprintf(cpp_file, "\t{\n");
		fprintf(cpp_file, "\t\tdelete[] lexeme;\n");
		fprintf(cpp_file, "\t\tlexeme=NULL;\n");
		fprintf(cpp_file, "\t}\n");
	}
	fprintf(cpp_file, "}\n");
	
	// insert_characters(): put a zero-terminated string at the front of the
	// input buffer.
	fprintf(cpp_file, "\n");
	fprintf(cpp_file, "\nvoid %s::insert_characters(const char *s)\n", data.variables.dolphin_class_name);
	fprintf(cpp_file, "{\n");
	fprintf(cpp_file, "\tint i;\n");
	fprintf(cpp_file, "\tfor(i=0; s[i]; i++);\n");
	fprintf(cpp_file, "\tbuffer.insert(buffer.begin(), s, s+i);\n");
	fprintf(cpp_file, "}\n");
	
	// Tables; which ones are emitted depends on the enabled features.
	if(data.variables.compress_tables)
		generate_table_of_lines(cpp_file);
	generate_table_of_states(cpp_file);
	generate_table_of_symbol_classes(cpp_file);
	if(data.variables.generate_arbitrary_lookahead_support)
		generate_table_of_lookahead_states(cpp_file);
	if(data.variables.generate_table_of_actions)
		generate_table_of_actions(cpp_file);
	if(data.variables.start_conditions_enabled)
		generate_table_of_initial_states(cpp_file);
	
	if(data.variables.code_in_cpp_after_all)
		fprintf(cpp_file, "\n%s\n", data.variables.code_in_cpp_after_all);
	
	fclose(cpp_file);
}

// Writes the declaration of the generated analyzer class (named
// data.variables.dolphin_class_name) to stream 'a'.
//
// The emitted class consists of, in order: public constants and the
// StartCondition enum, protected table structures and data members,
// protected helpers (clear_lexeme, make_token, internal_position_counter),
// and the public interface (constructor, destructor, get_token, lexeme and
// position accessors). Which members appear depends on the feature flags in
// data.variables.
//
// a - output stream; must be open for writing.
void generate_dolphin_class(FILE *a)
{
	fprintf(a, "\nclass %s\n", data.variables.dolphin_class_name);
	fprintf(a, "{\n");
	fprintf(a, "\tstatic const char *dolphin_copyright_notice;\n");
	
	fprintf(a, "\t\n" "public:\n");
	
	if(data.variables.code_in_class_before_all)
		fprintf(a, "\t%s\n\t\n", data.variables.code_in_class_before_all);
	
	generate_scalar_constants(a, "\t", false);
	
	// enum StartCondition: the first enumerator is explicitly given value 0.
	if(data.variables.start_conditions_enabled)
	{
		fprintf(a, "\t\n"
			"\tenum StartCondition { ");
		for(int i=0; i<data.start_conditions.size(); i++)
		{
			if(i) fprintf(a, ", ");
			fprintf(a, "%s", data.start_conditions[i].name);
			if(!i) fprintf(a, "=0");
		}
		fprintf(a, " };\n");
	}
	
	fprintf(a, "\t\n" "protected:\n");
	
	// struct StateData: one entry of the table of DFA states.
	fprintf(a, "\tstruct StateData\n");
	fprintf(a, "\t{\n");
	if(data.variables.compress_tables)
	{
		if(data.variables.using_layer2)
		{
			assert(data.variables.table_compression_exception_width==1);
			fprintf(a, "\t\tint exception_location; // -1 for none\n");
			fprintf(a, "\t\t%s exception_data;\n", data.variables.analyzer_state_type);
		}
		
		fprintf(a, "\t\tconst %s *transitions;\n", data.variables.analyzer_state_type);
	}
	else
		fprintf(a, "\t\t%s transitions[number_of_symbol_classes];\n", data.variables.analyzer_state_type);
	fprintf(a, "\t\tint action_upon_accept;\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\t\tbool is_lookahead_state;\n");
	// NOTE(review): the emitted method reads exception_location and
	// exception_data, which are only declared above when compress_tables
	// and using_layer2 are both set - presumably
	// access_transitions_through_a_method implies both; confirm.
	if(data.variables.access_transitions_through_a_method)
	{
		fprintf(a, "\t\t\n");
		fprintf(a, "\t\t%s access_transition(int input_symbol) const\n", data.variables.analyzer_state_type);
		fprintf(a, "\t\t{\n");
		fprintf(a, "\t\t\tif(input_symbol==exception_location)\n");
		fprintf(a, "\t\t\t\treturn exception_data;\n");
		fprintf(a, "\t\t\telse\n");
		fprintf(a, "\t\t\t\treturn transitions[input_symbol];\n");
		fprintf(a, "\t\t}\n");
	}
	fprintf(a, "\t};\n");
	
	// struct ActionData: per-action lookahead information.
	if(data.variables.generate_table_of_actions)
	{
		fprintf(a, "\tstruct ActionData\n");
		fprintf(a, "\t{\n");
		if(data.variables.generate_fixed_length_lookahead_support)
			fprintf(a, "\t\tint lookahead_length;\n");
		if(data.variables.generate_arbitrary_lookahead_support)
		{
			fprintf(a, "\t\tconst %s *lookahead_states;\n", data.variables.analyzer_state_type);
			fprintf(a, "\t\tint number_of_lookahead_states;\n");
		}
		if(data.variables.generate_eof_lookahead_support)
			fprintf(a, "\t\tint lookahead_eof; // -1 - if not eof, 0 - doesn't matter, 1 - if eof\n");
		fprintf(a, "\t};\n");
	}
	
	// Declarations of the static tables.
	fprintf(a, "\t\n");
	fprintf(a, "\tstatic const int symbol_to_symbol_class[alphabet_cardinality];\n");
	if(data.variables.compress_tables)
		fprintf(a, "\tstatic const %s table_of_lines[size_of_table_of_lines];\n", data.variables.analyzer_state_type);
	fprintf(a, "\tstatic const StateData states[number_of_dfa_states+1];\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\tstatic const %s lookahead_states[size_of_table_of_lookahead_states];\n", data.variables.analyzer_state_type);
	if(data.variables.generate_table_of_actions)
		fprintf(a, "\tstatic const ActionData actions[number_of_actions+1];\n");
	if(data.variables.start_conditions_enabled)
		fprintf(a, "\tstatic const %s initial_dfa_states_for_start_conditions[number_of_start_conditions];\n", data.variables.analyzer_state_type);
	fprintf(a, "\t\n");
	
	// Text placed between the stream type and the identifier that follows:
	// an optional space, then '&' unless the type is already a pointer.
	string stream_class_suffix=string(type_should_be_printed_without_space(data.variables.input_stream_class) ? "" : " ")+
		string(type_is_a_pointer(data.variables.input_stream_class) ? "" : "&");
	
	// Per-instance state.
	fprintf(a, "\t%s%sinput_stream;\n", data.variables.input_stream_class, stream_class_suffix.c_str());
	fprintf(a, "\tstd::vector<%s> buffer;\n", data.variables.input_character_class);
	if(data.variables.start_conditions_enabled)
		fprintf(a, "\tStartCondition start_condition;\n");
	fprintf(a, "\tbool eof_reached;\n");
	if(data.variables.append_data_member)
		fprintf(a, "\tbool append;\n");
	
	fprintf(a, "\t\n");
	if(data.variables.store_lexeme_in_string)
	{
		fprintf(a, "\t%s lexeme;\n", data.variables.internal_string_type);
	}
	else
	{
		fprintf(a, "\t%s *lexeme;\n", data.variables.internal_char_type);
		fprintf(a, "\tint number_of_characters_in_lexeme;\n");
	}
	fprintf(a, "\tint current_line, current_column, current_offset;\n");
	fprintf(a, "\tint tab_size;\n");

	fprintf(a, "\t\n");
	fprintf(a, "\tvoid clear_lexeme();\n");
	
	fprintf(a, "\t\n");
	// make_token<T>(): allocate a terminal of type T and fill in its
	// position and text from the current lexeme.
	if(data.variables.using_whale)
	{
		fprintf(a, "\ttemplate<class T> %s::Terminal *make_token()\n", data.variables.whale_namespace);
		fprintf(a, "\t{\n");
		if(data.variables.generate_verbose_prints)
			fprintf(a, "\t\tstd::cout << \x22make_token<\x22 << typeid(T).name() << \x22>()\\n\x22;\n");
		fprintf(a, "\t\t%s::Terminal *t=new T;\n", data.variables.whale_namespace);
		fprintf(a, "\t\tt->line=line();\n");
		fprintf(a, "\t\tt->column=column();\n");
		fprintf(a, "\t\tt->text=capture_lexeme();\n");
		fprintf(a, "\t\treturn t;\n");
		fprintf(a, "\t}\n");
	}
	// internal_position_counter(): update line/column/offset for one
	// consumed character; a tab advances the column to the next tab stop.
	fprintf(a, "\tvoid internal_position_counter(%s c)\n", data.variables.internal_char_type);
	fprintf(a, "\t{\n");
	fprintf(a, "\t\tif(c=='\\n')\n");
	fprintf(a, "\t\t{\n");
	fprintf(a, "\t\t\tcurrent_line++;\n");
	fprintf(a, "\t\t\tcurrent_column=1;\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t\telse if(c=='\\t')\n");
	fprintf(a, "\t\t\tcurrent_column+=tab_size-(current_column-1)%%tab_size;\n");
	fprintf(a, "\t\telse\n");
	fprintf(a, "\t\t\tcurrent_column++;\n");
	fprintf(a, "\t\t\n");
	fprintf(a, "\t\tcurrent_offset++;\n");
	fprintf(a, "\t}\n");
	fprintf(a, "\t\n");
	
	fprintf(a, "public:\n");
	
	// String helpers for character types other than char.
	if(!data.variables.internal_char_type_is_char)
	{
		// basic_strlen() must return the number of characters before the
		// terminator. The previous form, 'while(s[i++]); return i;', also
		// counted the terminator, which made basic_strdup() leave one
		// element uninitialised and put the terminator one slot too far.
		fprintf(a, "\ttemplate<class T> int basic_strlen(const T *s)\n");
		fprintf(a, "\t{\n");
		fprintf(a, "\t\tint i=0;\n");
		fprintf(a, "\t\twhile(s[i])\n");
		fprintf(a, "\t\t\ti++;\n");
		fprintf(a, "\t\treturn i;\n");
		fprintf(a, "\t}\n");
		
		// The copy is allocated as T[], not wchar_t[], so that the template
		// is valid for whichever character type it is instantiated with.
		fprintf(a, "\t\n\ttemplate<class T> T *basic_strdup(const T *s)\n");
		fprintf(a, "\t{\n");
		fprintf(a, "\t\tint l=basic_strlen<T>(s);\n");
		fprintf(a, "\t\tT *result=new T[l+1];\n");
		fprintf(a, "\t\tfor(int i=0; s[i]; i++)\n");
		fprintf(a, "\t\t\tresult[i]=s[i];\n");
		fprintf(a, "\t\tresult[l]=0;\n");
		fprintf(a, "\t\treturn result;\n");
		fprintf(a, "\t}\n");
	}
	
	if(data.variables.code_in_class)
		fprintf(a, "\t\n\t%s\n", data.variables.code_in_class);
	
	// Constructor: binds the input stream and resets all counters.
	fprintf(a, "\t%s(%s%sstream_supplied) : input_stream(stream_supplied)\n",
		data.variables.dolphin_class_name, data.variables.input_stream_class,
		stream_class_suffix.c_str());
	fprintf(a, "\t{\n");
	if(data.variables.start_conditions_enabled)
		fprintf(a, "\t\tstart_condition=%s;\n", data.start_conditions[0].name);
	if(!data.variables.store_lexeme_in_string)
	{
		fprintf(a, "\t\tlexeme=NULL;\n");
		fprintf(a, "\t\tnumber_of_characters_in_lexeme=0;\n");
	}
	fprintf(a, "\t\teof_reached=false;\n");
	fprintf(a, "\t\tcurrent_line=1;\n");
	fprintf(a, "\t\tcurrent_column=1;\n");
	fprintf(a, "\t\tcurrent_offset=0;\n");
	fprintf(a, "\t\tset_tab_size(8);\n");
	if(data.variables.code_in_constructor)
		fprintf(a, "\t\t\n\t\t%s\n", data.variables.code_in_constructor);
	fprintf(a, "\t}\n");
	
	// Destructor: frees the lexeme only when it is a raw array.
	if(data.variables.store_lexeme_in_string)
		fprintf(a, "\t~%s() { }\n", data.variables.dolphin_class_name);
	else
		fprintf(a, "\t~%s() { if(lexeme) delete[] lexeme; }\n", data.variables.dolphin_class_name);
	if(data.variables.start_conditions_enabled)
	{
		fprintf(a, "\tvoid set_start_condition(StartCondition new_start_condition) { start_condition=new_start_condition; }\n");
		fprintf(a, "\tStartCondition get_start_condition() const { return start_condition; }\n");
	}
	fprintf(a, "\t%s%sget_token(%s);\n", data.variables.get_token_function_return_value,
		(type_should_be_printed_without_space(data.variables.get_token_function_return_value) ? "" : " "),
		data.variables.get_token_function_parameters);
	
	// Lexeme accessors; capture_lexeme() hands a copy (or the array itself)
	// over to the caller and then clears the lexeme.
	if(data.variables.store_lexeme_in_string)
	{
		fprintf(a, "\tconst %s *get_lexeme() const { return lexeme.c_str(); }\n", data.variables.internal_char_type);
		fprintf(a, "\tconst %s &get_lexeme_str() const { return lexeme; }\n", data.variables.internal_string_type);
		if(data.variables.internal_char_type_is_char)
			fprintf(a, "\t%s *capture_lexeme() { char *s=strdup(lexeme.c_str()); clear_lexeme(); return s; }\n", data.variables.internal_char_type);
		else
			fprintf(a, "\t%s *capture_lexeme() { %s *s=basic_strdup<%s>(lexeme.c_str()); clear_lexeme(); return s; }\n", data.variables.internal_char_type, data.variables.internal_char_type, data.variables.internal_char_type);
		fprintf(a, "\tint get_lexeme_length() const { return lexeme.size(); }\n");
	}
	else
	{
		fprintf(a, "\tconst %s *get_lexeme() const { return lexeme; }\n", data.variables.internal_char_type);
		fprintf(a, "\t%s *capture_lexeme() { %s *s=lexeme; lexeme=NULL; clear_lexeme(); return s; }\n", data.variables.internal_char_type, data.variables.internal_char_type);
		fprintf(a, "\tint get_lexeme_length() const { return number_of_characters_in_lexeme; }\n");
	}
	
	// Position and miscellaneous accessors.
	fprintf(a, "\tint line() const { return current_line; }\n");
	fprintf(a, "\tint column() const { return current_column; }\n");
	fprintf(a, "\tint offset() const { return current_offset; }\n");
	fprintf(a, "\tvoid set_tab_size(int n) { tab_size=n; }\n");
	fprintf(a, "\tint get_tab_size() const { return tab_size; }\n");
	fprintf(a, "\tvoid insert_characters(const char *);\n");
	fprintf(a, "\tconst char *copyright_notice() { return dolphin_copyright_notice; }\n");
	
	if(data.variables.code_in_class_after_all)
		fprintf(a, "\t\n\t%s\n", data.variables.code_in_class_after_all);
	
	fprintf(a, "};\n");
}

template<class T> void generate_single_scalar_constant(FILE *a, char *indent, char *type, char *id, T value, char *format, char *class_name, bool it_is_cpp)
{
	if(!it_is_cpp)
	{
		string s=string("%sstatic const %s %s=")+string(format)+string(";\n");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -