// codegen.cpp
#include "dolphin.h"
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include "codegen.h"
#include "tables.h"
#include "stl.h"
#include "utilities.h"
using namespace std;
using namespace Whale;
// Forward declarations of the code-generation helpers used in this file.
// Each takes the output stream it writes to as its first FILE* argument
// (except the three string/type helpers at the end).

// Emits the declaration of the generated lexer class; generate_code() calls
// it on the header stream.
void generate_dolphin_class(FILE *a);
// generate_code() calls this with indent "" and it_is_cpp=true for the .cpp
// file; generate_dolphin_class() calls it with indent "\t" and
// it_is_cpp=false inside the class body.
void generate_scalar_constants(FILE *a, char *indent, bool it_is_cpp);
void generate_get_token_function(FILE *a);
// Table emitters; generate_code() calls them on the .cpp stream, some of them
// only when the corresponding data.variables option is set.
void generate_table_of_states(FILE *a);
void generate_table_of_lines(FILE *a);
void generate_table_of_symbol_classes(FILE *a);
void generate_table_of_lookahead_states(FILE *a);
void generate_table_of_actions(FILE *a);
void generate_table_of_initial_states(FILE *a);
// generate_code() calls this with mode 1 before the lexer class is emitted
// and with mode 2 after it.
void generate_whale_emulator(FILE *a, int mode);
// NOTE(review): the following helpers are not called in the visible part of
// this file; their exact contracts should be confirmed at their definitions.
void print_parametrized_string(FILE *a, char *s, char *parameters, char *values[]);
void generate_action(FILE *a, const char *indent, ActionData &action, bool break_after);
void generate_actions_for_special_expression(FILE *a, const char *indent, RecognizedExpressionData &re, bool break_after);
string expression_for_initial_state();
// Used by generate_dolphin_class() to decide whether a '&' and/or a space
// must follow a user-supplied type name when a declaration is printed.
bool type_is_a_pointer(char *s);
bool type_should_be_printed_without_space(char *s);
//void insert_cpp_code_in_file(FILE *a, const char *indent, Terminal *code);
// the generated code will have all state and action numbers incremented by
// one - to make 0 rather than -1 an error marker.
// Writes the generated lexical analyzer to two files named after
// data.file_name: "<file_name>.h" (class declaration wrapped in an include
// guard) and "<file_name>.cpp" (scalar constants, get_token(), the
// clear_lexeme() and insert_characters() members and the tables).
//
// If either output file cannot be opened for writing, the error is reported
// with perror() and the function returns without generating anything further.
void generate_code()
{
	string h_file_name=data.file_name+string(".h");
	string cpp_file_name=data.file_name+string(".cpp");
	
	// fopen() returns NULL on failure; writing to a NULL stream would crash,
	// so both results are checked before any output is produced.
	FILE *h_file=fopen(h_file_name.c_str(), "w");
	if(!h_file)
	{
		perror(h_file_name.c_str());
		return;
	}
	FILE *cpp_file=fopen(cpp_file_name.c_str(), "w");
	if(!cpp_file)
	{
		perror(cpp_file_name.c_str());
		fclose(h_file);
		return;
	}
	
	// Include-guard macro for the generated header, derived from its name.
	string inclusion_indicator="__LEXICAL_ANALYZER_GENERATED_BY_DOLPHIN__"+convert_file_name_to_identifier(h_file_name, 1);
	
	fprintf(h_file, "%s", FIRST_LINE_FOR_GENERATED_FILES);
	fprintf(cpp_file, "%s", FIRST_LINE_FOR_GENERATED_FILES);
	
	/* .h */
	
	// User code placed before the include guard.
	if(data.variables.code_in_h_before_all)
		fprintf(h_file, "\n%s\n", data.variables.code_in_h_before_all);
	
	fprintf(h_file, "\n#ifndef %s\n", inclusion_indicator.c_str());
	fprintf(h_file, "\n#define %s\n", inclusion_indicator.c_str());
	
	// Standard headers needed by the generated class.
	if(!data.variables.input_stream_is_FILE_asterisk)
		fprintf(h_file, "\n#include <iostream>\n");
	else
		fprintf(h_file, "\n#include <stdio.h>\n");
	fprintf(h_file, "#include <vector>\n");
	fprintf(h_file, "#include <typeinfo>\n");
	if(data.variables.generate_sanity_checks)
		fprintf(h_file, "#include <stdexcept>\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(h_file, "#include <string>\n");
	
	// Forward declaration of the lexer class, so user code can refer to it.
	fprintf(h_file, "\nclass %s;\n", data.variables.dolphin_class_name);
	
	if(data.variables.code_in_h)
		fprintf(h_file, "\n%s\n", data.variables.code_in_h);
	
	// Make Whale's Terminal class known to the header: either emit the
	// emulator, include the Whale header, or just forward-declare the class.
	if(data.variables.using_whale)
	{
		if(data.variables.whale_emulation_mode)
			generate_whale_emulator(h_file, 1);
		else if(data.variables.allow_inclusion_cycle_between_whale_and_dolphin)
			fprintf(h_file, "\n#include \x22%s\x22\n", data.variables.whale_file);
		else
			fprintf(h_file, "\nnamespace %s { class Terminal; }\n", data.variables.whale_namespace);
	}
	
	generate_dolphin_class(h_file);
	
	if(data.variables.whale_emulation_mode)
		generate_whale_emulator(h_file, 2);
	
	if(data.variables.code_in_h_after_all)
		fprintf(h_file, "\n%s\n", data.variables.code_in_h_after_all);
	
	fprintf(h_file, "\n#endif\n");
	
	fclose(h_file);
	
	/* .cpp */
	
	if(data.variables.code_in_cpp_before_all)
		fprintf(cpp_file, "\n%s\n", data.variables.code_in_cpp_before_all);
	
	fprintf(cpp_file, "\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(cpp_file, "#include <utility>\n"); // std::pair is used.
	fprintf(cpp_file, "#include \x22%s\x22\n", h_file_name.c_str());
	// NOTE(review): unlike the header branch above, this include is not
	// guarded by using_whale / whale_emulation_mode - confirm that whale_file
	// is always set when the inclusion cycle is not allowed.
	if(!data.variables.allow_inclusion_cycle_between_whale_and_dolphin)
		fprintf(cpp_file, "#include \x22%s\x22\n", data.variables.whale_file);
	fprintf(cpp_file, "using namespace std;\n");
	
	fprintf(cpp_file, "\nconst char *%s::dolphin_copyright_notice=\n%s;\n",
		data.variables.dolphin_class_name, COPYRIGHT_NOTICE_FOR_GENERATED_FILES);
	
	if(data.variables.code_in_cpp)
		fprintf(cpp_file, "\n%s\n", data.variables.code_in_cpp);
	
	fprintf(cpp_file, "\n");
	generate_scalar_constants(cpp_file, "", true);
	
	fprintf(cpp_file, "\n");
	generate_get_token_function(cpp_file);
	
	// clear_lexeme(): advance the position counters over the characters of
	// the current lexeme, drop them from the buffer and reset the lexeme.
	fprintf(cpp_file, "\n");
	fprintf(cpp_file, "void %s::clear_lexeme()\n", data.variables.dolphin_class_name);
	fprintf(cpp_file, "{\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(cpp_file, "\tfor(int i=0; i<lexeme.size(); i++)\n");
	else
		fprintf(cpp_file, "\tfor(int i=0; i<number_of_characters_in_lexeme; i++)\n");
	fprintf(cpp_file, "\t\tinternal_position_counter(buffer[i]);\n");
	fprintf(cpp_file, "\t\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(cpp_file, "\tbuffer.erase(buffer.begin(), buffer.begin()+lexeme.size());\n");
	else
		fprintf(cpp_file, "\tbuffer.erase(buffer.begin(), buffer.begin()+number_of_characters_in_lexeme);\n");
	fprintf(cpp_file, "\t\n");
	if(data.variables.store_lexeme_in_string)
	{
	//	fprintf(cpp_file, "\tlexeme.clear();\n");
		fprintf(cpp_file, "\tlexeme.resize(0);\n");
	}
	else
	{
		fprintf(cpp_file, "\tif(lexeme)\n");
		fprintf(cpp_file, "\t{\n");
		fprintf(cpp_file, "\t\tdelete[] lexeme;\n");
		fprintf(cpp_file, "\t\tlexeme=NULL;\n");
		fprintf(cpp_file, "\t}\n");
	}
	fprintf(cpp_file, "}\n");
	fprintf(cpp_file, "\n");
	
	// insert_characters(): push a zero-terminated string to the front of
	// the input buffer.
	fprintf(cpp_file, "\nvoid %s::insert_characters(const char *s)\n", data.variables.dolphin_class_name);
	fprintf(cpp_file, "{\n");
	fprintf(cpp_file, "\tint i;\n");
	fprintf(cpp_file, "\tfor(i=0; s[i]; i++);\n");
	fprintf(cpp_file, "\tbuffer.insert(buffer.begin(), s, s+i);\n");
	fprintf(cpp_file, "}\n");
	
	// Static tables; the optional ones are emitted only when enabled.
	if(data.variables.compress_tables)
		generate_table_of_lines(cpp_file);
	generate_table_of_states(cpp_file);
	generate_table_of_symbol_classes(cpp_file);
	if(data.variables.generate_arbitrary_lookahead_support)
		generate_table_of_lookahead_states(cpp_file);
	if(data.variables.generate_table_of_actions)
		generate_table_of_actions(cpp_file);
	if(data.variables.start_conditions_enabled)
		generate_table_of_initial_states(cpp_file);
	
	if(data.variables.code_in_cpp_after_all)
		fprintf(cpp_file, "\n%s\n", data.variables.code_in_cpp_after_all);
	
	fclose(cpp_file);
}
// Emits the declaration of the generated lexer class, named
// data.variables.dolphin_class_name, to stream 'a' (generate_code() passes
// the header file). The layout of the class - table structures, lexeme
// storage, start conditions, Whale integration - is selected by the flags in
// data.variables; user-supplied code fragments are inserted verbatim where
// the corresponding code_in_* variable is set.
//
// The emitted basic_strlen() returns the number of characters before the
// terminating zero, and basic_strdup() allocates an array of T, so that the
// duplicated string is properly terminated for any character type.
void generate_dolphin_class(FILE *a)
{
	fprintf(a, "\nclass %s\n", data.variables.dolphin_class_name);
	fprintf(a, "{\n");
	fprintf(a, "\tstatic const char *dolphin_copyright_notice;\n");
	fprintf(a, "\t\n" "public:\n");
	
	if(data.variables.code_in_class_before_all)
		fprintf(a, "\t%s\n\t\n", data.variables.code_in_class_before_all);
	
	generate_scalar_constants(a, "\t", false);
	
	// enum StartCondition: one enumerator per start condition, first one =0.
	if(data.variables.start_conditions_enabled)
	{
		fprintf(a, "\t\n"
			"\tenum StartCondition { ");
		for(int i=0; i<data.start_conditions.size(); i++)
		{
			if(i) fprintf(a, ", ");
			fprintf(a, "%s", data.start_conditions[i].name);
			if(!i) fprintf(a, "=0");
		}
		fprintf(a, " };\n");
	}
	
	fprintf(a, "\t\n" "protected:\n");
	
	// struct StateData: one DFA state.
	fprintf(a, "\tstruct StateData\n");
	fprintf(a, "\t{\n");
	if(data.variables.compress_tables)
	{
		if(data.variables.using_layer2)
		{
			assert(data.variables.table_compression_exception_width==1);
			fprintf(a, "\t\tint exception_location; // -1 for none\n");
			fprintf(a, "\t\t%s exception_data;\n", data.variables.analyzer_state_type);
		}
		fprintf(a, "\t\tconst %s *transitions;\n", data.variables.analyzer_state_type);
	}
	else
		fprintf(a, "\t\t%s transitions[number_of_symbol_classes];\n", data.variables.analyzer_state_type);
	fprintf(a, "\t\tint action_upon_accept;\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\t\tbool is_lookahead_state;\n");
	if(data.variables.access_transitions_through_a_method)
	{
		// The emitted accessor refers to exception_location/exception_data,
		// which are only declared above when compress_tables and
		// using_layer2 are both set.
		fprintf(a, "\t\t\n");
		fprintf(a, "\t\t%s access_transition(int input_symbol) const\n", data.variables.analyzer_state_type);
		fprintf(a, "\t\t{\n");
		fprintf(a, "\t\t\tif(input_symbol==exception_location)\n");
		fprintf(a, "\t\t\t\treturn exception_data;\n");
		fprintf(a, "\t\t\telse\n");
		fprintf(a, "\t\t\t\treturn transitions[input_symbol];\n");
		fprintf(a, "\t\t}\n");
	}
	fprintf(a, "\t};\n");
	
	// struct ActionData: lookahead information attached to an action.
	if(data.variables.generate_table_of_actions)
	{
		fprintf(a, "\tstruct ActionData\n");
		fprintf(a, "\t{\n");
		if(data.variables.generate_fixed_length_lookahead_support)
			fprintf(a, "\t\tint lookahead_length;\n");
		if(data.variables.generate_arbitrary_lookahead_support)
		{
			fprintf(a, "\t\tconst %s *lookahead_states;\n", data.variables.analyzer_state_type);
			fprintf(a, "\t\tint number_of_lookahead_states;\n");
		}
		if(data.variables.generate_eof_lookahead_support)
			fprintf(a, "\t\tint lookahead_eof; // -1 - if not eof, 0 - doesn't matter, 1 - if eof\n");
		fprintf(a, "\t};\n");
	}
	
	// Declarations of the static tables defined in the generated .cpp file.
	fprintf(a, "\t\n");
	fprintf(a, "\tstatic const int symbol_to_symbol_class[alphabet_cardinality];\n");
	if(data.variables.compress_tables)
		fprintf(a, "\tstatic const %s table_of_lines[size_of_table_of_lines];\n", data.variables.analyzer_state_type);
	fprintf(a, "\tstatic const StateData states[number_of_dfa_states+1];\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\tstatic const %s lookahead_states[size_of_table_of_lookahead_states];\n", data.variables.analyzer_state_type);
	if(data.variables.generate_table_of_actions)
		fprintf(a, "\tstatic const ActionData actions[number_of_actions+1];\n");
	if(data.variables.start_conditions_enabled)
		fprintf(a, "\tstatic const %s initial_dfa_states_for_start_conditions[number_of_start_conditions];\n", data.variables.analyzer_state_type);
	fprintf(a, "\t\n");
	
	// Text printed between the stream type and the member/parameter name:
	// an optional space, then '&' unless the type is already a pointer.
	string stream_class_suffix=string(type_should_be_printed_without_space(data.variables.input_stream_class) ? "" : " ")+
		string(type_is_a_pointer(data.variables.input_stream_class) ? "" : "&");
	
	// Data members.
	fprintf(a, "\t%s%sinput_stream;\n", data.variables.input_stream_class, stream_class_suffix.c_str());
	fprintf(a, "\tstd::vector<%s> buffer;\n", data.variables.input_character_class);
	if(data.variables.start_conditions_enabled)
		fprintf(a, "\tStartCondition start_condition;\n");
	fprintf(a, "\tbool eof_reached;\n");
	if(data.variables.append_data_member)
		fprintf(a, "\tbool append;\n");
	fprintf(a, "\t\n");
	if(data.variables.store_lexeme_in_string)
	{
		fprintf(a, "\t%s lexeme;\n", data.variables.internal_string_type);
	}
	else
	{
		fprintf(a, "\t%s *lexeme;\n", data.variables.internal_char_type);
		fprintf(a, "\tint number_of_characters_in_lexeme;\n");
	}
	fprintf(a, "\tint current_line, current_column, current_offset;\n");
	fprintf(a, "\tint tab_size;\n");
	fprintf(a, "\t\n");
	fprintf(a, "\tvoid clear_lexeme();\n");
	fprintf(a, "\t\n");
	
	// make_token<T>(): allocate a Whale terminal of class T and fill in its
	// position and text.
	if(data.variables.using_whale)
	{
		fprintf(a, "\ttemplate<class T> %s::Terminal *make_token()\n", data.variables.whale_namespace);
		fprintf(a, "\t{\n");
		if(data.variables.generate_verbose_prints)
			fprintf(a, "\t\tstd::cout << \x22make_token<\x22 << typeid(T).name() << \x22>()\\n\x22;\n");
		fprintf(a, "\t\t%s::Terminal *t=new T;\n", data.variables.whale_namespace);
		fprintf(a, "\t\tt->line=line();\n");
		fprintf(a, "\t\tt->column=column();\n");
		fprintf(a, "\t\tt->text=capture_lexeme();\n");
		fprintf(a, "\t\treturn t;\n");
		fprintf(a, "\t}\n");
	}
	
	// internal_position_counter(): update line/column/offset for one
	// consumed character.
	fprintf(a, "\tvoid internal_position_counter(%s c)\n", data.variables.internal_char_type);
	fprintf(a, "\t{\n");
	fprintf(a, "\t\tif(c=='\\n')\n");
	fprintf(a, "\t\t{\n");
	fprintf(a, "\t\t\tcurrent_line++;\n");
	fprintf(a, "\t\t\tcurrent_column=1;\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t\telse if(c=='\\t')\n");
	fprintf(a, "\t\t\tcurrent_column+=tab_size-(current_column-1)%%tab_size;\n");
	fprintf(a, "\t\telse\n");
	fprintf(a, "\t\t\tcurrent_column++;\n");
	fprintf(a, "\t\t\n");
	fprintf(a, "\t\tcurrent_offset++;\n");
	fprintf(a, "\t}\n");
	fprintf(a, "\t\n");
	
	fprintf(a, "public:\n");
	
	// String helpers for character types other than char.
	if(!data.variables.internal_char_type_is_char)
	{
		fprintf(a, "\ttemplate<class T> int basic_strlen(const T *s)\n");
		fprintf(a, "\t{\n");
		fprintf(a, "\t\tint i=0;\n");
		// Count characters up to, but not including, the terminating zero.
		// The previously emitted "while(s[i++]);" returned length+1, which
		// made basic_strdup() leave result[length] uninitialized.
		fprintf(a, "\t\twhile(s[i]) i++;\n");
		fprintf(a, "\t\treturn i;\n");
		fprintf(a, "\t}\n");
		fprintf(a, "\t\n\ttemplate<class T> T *basic_strdup(const T *s)\n");
		fprintf(a, "\t{\n");
		fprintf(a, "\t\tint l=basic_strlen<T>(s);\n");
		// Allocate T, not wchar_t: the template must compile for any T.
		fprintf(a, "\t\tT *result=new T[l+1];\n");
		fprintf(a, "\t\tfor(int i=0; s[i]; i++)\n");
		fprintf(a, "\t\t\tresult[i]=s[i];\n");
		fprintf(a, "\t\tresult[l]=0;\n");
		fprintf(a, "\t\treturn result;\n");
		fprintf(a, "\t}\n");
	}
	
	if(data.variables.code_in_class)
		fprintf(a, "\t\n\t%s\n", data.variables.code_in_class);
	
	// Constructor.
	fprintf(a, "\t%s(%s%sstream_supplied) : input_stream(stream_supplied)\n",
		data.variables.dolphin_class_name, data.variables.input_stream_class,
		stream_class_suffix.c_str());
	fprintf(a, "\t{\n");
	if(data.variables.start_conditions_enabled)
		fprintf(a, "\t\tstart_condition=%s;\n", data.start_conditions[0].name);
	if(!data.variables.store_lexeme_in_string)
	{
		fprintf(a, "\t\tlexeme=NULL;\n");
		fprintf(a, "\t\tnumber_of_characters_in_lexeme=0;\n");
	}
	fprintf(a, "\t\teof_reached=false;\n");
	fprintf(a, "\t\tcurrent_line=1;\n");
	fprintf(a, "\t\tcurrent_column=1;\n");
	fprintf(a, "\t\tcurrent_offset=0;\n");
	fprintf(a, "\t\tset_tab_size(8);\n");
	if(data.variables.code_in_constructor)
		fprintf(a, "\t\t\n\t\t%s\n", data.variables.code_in_constructor);
	fprintf(a, "\t}\n");
	
	// Destructor: frees the lexeme only when it is a raw array.
	if(data.variables.store_lexeme_in_string)
		fprintf(a, "\t~%s() { }\n", data.variables.dolphin_class_name);
	else
		fprintf(a, "\t~%s() { if(lexeme) delete[] lexeme; }\n", data.variables.dolphin_class_name);
	
	if(data.variables.start_conditions_enabled)
	{
		fprintf(a, "\tvoid set_start_condition(StartCondition new_start_condition) { start_condition=new_start_condition; }\n");
		fprintf(a, "\tStartCondition get_start_condition() const { return start_condition; }\n");
	}
	
	// get_token() declaration with the user-configured return type and
	// parameter list.
	fprintf(a, "\t%s%sget_token(%s);\n", data.variables.get_token_function_return_value,
		(type_should_be_printed_without_space(data.variables.get_token_function_return_value) ? "" : " "),
		data.variables.get_token_function_parameters);
	
	// Lexeme accessors; their bodies depend on how the lexeme is stored.
	if(data.variables.store_lexeme_in_string)
	{
		fprintf(a, "\tconst %s *get_lexeme() const { return lexeme.c_str(); }\n", data.variables.internal_char_type);
		fprintf(a, "\tconst %s &get_lexeme_str() const { return lexeme; }\n", data.variables.internal_string_type);
		if(data.variables.internal_char_type_is_char)
			fprintf(a, "\t%s *capture_lexeme() { char *s=strdup(lexeme.c_str()); clear_lexeme(); return s; }\n", data.variables.internal_char_type);
		else
			fprintf(a, "\t%s *capture_lexeme() { %s *s=basic_strdup<%s>(lexeme.c_str()); clear_lexeme(); return s; }\n", data.variables.internal_char_type, data.variables.internal_char_type, data.variables.internal_char_type);
		fprintf(a, "\tint get_lexeme_length() const { return lexeme.size(); }\n");
	}
	else
	{
		fprintf(a, "\tconst %s *get_lexeme() const { return lexeme; }\n", data.variables.internal_char_type);
		fprintf(a, "\t%s *capture_lexeme() { %s *s=lexeme; lexeme=NULL; clear_lexeme(); return s; }\n", data.variables.internal_char_type, data.variables.internal_char_type);
		fprintf(a, "\tint get_lexeme_length() const { return number_of_characters_in_lexeme; }\n");
	}
	
	// Position and miscellaneous accessors.
	fprintf(a, "\tint line() const { return current_line; }\n");
	fprintf(a, "\tint column() const { return current_column; }\n");
	fprintf(a, "\tint offset() const { return current_offset; }\n");
	fprintf(a, "\tvoid set_tab_size(int n) { tab_size=n; }\n");
	fprintf(a, "\tint get_tab_size() const { return tab_size; }\n");
	fprintf(a, "\tvoid insert_characters(const char *);\n");
	fprintf(a, "\tconst char *copyright_notice() { return dolphin_copyright_notice; }\n");
	
	if(data.variables.code_in_class_after_all)
		fprintf(a, "\t\n\t%s\n", data.variables.code_in_class_after_all);
	
	fprintf(a, "};\n");
}
template<class T> void generate_single_scalar_constant(FILE *a, char *indent, char *type, char *id, T value, char *format, char *class_name, bool it_is_cpp)
{
if(!it_is_cpp)
{
string s=string("%sstatic const %s %s=")+string(format)+string(";\n");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -