📄 codegen.cpp
字号:
fprintf(a, s.c_str(), indent, type, id, value);
}
else
fprintf(a, "%sconst %s %s::%s;\n", indent, type, class_name, id);
}
/*
 * Emits the scalar constants of the generated analyzer class, one call to
 * generate_single_scalar_constant() per constant.
 *
 *   a          output file
 *   indent     indentation string forwarded to every emitted line
 *   it_is_cpp  forwarded unchanged to generate_single_scalar_constant()
 *
 * Which constants are emitted depends on the flags in data.variables;
 * the order of the calls below is the order of the emitted constants.
 */
void generate_scalar_constants(FILE *a, char *indent, bool it_is_cpp)
{
	// Disabled in the original source:
	//	if(data.first_terminal)
	//		generate_single_scalar_constant(a, indent, "int", "first_terminal_symbol", data.first_terminal, "%u", data.variables.dolphin_class_name, it_is_cpp);
	
	generate_single_scalar_constant(a, indent, "int", "alphabet_cardinality",
		data.variables.alphabet_cardinality, "%u",
		data.variables.dolphin_class_name, it_is_cpp);
	
	generate_single_scalar_constant(a, indent, "int", "number_of_symbol_classes",
		data.number_of_symbol_classes, "%u",
		data.variables.dolphin_class_name, it_is_cpp);
	
	// Only present when start conditions are in use.
	if(data.variables.start_conditions_enabled)
	{
		generate_single_scalar_constant(a, indent, "int", "number_of_start_conditions",
			data.start_conditions.size(), "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}
	
	generate_single_scalar_constant(a, indent, "int", "number_of_dfa_states",
		data.final_automaton.size(), "%u",
		data.variables.dolphin_class_name, it_is_cpp);
	
	// Without start conditions a single initial state constant is emitted.
	// All state numbers are incremented by one, hence the +1.
	if(!data.variables.start_conditions_enabled)
	{
		generate_single_scalar_constant(a, indent, data.variables.analyzer_state_type, "initial_dfa_state",
			data.dfa_partition.state_to_group[0]+1, "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}
	
	// Only present when the transition tables are compressed.
	if(data.variables.compress_tables)
	{
		generate_single_scalar_constant(a, indent, "int", "size_of_table_of_lines",
			data.tables.compressed_table_of_lines.size(), "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}
	
	generate_single_scalar_constant(a, indent, "int", "number_of_actions",
		data.actions.size(), "%u",
		data.variables.dolphin_class_name, it_is_cpp);
	
	// Only present when arbitrary lookahead support is generated.
	if(data.variables.generate_arbitrary_lookahead_support)
	{
		generate_single_scalar_constant(a, indent, "int", "size_of_table_of_lookahead_states",
			data.tables.compressed_table_of_lookahead_states.size(), "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}
}
/*
 * Writes the source text of the generated analyzer's get_token() member
 * function to the file 'a'.
 *
 * The emitted function is assembled piece by piece from fprintf() calls;
 * which pieces are emitted is selected by the flags in data.variables
 * (verbose prints, lexeme storage, append support, lookahead support,
 * sanity checks, Whale integration and so on). The first argument of each
 * fprintf() is text of the *generated* program, not of this one.
 *
 * Outline of the emitted function:
 *   1. clear the previous lexeme and declare the local variables;
 *   2. an endless 'for' loop over buffer positions which
 *      a. reads one more character from the input when the buffer is
 *         exhausted, detecting end of file,
 *      b. handles end of file at the very start of a lexeme,
 *      c. performs a DFA transition on the current character,
 *      d. remembers the last accepting position and its action,
 *      e. when the DFA is stuck or EOF is hit: resolves lookahead, builds
 *         the lexeme, dispatches on the action number in a 'switch', and
 *         resets the state for the next lexeme.
 */
void generate_get_token_function(FILE *a)
{
	// Function header: return type, optional space, class name, parameters.
	fprintf(a, "%s%s%s::get_token(%s)\n", data.variables.get_token_function_return_value,
		(type_should_be_printed_without_space(data.variables.get_token_function_return_value) ? "" : " "),
		data.variables.dolphin_class_name,
		data.variables.get_token_function_parameters);
	fprintf(a, "{\n");
	
	if(data.variables.generate_verbose_prints)
	{
		fprintf(a, "\tcout << \x22" "DolphinLexicalAnalyzer::get_token()\\n\x22;\n");
		fprintf(a, "\t\n");
	}
	
	// Drop the lexeme left over from the previous call, if any.
	if(data.variables.store_lexeme_in_string)
		fprintf(a, "\tif(lexeme.size())\n");
	else
		fprintf(a, "\tif(lexeme)\n");
	fprintf(a, "\t\tclear_lexeme();\n");
	fprintf(a, "\t\n");
	
	// Local variables of the emitted function.
	fprintf(a, "\t%s state=%s;\n", data.variables.analyzer_state_type, expression_for_initial_state().c_str());
	if(data.variables.append_data_member)
	{
		fprintf(a, "\tint start_pos=0, accepting_pos=0, action_to_call=0;\n");
		fprintf(a, "\tappend=false;\n");
	}
	else
		fprintf(a, "\tint accepting_pos=0, action_to_call=0;\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\tvector<pair<int, int> > possible_lookahead_positions_and_states;\n");
	fprintf(a, "\t\n");
	
	// Main scanning loop.
	fprintf(a, "\tfor(int pos=%s;; pos++)\n", (data.variables.append_data_member ? "start_pos" : "0"));
	fprintf(a, "\t{\n");
	fprintf(a, "\t\tbool eof_reached_right_now=false;\n");
	fprintf(a, "\t\t\n");
	fprintf(a, "\t\tint recognized_action=states[state].action_upon_accept;\n");
	if(data.variables.generate_arbitrary_lookahead_support)
	{
		// Record every (position, state) pair that may later serve as
		// the end of a lexeme followed by a lookahead.
		fprintf(a, "\t\t\n");
		fprintf(a, "\t\tif(states[state].is_lookahead_state)\n");
		fprintf(a, "\t\t\tpossible_lookahead_positions_and_states.push_back(make_pair(pos, state));\n");
	}
	fprintf(a, "\t\t\n");
	
	// Buffer exhausted: fetch one more character or detect end of file.
	fprintf(a, "\t\tif(buffer.size()==pos)\n");
	fprintf(a, "\t\t{\n");
	fprintf(a, "\t\t\tif(eof_reached)\n");
	fprintf(a, "\t\t\t\teof_reached_right_now=true;\n");
	fprintf(a, "\t\t\telse\n");
	fprintf(a, "\t\t\t{\n");
	fprintf(a, "\t\t\t\t%s c;\n", data.variables.input_character_class);
	fprintf(a, "\t\t\t\t");
	// User-configurable statement that reads a character from the stream.
	char *gcfs_parameter_values[]={"c", "input_stream"};
	print_parametrized_string(a, data.variables.how_to_get_character_from_stream, "cs", gcfs_parameter_values);
	fprintf(a, ";\n");
	fprintf(a, "\t\t\t\t\n");
	fprintf(a, "\t\t\t\tif(");
	// User-configurable end-of-file test.
	char *ceof_parameter_values[]={"input_stream"};
	print_parametrized_string(a, data.variables.how_to_check_eof, "s", ceof_parameter_values);
	fprintf(a, ")\n");
	fprintf(a, "\t\t\t\t{\n");
	fprintf(a, "\t\t\t\t\teof_reached=true;\n");
	fprintf(a, "\t\t\t\t\teof_reached_right_now=true;\n");
	fprintf(a, "\t\t\t\t}\n");
	fprintf(a, "\t\t\t\telse\n");
	fprintf(a, "\t\t\t\t{\n");
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\t\t\tcout << \x22read symbol \x22 << (unsigned int)c << \x22\\n\x22;\n");
	fprintf(a, "\t\t\t\t\tbuffer.push_back(c);\n");
	fprintf(a, "\t\t\t\t}\n");
	fprintf(a, "\t\t\t}\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t\t\n");
	
	// End of file: if it occurs at the first position of a lexeme,
	// produce an empty lexeme and run the EOF action.
	fprintf(a, "\t\tif(eof_reached_right_now)\n");
	fprintf(a, "\t\t{\n");
	if(data.variables.append_data_member)
		fprintf(a, "\t\t\tif(pos==start_pos)\n");
	else
		fprintf(a, "\t\t\tif(pos==0)\n");
	fprintf(a, "\t\t\t{\n");
	if(!data.variables.store_lexeme_in_string)
	{
		// With append support the allocation is wrapped in an extra
		// 'if(lexeme==NULL)' block, hence one more level of indentation.
		const char *indent=(data.variables.append_data_member ? "\t\t\t\t\t" : "\t\t\t\t");
		if(data.variables.append_data_member)
		{
			fprintf(a, "\t\t\t\tif(lexeme==NULL)\n");
			fprintf(a, "\t\t\t\t{\n");
		}
		fprintf(a, "%snumber_of_characters_in_lexeme=0;\n", indent);
		fprintf(a, "%slexeme=new %s[1];\n", indent, data.variables.internal_char_type);
		fprintf(a, "%slexeme[0]=0;\n", indent);
		if(data.variables.append_data_member)
			fprintf(a, "\t\t\t\t}\n");
		fprintf(a, "\t\t\t\t\n");
	}
	if(data.recognized_expression_search.count("eof"))
	{
		// A user-defined action for "eof" exists.
		RecognizedExpressionData &re=data.recognized_expressions[data.recognized_expression_search["eof"]];
		generate_actions_for_special_expression(a, "\t\t\t\t", re, false);
	}
	else if(data.variables.using_whale)
		fprintf(a, "\t\t\t\treturn make_token<%s::TerminalEOF>();\n", data.variables.whale_namespace);
	else
		fprintf(a, "\t\t\t\treturn 0;\n");
	fprintf(a, "\t\t\t}\n");
	fprintf(a, "\t\t}\n");
	
	// Not at end of file: make a DFA transition on the current character.
	fprintf(a, "\t\telse\n");
	fprintf(a, "\t\t{\n");
	// NOTE(review): 'unsigned wchar_t' is not a valid type in standard C++;
	// the emitted text is left unchanged here - confirm against the
	// compilers the generated code is expected to build with.
	if(data.variables.unicode)
		fprintf(a, "\t\t\tunsigned int c=(unsigned wchar_t)");
	else
		fprintf(a, "\t\t\tunsigned int c=(unsigned char)");
	// User-configurable expression extracting the character code.
	char *gac_parameter_values[]={"buffer[pos]"};
	print_parametrized_string(a, data.variables.how_to_get_actual_character, "c", gac_parameter_values);
	fprintf(a, ";\n");
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << \x22transition from \x22 << int(state) << \x22 to \x22;\n");
	if(data.variables.access_transitions_through_a_method)
		fprintf(a, "\t\t\tstate=states[state].access_transition(symbol_to_symbol_class[c]);\n");
	else
		fprintf(a, "\t\t\tstate=states[state].transitions[symbol_to_symbol_class[c]];\n");
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << int(state) << \x22 on \x22 << c << \x22\\n\x22;\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t\t\n");
	
	// Remember the most recent accepting position and its action.
	if(data.variables.generate_eof_lookahead_support)
	{
		fprintf(a, "\t\tint lookahead_eof=actions[recognized_action].lookahead_eof;\n");
		fprintf(a, "\t\t\n");
		fprintf(a, "\t\tif(recognized_action && (lookahead_eof==0 ||\n");
		fprintf(a, "\t\t\t(lookahead_eof==-1 && !eof_reached_right_now) ||\n");
		fprintf(a, "\t\t\t(lookahead_eof==1 && eof_reached_right_now)))\n");
	}
	else
		fprintf(a, "\t\tif(recognized_action)\n");
	fprintf(a, "\t\t{\n");
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << \x22pos \x22 << pos << \x22: recognized action \x22 << recognized_action << \x22\\n\x22;\n");
	fprintf(a, "\t\t\taccepting_pos=pos;\n");
	fprintf(a, "\t\t\taction_to_call=recognized_action;\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t\t\n");
	
	// The DFA is stuck (state 0) or the input is over: finish the lexeme.
	fprintf(a, "\t\tif(!state || eof_reached_right_now)\n");
	fprintf(a, "\t\t{\n");
	if(data.variables.eat_one_character_upon_lexical_error)
	{
		fprintf(a, "\t\t\tif(action_to_call==0)\t// if it is a lexical error,\n");
		if(data.variables.append_data_member)
			fprintf(a, "\t\t\t\taccepting_pos=start_pos+1; // then eat one character.\n");
		else
			fprintf(a, "\t\t\t\taccepting_pos=1; // then eat one character.\n");
		fprintf(a, "\t\t\t\n");
	}
	else
		fprintf(a, "\t\t\t// if there is no action to call, then accepting_pos=0\n");
	
	// Lookahead: move accepting_pos back to the real end of the lexeme.
	if(data.variables.generate_fixed_length_lookahead_support ||
		data.variables.generate_arbitrary_lookahead_support)
	{
		fprintf(a, "\t\t\tif(action_to_call>0)\n");
		fprintf(a, "\t\t\t{\n");
		fprintf(a, "\t\t\t\tconst ActionData &action=actions[action_to_call];\n");
		fprintf(a, "\t\t\t\t\n");
		if(data.variables.generate_fixed_length_lookahead_support)
		{
			fprintf(a, "\t\t\t\tif(action.lookahead_length>0)\n");
			fprintf(a, "\t\t\t\t\taccepting_pos-=action.lookahead_length;\n");
		}
		if(data.variables.generate_arbitrary_lookahead_support)
		{
			// Chained with 'else' when the fixed-length test precedes it.
			fprintf(a, "\t\t\t\t%sif(action.lookahead_states)\n",
				(data.variables.generate_fixed_length_lookahead_support ? "else " : ""));
			fprintf(a, "\t\t\t\t{\n");
			if(data.variables.generate_verbose_prints)
			{
				fprintf(a, "\t\t\t\t\tcout << \x22Processing lookahead: state=\x22 << int(state) << \x22, action=\x22 << action_to_call << \x22, accepting_pos=\x22 << accepting_pos << \x22, pos=\x22 << pos << \x22.\\n\x22;\n");
				fprintf(a, "\t\t\t\t\t\n");
			}
			fprintf(a, "\t\t\t\t\tint result=-1;\n");
			fprintf(a, "\t\t\t\t\t\n");
			// The emitted loop scans the recorded pairs from the last one
			// down to the first, so its index has to be decremented.
			// (It used to be emitted with 'i++', which stepped past the
			// end of the vector after the first non-matching iteration.)
			fprintf(a, "\t\t\t\t\tfor(int i=possible_lookahead_positions_and_states.size()-1; i>=0; i--)\n");
			fprintf(a, "\t\t\t\t\t{\n");
			fprintf(a, "\t\t\t\t\t\tpair<int, int> p=possible_lookahead_positions_and_states[i];\n");
			if(data.variables.generate_verbose_prints)
				fprintf(a, "\t\t\t\t\t\tcout << \x22Trying (\x22 << p.first << \x22, \x22 << p.second << \x22), char \x22 << (unsigned int)(buffer[p.first]) << \x22.\\n\x22;\n");
			fprintf(a, "\t\t\t\t\t\tif(p.first>accepting_pos) continue;\n");
			fprintf(a, "\t\t\t\t\t\t\n");
			fprintf(a, "\t\t\t\t\t\tfor(int j=0; j<action.number_of_lookahead_states; j++)\n");
			fprintf(a, "\t\t\t\t\t\t\tif(action.lookahead_states[j]==p.second)\n");
			fprintf(a, "\t\t\t\t\t\t\t{\n");
			fprintf(a, "\t\t\t\t\t\t\t\tresult=p.first;\n");
			fprintf(a, "\t\t\t\t\t\t\t\tbreak;\n");
			fprintf(a, "\t\t\t\t\t\t\t}\n");
			fprintf(a, "\t\t\t\t\t\t\n");
			fprintf(a, "\t\t\t\t\t\tif(result!=-1)\n");
			fprintf(a, "\t\t\t\t\t\t\tbreak;\n");
			fprintf(a, "\t\t\t\t\t}\n");
			fprintf(a, "\t\t\t\t\t\n");
			if(data.variables.generate_verbose_prints)
				fprintf(a, "\t\t\t\t\tcout << \x22Using position \x22 << result << \x22.\\n\x22;\n");
			if(data.variables.generate_sanity_checks)
				fprintf(a, "\t\t\t\t\tif(result==-1) throw logic_error(\x22%s::get_token(): Internal error processing lookahead.\x22);\n", data.variables.dolphin_class_name);
			fprintf(a, "\t\t\t\t\taccepting_pos=result;\n");
			fprintf(a, "\t\t\t\t}\n");
		}
		fprintf(a, "\t\t\t}\n");
		fprintf(a, "\t\t\t\n");
	}
	
	// Build the lexeme out of the first accepting_pos buffered characters.
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << \x22" "Creating a \x22 << accepting_pos << \x22-character long lexeme.\\n\x22;\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(a, "\t\t\tcopy(buffer.begin()%s, buffer.begin()+accepting_pos, back_inserter(lexeme));\n",
			(data.variables.append_data_member ? "+start_pos" : ""));
	else
	{
		if(data.variables.append_data_member)
		{
			fprintf(a, "\t\t\tif(lexeme)\n");
			fprintf(a, "\t\t\t\tdelete[] lexeme;\n");
		}
		fprintf(a, "\t\t\tnumber_of_characters_in_lexeme=accepting_pos;\n");
		fprintf(a, "\t\t\tlexeme=new %s[number_of_characters_in_lexeme+1];\n", data.variables.internal_char_type);
		fprintf(a, "\t\t\tcopy(buffer.begin(), buffer.begin()+number_of_characters_in_lexeme, lexeme);\n");
		fprintf(a, "\t\t\tlexeme[number_of_characters_in_lexeme]=0;\n");
	}
	fprintf(a, "\t\t\t\n");
	
	// Dispatch on the action number: case 0 is the lexical error case,
	// case i (i>=1) corresponds to data.actions[i-1].
	fprintf(a, "\t\t\tswitch(action_to_call)\n");
	fprintf(a, "\t\t\t{\n");
	for(int i=0; i<data.actions.size()+1; i++)
	{
		if(i==0)
		{
			// action upon error
			fprintf(a, "\t\t\tcase %u:\n", i);
			if(data.recognized_expression_search.count("error"))
			{
				RecognizedExpressionData &re=data.recognized_expressions[data.recognized_expression_search["error"]];
				generate_actions_for_special_expression(a, "\t\t\t\t", re, true);
			}
			else
			{
				if(data.variables.using_whale)
				{
					fprintf(a, "\t\t\t\tcout << \x22Lexical error at line \x22 << line()\n");
					fprintf(a, "\t\t\t\t\t<< \x22 column \x22 << column() << \x22.\\n\x22;\n");
					fprintf(a, "\t\t\t\t\n");
				}
				if(data.variables.using_whale)
					fprintf(a, "\t\t\t\treturn make_token<%s::TerminalError>();\n", data.variables.whale_namespace);
				else
					fprintf(a, "\t\t\t\treturn -1;\n");
			}
		}
		else
		{
			int an=i-1; // action number in our arrays.
			ActionData &action=data.actions[an];
			
			// Special actions get no 'case' label here.
			if(action.is_special) continue;
			
			fprintf(a, "\t\t\tcase %u:\n", i);
			if(data.variables.generate_verbose_prints)
			{
				// fprintf(a, "\t\t\t\tcout << \x22" "Action %u\\n\x22;\n", i);
				// Exactly three values for the three %u conversions
				// (a surplus fourth argument has been removed).
				fprintf(a, "\t\t\t\tcout << \x22" "Action %u defined at line %u column %u.\\n\x22;\n", i, action.declaration->arrow->line, action.declaration->arrow->column);
			}
			generate_action(a, "\t\t\t\t", action, true);
		}
	}
	if(data.variables.generate_sanity_checks)
	{
		fprintf(a, "\t\t\tdefault:\n");
		if(data.variables.generate_verbose_prints)
			fprintf(a, "\t\t\t\tcout << \x22wrong action number \x22 << action_to_call << \x22\\n\x22;\n");
		fprintf(a, "\t\t\t\tthrow logic_error(\x22%s::get_token(): DFA has reached a non-existent state.\x22);\n", data.variables.dolphin_class_name);
	}
	fprintf(a, "\t\t\t}\n");
	fprintf(a, "\t\t\t\n");
	
	// The action did not return: reset everything and scan the next lexeme.
	// 'pos' is set one below the restart position because the emitted
	// 'for' loop increments it before the next iteration.
	if(data.variables.append_data_member)
	{
		fprintf(a, "\t\t\tif(append)\n");
		if(data.variables.store_lexeme_in_string)
			fprintf(a, "\t\t\t\tstart_pos=lexeme.size();\n");
		else
			fprintf(a, "\t\t\t\tstart_pos=number_of_characters_in_lexeme;\n");
		fprintf(a, "\t\t\telse\n");
		fprintf(a, "\t\t\t{\n");
		fprintf(a, "\t\t\t\tclear_lexeme();\n");
		fprintf(a, "\t\t\t\tstart_pos=0;\n");
		fprintf(a, "\t\t\t}\n");
		fprintf(a, "\t\t\t\n");
		fprintf(a, "\t\t\tpos=start_pos-1;\n");
	}
	else
	{
		if(data.variables.store_lexeme_in_string)
			fprintf(a, "\t\t\tif(lexeme.size())\n");
		else
			fprintf(a, "\t\t\tif(lexeme)\n");
		fprintf(a, "\t\t\t\tclear_lexeme();\n");
		fprintf(a, "\t\t\tpos=-1;\n");
	}
	fprintf(a, "\t\t\tstate=%s;\n", expression_for_initial_state().c_str());
	fprintf(a, "\t\t\taccepting_pos=0;\n");
	fprintf(a, "\t\t\taction_to_call=0;\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\t\t\tpossible_lookahead_positions_and_states.clear();\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t}\n");
	fprintf(a, "}\n");
}
void generate_table_of_states(FILE *a)
{
fprintf(a, "\nconst %s::StateData %s::states[%s::number_of_dfa_states+1]={\n",
data.variables.dolphin_class_name,
data.variables.dolphin_class_name,
data.variables.dolphin_class_name);
// dummy 0th state.
fprintf(a, "\t{ ");
if(data.variables.compress_tables)
{
if(data.variables.using_layer2)
{
fprintf(a, "0, 0, ");
}
fprintf(a, "NULL");
}
else
{
fprintf(a, "{ ");
for(int j=0; j<data.number_of_symbol_classes; j++)
{
if(j) fprintf(a, ", ");
fprintf(a, "0");
}
fprintf(a, " }");
}
fprintf(a, ", 0"); // action_upon_accept
if(data.variables.generate_arbitrary_lookahead_support)
fprintf(a, ", false");
fprintf(a, " }");
// dfa states.
for(int i=0; i<data.final_automaton.size(); i++)
{
fprintf(a, ",\n" "\t{ ", i);
if(data.variables.compress_tables)
{
int l1_line=data.tables.state_to_layer1[i];
int offset;
if(!data.variables.using_layer2)
offset=data.tables.line_to_offset_in_table_of_lines[l1_line];
else
{
int exc_location=data.tables.layer1_to_exception_location[l1_line];
vector<int> &exc_data=data.tables.layer1_to_exception_data[l1_line];
if(exc_location==-1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -