⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 codegen.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
📖 第 1 页 / 共 3 页
字号:
		fprintf(a, s.c_str(), indent, type, id, value);
	}
	else
		fprintf(a, "%sconst %s %s::%s;\n", indent, type, class_name, id);
}

/*
 * Emits every scalar constant of the generated analyzer class by delegating
 * to generate_single_scalar_constant(), one call per constant, in a fixed
 * order.
 *
 *   a          - output stream the constants are written to.
 *   indent     - indentation string forwarded unchanged to every call.
 *   it_is_cpp  - forwarded unchanged to every call.
 *
 * Always emitted: alphabet_cardinality, number_of_symbol_classes,
 * number_of_dfa_states, number_of_actions.
 * Emitted conditionally (see the individual checks below):
 * number_of_start_conditions, initial_dfa_state, size_of_table_of_lines,
 * size_of_table_of_lookahead_states.
 */
void generate_scalar_constants(FILE *a, char *indent, bool it_is_cpp)
{
	// Emission of "first_terminal_symbol" is currently disabled:
	//	if(data.first_terminal)
	//		generate_single_scalar_constant(a, indent, "int", "first_terminal_symbol", data.first_terminal, "%u", data.variables.dolphin_class_name, it_is_cpp);

	generate_single_scalar_constant(
		a, indent, "int", "alphabet_cardinality",
		data.variables.alphabet_cardinality, "%u",
		data.variables.dolphin_class_name, it_is_cpp);

	generate_single_scalar_constant(
		a, indent, "int", "number_of_symbol_classes",
		data.number_of_symbol_classes, "%u",
		data.variables.dolphin_class_name, it_is_cpp);

	// The number of start conditions is only emitted when they are enabled.
	if(data.variables.start_conditions_enabled)
	{
		generate_single_scalar_constant(
			a, indent, "int", "number_of_start_conditions",
			data.start_conditions.size(), "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}

	generate_single_scalar_constant(
		a, indent, "int", "number_of_dfa_states",
		data.final_automaton.size(), "%u",
		data.variables.dolphin_class_name, it_is_cpp);

	// A single fixed initial state is emitted only when start conditions are
	// disabled. All state numbers are incremented by one, hence the "+1".
	if(!data.variables.start_conditions_enabled)
	{
		generate_single_scalar_constant(
			a, indent, data.variables.analyzer_state_type, "initial_dfa_state",
			data.dfa_partition.state_to_group[0]+1, "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}

	// The size of the table of lines is emitted only for compressed tables.
	if(data.variables.compress_tables)
	{
		generate_single_scalar_constant(
			a, indent, "int", "size_of_table_of_lines",
			data.tables.compressed_table_of_lines.size(), "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}

	generate_single_scalar_constant(
		a, indent, "int", "number_of_actions",
		data.actions.size(), "%u",
		data.variables.dolphin_class_name, it_is_cpp);

	// The lookahead state table size is emitted only when arbitrary
	// lookahead support is being generated.
	if(data.variables.generate_arbitrary_lookahead_support)
	{
		generate_single_scalar_constant(
			a, indent, "int", "size_of_table_of_lookahead_states",
			data.tables.compressed_table_of_lookahead_states.size(), "%u",
			data.variables.dolphin_class_name, it_is_cpp);
	}
}

/*
 * Writes to 'a' the C++ source text of the generated analyzer's
 * <dolphin_class_name>::get_token() member function.
 *
 * The text is assembled piece by piece with fprintf(). Which pieces are
 * written is selected by the flags in data.variables:
 *   - generate_verbose_prints: extra "cout << ..." tracing statements;
 *   - store_lexeme_in_string: lexeme handled through size()/back_inserter
 *     versus a new[]-allocated array with number_of_characters_in_lexeme;
 *   - append_data_member: scanning starts at start_pos instead of 0;
 *   - generate_eof_lookahead_support, generate_fixed_length_lookahead_support,
 *     generate_arbitrary_lookahead_support: lookahead handling;
 *   - eat_one_character_upon_lexical_error, generate_sanity_checks,
 *     using_whale, unicode, access_transitions_through_a_method.
 *
 *   a - output stream receiving the generated source text.
 */
void generate_get_token_function(FILE *a)
{
	// Function header: "<return type>[ ]<class>::get_token(<parameters>)".
	fprintf(a, "%s%s%s::get_token(%s)\n", data.variables.get_token_function_return_value,
		(type_should_be_printed_without_space(data.variables.get_token_function_return_value) ? "" : " "),
		data.variables.dolphin_class_name,
		data.variables.get_token_function_parameters);
	fprintf(a, "{\n");
	if(data.variables.generate_verbose_prints)
	{
		// The literal is split after \x22 so that the following 'D' is not
		// parsed as part of the hexadecimal escape sequence.
		fprintf(a, "\tcout << \x22" "DolphinLexicalAnalyzer::get_token()\\n\x22;\n");
		fprintf(a, "\t\n");
	}
	
	// Emitted prologue: clear a lexeme left over from a previous call.
	if(data.variables.store_lexeme_in_string)
		fprintf(a, "\tif(lexeme.size())\n");
	else
		fprintf(a, "\tif(lexeme)\n");
	fprintf(a, "\t\tclear_lexeme();\n");
	
	// Emitted local variables of get_token().
	fprintf(a, "\t\n");
	fprintf(a, "\t%s state=%s;\n", data.variables.analyzer_state_type, expression_for_initial_state().c_str());
	if(data.variables.append_data_member)
	{
		fprintf(a, "\tint start_pos=0, accepting_pos=0, action_to_call=0;\n");
		fprintf(a, "\tappend=false;\n");
	}
	else
		fprintf(a, "\tint accepting_pos=0, action_to_call=0;\n");
	
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\tvector<pair<int, int> > possible_lookahead_positions_and_states;\n");
	
	// Emitted main loop over buffer positions; it has no termination
	// condition of its own.
	fprintf(a, "\t\n");
	fprintf(a, "\tfor(int pos=%s;; pos++)\n", (data.variables.append_data_member ? "start_pos" : "0"));
	fprintf(a, "\t{\n");
	
	fprintf(a, "\t\tbool eof_reached_right_now=false;\n");
	fprintf(a, "\t\t\n");
	
	fprintf(a, "\t\tint recognized_action=states[state].action_upon_accept;\n");
	
	if(data.variables.generate_arbitrary_lookahead_support)
	{
		// Emitted code records every (position, state) pair at which the
		// automaton is in a lookahead state; the list is searched later.
		fprintf(a, "\t\t\n");
		fprintf(a, "\t\tif(states[state].is_lookahead_state)\n");
		fprintf(a, "\t\t\tpossible_lookahead_positions_and_states.push_back(make_pair(pos, state));\n");
	}
	
	// Emitted code: when pos is at the end of the buffer, either report
	// end of file or read one more character from the input stream.
	fprintf(a, "\t\t\n");
	fprintf(a, "\t\tif(buffer.size()==pos)\n");
	fprintf(a, "\t\t{\n");
	fprintf(a, "\t\t\tif(eof_reached)\n");
	fprintf(a, "\t\t\t\teof_reached_right_now=true;\n");
	fprintf(a, "\t\t\telse\n");
	fprintf(a, "\t\t\t{\n");
	fprintf(a, "\t\t\t\t%s c;\n", data.variables.input_character_class);
	
	// The statement that reads a character is a user-supplied template
	// expanded by print_parametrized_string() with the values below.
	fprintf(a, "\t\t\t\t");
	char *gcfs_parameter_values[]={"c", "input_stream"};
	print_parametrized_string(a, data.variables.how_to_get_character_from_stream, "cs", gcfs_parameter_values);
	fprintf(a, ";\n");
	
	fprintf(a, "\t\t\t\t\n");
	
	// The end-of-file test is likewise expanded from a template.
	fprintf(a, "\t\t\t\tif(");
	char *ceof_parameter_values[]={"input_stream"};
	print_parametrized_string(a, data.variables.how_to_check_eof, "s", ceof_parameter_values);
	fprintf(a, ")\n");
	
	fprintf(a, "\t\t\t\t{\n");
	fprintf(a, "\t\t\t\t\teof_reached=true;\n");
	fprintf(a, "\t\t\t\t\teof_reached_right_now=true;\n");
	fprintf(a, "\t\t\t\t}\n");
	fprintf(a, "\t\t\t\telse\n");
	fprintf(a, "\t\t\t\t{\n");
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\t\t\tcout << \x22read symbol \x22 << (unsigned int)c << \x22\\n\x22;\n");
	fprintf(a, "\t\t\t\t\tbuffer.push_back(c);\n");
	fprintf(a, "\t\t\t\t}\n");
	fprintf(a, "\t\t\t}\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t\t\n");
	
	// Emitted code for end of file: if nothing has been scanned since the
	// starting position, produce an empty lexeme (array storage only) and
	// perform the end-of-file action or return.
	fprintf(a, "\t\tif(eof_reached_right_now)\n");
	fprintf(a, "\t\t{\n");
	if(data.variables.append_data_member)
		fprintf(a, "\t\t\tif(pos==start_pos)\n");
	else
		fprintf(a, "\t\t\tif(pos==0)\n");
	fprintf(a, "\t\t\t{\n");
	
	if(!data.variables.store_lexeme_in_string)
	{
		// With the append data member the allocation is wrapped in an
		// extra "if(lexeme==NULL)" block, hence one more tab of indentation.
		const char *indent=(data.variables.append_data_member ? "\t\t\t\t\t" : "\t\t\t\t");
		if(data.variables.append_data_member)
		{
			fprintf(a, "\t\t\t\tif(lexeme==NULL)\n");
			fprintf(a, "\t\t\t\t{\n");
		}
		fprintf(a, "%snumber_of_characters_in_lexeme=0;\n", indent);
		fprintf(a, "%slexeme=new %s[1];\n", indent, data.variables.internal_char_type);
		fprintf(a, "%slexeme[0]=0;\n", indent);
		if(data.variables.append_data_member)
			fprintf(a, "\t\t\t\t}\n");
		fprintf(a, "\t\t\t\t\n");
	}
	// A recognized expression registered under "eof" takes precedence over
	// the built-in end-of-file return statements.
	if(data.recognized_expression_search.count("eof"))
	{
		RecognizedExpressionData &re=data.recognized_expressions[data.recognized_expression_search["eof"]];
		generate_actions_for_special_expression(a, "\t\t\t\t", re, false);
	}
	else if(data.variables.using_whale)
		fprintf(a, "\t\t\t\treturn make_token<%s::TerminalEOF>();\n", data.variables.whale_namespace);
	else
		fprintf(a, "\t\t\t\treturn 0;\n");
	
	fprintf(a, "\t\t\t}\n");
	fprintf(a, "\t\t}\n");
	
	// Emitted code for the regular case: take the character at buffer[pos]
	// and follow the DFA transition for its symbol class.
	fprintf(a, "\t\telse\n");
	fprintf(a, "\t\t{\n");
	
	// NOTE(review): the "(unsigned wchar_t)" cast is emitted verbatim into
	// the generated source; confirm that the target compilers accept it.
	if(data.variables.unicode)
		fprintf(a, "\t\t\tunsigned int c=(unsigned wchar_t)");
	else
		fprintf(a, "\t\t\tunsigned int c=(unsigned char)");
	char *gac_parameter_values[]={"buffer[pos]"};
	print_parametrized_string(a, data.variables.how_to_get_actual_character, "c", gac_parameter_values);
	fprintf(a, ";\n");
	
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << \x22transition from \x22 << int(state) << \x22 to \x22;\n");
	if(data.variables.access_transitions_through_a_method)
		fprintf(a, "\t\t\tstate=states[state].access_transition(symbol_to_symbol_class[c]);\n");
	else
		fprintf(a, "\t\t\tstate=states[state].transitions[symbol_to_symbol_class[c]];\n");
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << int(state) << \x22 on \x22 << c << \x22\\n\x22;\n");
	fprintf(a, "\t\t}\n");
	
	// Emitted code: remember the most recent accepting position and action,
	// optionally filtered by the action's lookahead_eof setting.
	fprintf(a, "\t\t\n");
	if(data.variables.generate_eof_lookahead_support)
	{
		fprintf(a, "\t\tint lookahead_eof=actions[recognized_action].lookahead_eof;\n");
		fprintf(a, "\t\t\n");
		fprintf(a, "\t\tif(recognized_action && (lookahead_eof==0 ||\n");
		fprintf(a, "\t\t\t(lookahead_eof==-1 && !eof_reached_right_now) ||\n");
		fprintf(a, "\t\t\t(lookahead_eof==1 && eof_reached_right_now)))\n");
	}
	else
		fprintf(a, "\t\tif(recognized_action)\n");
	fprintf(a, "\t\t{\n");
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << \x22pos \x22 << pos << \x22: recognized action \x22 << recognized_action << \x22\\n\x22;\n");
	fprintf(a, "\t\t\taccepting_pos=pos;\n");
	fprintf(a, "\t\t\taction_to_call=recognized_action;\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t\t\n");
	
	// Emitted code: scanning stops when the state becomes 0 or end of file
	// is reached; the lexeme is then built and the action dispatched.
	fprintf(a, "\t\tif(!state || eof_reached_right_now)\n");
	fprintf(a, "\t\t{\n");
	
	if(data.variables.eat_one_character_upon_lexical_error)
	{
		fprintf(a, "\t\t\tif(action_to_call==0)\t// if it is a lexical error,\n");
		if(data.variables.append_data_member)
			fprintf(a, "\t\t\t\taccepting_pos=start_pos+1; // then eat one character.\n");
		else
			fprintf(a, "\t\t\t\taccepting_pos=1; // then eat one character.\n");
		fprintf(a, "\t\t\t\n");
	}
	else
		fprintf(a, "\t\t\t// if there is no action to call, then accepting_pos=0\n");
	if(data.variables.generate_fixed_length_lookahead_support ||
		data.variables.generate_arbitrary_lookahead_support)
	{
		// Emitted code: move accepting_pos back according to the lookahead
		// information attached to the action that is about to be called.
		fprintf(a, "\t\t\tif(action_to_call>0)\n");
		fprintf(a, "\t\t\t{\n");
		fprintf(a, "\t\t\t\tconst ActionData &action=actions[action_to_call];\n");
		fprintf(a, "\t\t\t\t\n");
		if(data.variables.generate_fixed_length_lookahead_support)
		{
			fprintf(a, "\t\t\t\tif(action.lookahead_length>0)\n");
			fprintf(a, "\t\t\t\t\taccepting_pos-=action.lookahead_length;\n");
		}
		if(data.variables.generate_arbitrary_lookahead_support)
		{
			// When both kinds of lookahead are generated, the arbitrary
			// lookahead branch is chained with "else ".
			fprintf(a, "\t\t\t\t%sif(action.lookahead_states)\n",
				(data.variables.generate_fixed_length_lookahead_support ? "else " : ""));
			fprintf(a, "\t\t\t\t{\n");
			if(data.variables.generate_verbose_prints)
			{
				fprintf(a, "\t\t\t\t\tcout << \x22Processing lookahead: state=\x22 << int(state) << \x22, action=\x22 << action_to_call << \x22, accepting_pos=\x22 << accepting_pos << \x22, pos=\x22 << pos << \x22.\\n\x22;\n");
				fprintf(a, "\t\t\t\t\t\n");
			}
			fprintf(a, "\t\t\t\t\tint result=-1;\n");
			fprintf(a, "\t\t\t\t\t\n");
			// The emitted loop starts at the last recorded entry and runs
			// while i>=0, so the index has to be decremented; incrementing
			// it would index past the end of the vector on any iteration
			// that does not break out of the loop.
			fprintf(a, "\t\t\t\t\tfor(int i=possible_lookahead_positions_and_states.size()-1; i>=0; i--)\n");
			fprintf(a, "\t\t\t\t\t{\n");
			fprintf(a, "\t\t\t\t\t\tpair<int, int> p=possible_lookahead_positions_and_states[i];\n");
			if(data.variables.generate_verbose_prints)
				fprintf(a, "\t\t\t\t\t\tcout << \x22Trying (\x22 << p.first << \x22, \x22 << p.second << \x22), char \x22 << (unsigned int)(buffer[p.first]) << \x22.\\n\x22;\n");
			fprintf(a, "\t\t\t\t\t\tif(p.first>accepting_pos) continue;\n");
			fprintf(a, "\t\t\t\t\t\t\n");
			fprintf(a, "\t\t\t\t\t\tfor(int j=0; j<action.number_of_lookahead_states; j++)\n");
			fprintf(a, "\t\t\t\t\t\t\tif(action.lookahead_states[j]==p.second)\n");
			fprintf(a, "\t\t\t\t\t\t\t{\n");
			fprintf(a, "\t\t\t\t\t\t\t\tresult=p.first;\n");
			fprintf(a, "\t\t\t\t\t\t\t\tbreak;\n");
			fprintf(a, "\t\t\t\t\t\t\t}\n");
			fprintf(a, "\t\t\t\t\t\t\n");
			fprintf(a, "\t\t\t\t\t\tif(result!=-1)\n");
			fprintf(a, "\t\t\t\t\t\t\tbreak;\n");
			fprintf(a, "\t\t\t\t\t}\n");
			fprintf(a, "\t\t\t\t\t\n");
			if(data.variables.generate_verbose_prints)
				fprintf(a, "\t\t\t\t\tcout << \x22Using position \x22 << result << \x22.\\n\x22;\n");
			if(data.variables.generate_sanity_checks)
				fprintf(a, "\t\t\t\t\tif(result==-1) throw logic_error(\x22%s::get_token(): Internal error processing lookahead.\x22);\n", data.variables.dolphin_class_name);
			fprintf(a, "\t\t\t\t\taccepting_pos=result;\n");
			fprintf(a, "\t\t\t\t}\n");
		}
		fprintf(a, "\t\t\t}\n");
		fprintf(a, "\t\t\t\n");
	}
	
	// Emitted code: build the lexeme from the first accepting_pos
	// characters of the buffer.
	if(data.variables.generate_verbose_prints)
		fprintf(a, "\t\t\tcout << \x22" "Creating a \x22 << accepting_pos << \x22-character long lexeme.\\n\x22;\n");
	if(data.variables.store_lexeme_in_string)
		fprintf(a, "\t\t\tcopy(buffer.begin()%s, buffer.begin()+accepting_pos, back_inserter(lexeme));\n",
			(data.variables.append_data_member ? "+start_pos" : ""));
	else
	{
		if(data.variables.append_data_member)
		{
			fprintf(a, "\t\t\tif(lexeme)\n");
			fprintf(a, "\t\t\t\tdelete[] lexeme;\n");
		}
		
		fprintf(a, "\t\t\tnumber_of_characters_in_lexeme=accepting_pos;\n");
		fprintf(a, "\t\t\tlexeme=new %s[number_of_characters_in_lexeme+1];\n", data.variables.internal_char_type);
		fprintf(a, "\t\t\tcopy(buffer.begin(), buffer.begin()+number_of_characters_in_lexeme, lexeme);\n");
		fprintf(a, "\t\t\tlexeme[number_of_characters_in_lexeme]=0;\n");
	}
	fprintf(a, "\t\t\t\n");
	
	// Emitted switch over action_to_call. Case 0 is the lexical error
	// action; case i (i>=1) corresponds to data.actions[i-1].
	fprintf(a, "\t\t\tswitch(action_to_call)\n");
	fprintf(a, "\t\t\t{\n");
	for(int i=0; i<data.actions.size()+1; i++)
	{
		if(i==0)
		{
			// action upon error
			fprintf(a, "\t\t\tcase %u:\n", i);
			if(data.recognized_expression_search.count("error"))
			{
				RecognizedExpressionData &re=data.recognized_expressions[data.recognized_expression_search["error"]];
				generate_actions_for_special_expression(a, "\t\t\t\t", re, true);
			}
			else
			{
				if(data.variables.using_whale)
				{
					fprintf(a, "\t\t\t\tcout << \x22Lexical error at line \x22 << line()\n");
					fprintf(a, "\t\t\t\t\t<< \x22 column \x22 << column() << \x22.\\n\x22;\n");
					fprintf(a, "\t\t\t\t\n");
				}
				if(data.variables.using_whale)
					fprintf(a, "\t\t\t\treturn make_token<%s::TerminalError>();\n", data.variables.whale_namespace);
				else
					fprintf(a, "\t\t\t\treturn -1;\n");
			}
		}
		else
		{
			int an=i-1; // action number in our arrays.
			ActionData &action=data.actions[an];
			// Special actions get no case label here.
			if(action.is_special) continue;
			
			fprintf(a, "\t\t\tcase %u:\n", i);
			if(data.variables.generate_verbose_prints)
			{
			//	fprintf(a, "\t\t\t\tcout << \x22" "Action %u\\n\x22;\n", i);
				// Exactly one argument per %u conversion in the format.
				fprintf(a, "\t\t\t\tcout << \x22" "Action %u defined at line %u column %u.\\n\x22;\n", i, action.declaration->arrow->line, action.declaration->arrow->column);
			}
			
			generate_action(a, "\t\t\t\t", action, true);
		}
	}
	if(data.variables.generate_sanity_checks)
	{
		fprintf(a, "\t\t\tdefault:\n");
		if(data.variables.generate_verbose_prints)
			fprintf(a, "\t\t\t\tcout << \x22wrong action number \x22 << action_to_call << \x22\\n\x22;\n");
		fprintf(a, "\t\t\t\tthrow logic_error(\x22%s::get_token(): DFA has reached a non-existent state.\x22);\n", data.variables.dolphin_class_name);
	}
	fprintf(a, "\t\t\t}\n");
	
	// Emitted code reached when the action did not return: reset the
	// scanner state and restart the loop. pos is set one below the restart
	// position because the for loop increments it before the next pass.
	fprintf(a, "\t\t\t\n");
	if(data.variables.append_data_member)
	{
		fprintf(a, "\t\t\tif(append)\n");
		if(data.variables.store_lexeme_in_string)
			fprintf(a, "\t\t\t\tstart_pos=lexeme.size();\n");
		else
			fprintf(a, "\t\t\t\tstart_pos=number_of_characters_in_lexeme;\n");
		fprintf(a, "\t\t\telse\n");
		fprintf(a, "\t\t\t{\n");
		fprintf(a, "\t\t\t\tclear_lexeme();\n");
		fprintf(a, "\t\t\t\tstart_pos=0;\n");
		fprintf(a, "\t\t\t}\n");
		fprintf(a, "\t\t\t\n");
		fprintf(a, "\t\t\tpos=start_pos-1;\n");
	}
	else
	{
		if(data.variables.store_lexeme_in_string)
			fprintf(a, "\t\t\tif(lexeme.size())\n");
		else
			fprintf(a, "\t\t\tif(lexeme)\n");
		fprintf(a, "\t\t\t\tclear_lexeme();\n");
		fprintf(a, "\t\t\tpos=-1;\n");
	}
	fprintf(a, "\t\t\tstate=%s;\n", expression_for_initial_state().c_str());
	fprintf(a, "\t\t\taccepting_pos=0;\n");
	fprintf(a, "\t\t\taction_to_call=0;\n");
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, "\t\t\tpossible_lookahead_positions_and_states.clear();\n");
	fprintf(a, "\t\t}\n");
	fprintf(a, "\t}\n");
	fprintf(a, "}\n");
}

void generate_table_of_states(FILE *a)
{
	fprintf(a, "\nconst %s::StateData %s::states[%s::number_of_dfa_states+1]={\n",
		data.variables.dolphin_class_name,
		data.variables.dolphin_class_name,
		data.variables.dolphin_class_name);
	
	// dummy 0th state.
	fprintf(a, "\t{ ");
	if(data.variables.compress_tables)
	{
		if(data.variables.using_layer2)
		{
			fprintf(a, "0, 0, ");
		}
		fprintf(a, "NULL");
	}
	else
	{
		fprintf(a, "{ ");
		for(int j=0; j<data.number_of_symbol_classes; j++)
		{
			if(j) fprintf(a, ", ");
			fprintf(a, "0");
		}
		fprintf(a, " }");
	}
	
	fprintf(a, ", 0"); // action_upon_accept
	
	if(data.variables.generate_arbitrary_lookahead_support)
		fprintf(a, ", false");
	fprintf(a, " }");
	
	// dfa states.
	for(int i=0; i<data.final_automaton.size(); i++)
	{
		fprintf(a, ",\n" "\t{ ", i);
		
		if(data.variables.compress_tables)
		{
			int l1_line=data.tables.state_to_layer1[i];
			int offset;
			
			if(!data.variables.using_layer2)
				offset=data.tables.line_to_offset_in_table_of_lines[l1_line];
			else
			{
				int exc_location=data.tables.layer1_to_exception_location[l1_line];
				vector<int> &exc_data=data.tables.layer1_to_exception_data[l1_line];
				if(exc_location==-1)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -