⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dolphin.h

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 H
字号:

#ifndef __DOLPHIN__DOLPHIN_H

#define __DOLPHIN__DOLPHIN_H

#define _RWSTD_BOUNDS_CHECKING

#define DOLPHIN_VERSION "0.2.10"
#define DOLPHIN_RELEASE_DATE "17 April, 2002"
#define SHORT_COPYRIGHT_NOTICE "Dolphin " DOLPHIN_VERSION ", a lexical analyzer generator. " "(C) Alexander Okhotin, 1999-2002.\n"
#define LONG_COPYRIGHT_NOTICE "Dolphin " DOLPHIN_VERSION ", a lexical analyzer generator.\n" "Copyright (C) Alexander Okhotin <okhotin@cs.queensu.ca>, 1999-2002.\n"
#define COPYRIGHT_NOTICE_FOR_GENERATED_FILES "\t\x22" "A lexical analyzer generated by Dolphin " DOLPHIN_VERSION " (" DOLPHIN_RELEASE_DATE ").\\n\x22\n" "\t\x22" "Dolphin is (C) Alexander Okhotin <okhotin@cs.queensu.ca>, 1999-2002.\\n\x22"
#define FIRST_LINE_FOR_GENERATED_FILES "\n/* A lexical analyzer generated by Dolphin */\n"
#define COMMAND_LINE_SYNTAX "Command line syntax: dolphin filename.dlp\n"

#if '\xff'<'\x0'
	#error This program requires char to be unsigned.
#endif

#include "assert.h"

#include <string.h>
#include <vector>
#include <set>
#include <map>
#include <string>

#include "parser.h"
#include "matrix.h"
#include "nfa-to-dfa.h"
#include "utilities.h"
#include "variables.h"
#include "charset.h"
#include "intervals.h"

struct NonterminalData
{
	char *name;
	std::vector<Whale::NonterminalRuleStatement *> rules;
	
	// if it is _directly_ used in some IN expression
	bool is_used_in_IN_expressions;
	std::vector<Whale::Terminal *> where_it_is_used_in_IN_expressions;
	
	// if the definition of this nonterminal contains some operation
	// (like concatenation) that renders it unusable in IN expressions.
	bool can_be_used_in_IN_expressions;
	std::vector<Whale::Terminal *> locations_of_offending_rules;
	
	// if this nonterminal can generate only one-character strings and if
	// all such derivations have already been precalculated, then
	// then "expanded" points to the set of those symbols, otherwise ==NULL.
	UnionOfIntervals<int> *expanded;
	
	bool expression_length_calculated;
	int expression_length;
	
	NonterminalData()
	{
		name=NULL;
		is_used_in_IN_expressions=false;
		can_be_used_in_IN_expressions=true;
	//	set_is_calculated=false;
		expanded=NULL;
		expression_length_calculated=false;
	}
};

struct StartConditionData
{
	char *name;
	Whale::Terminal *declaration;
	
	NFA nfa;		// nfa for this start condition.
	DFA dfa;		// dfa for this start condition.
	int offset_of_local_states_in_main_dfa; // add this to the number of
				// a state in our dfa to get the number of
				// the same state in the main dfa
	
	// indexed by the number of state in our dfa:
	std::vector<int> expressions_accepted_in_local_dfa_states; // ==number of recognized expression or -1 meaning none.
	std::vector<std::set<int> > expressions_that_need_lookahead_in_local_dfa_states;
	std::vector<bool> lookahead_states_in_dfa;
	
	StartConditionData() : nfa(0)
	{
		name=NULL;
		declaration=NULL;
	}
};

struct ActionData
{
	bool is_special; // true for 'eof' and 'error' expressions.
	
	Whale::NonterminalActionStatement *declaration;
	std::set<int> start_condition_numbers;
	int recognized_expression_number;
	
	ActionData()
	{
		is_special=false;
		declaration=NULL;
	}
};

struct RecognizedExpressionData
{
	bool is_special; // true for 'eof' and 'error' expressions.
	
	Whale::NonterminalExpression *expr, *lookahead;
	char *in_serialized_form;
	
	NFA nfa; // nfa just for this expression.
	
	// expr (accepting state) ==> ...
	//	or
	// expr1 (intermediate state) / expr2 (accepting state) ==> ...
	int intermediate_local_nfa_state, accepting_local_nfa_state; // numbers of states in this->nfa
	std::vector<int> offset_of_our_nfa_states_in_nfa_for_start_conditions; // indexed by start condition numbers
	
	std::vector<int> action_numbers;
	
	// for each start condition. -1 means none.
	std::vector<int> start_condition_to_action_number;
	
	// if lookahead!=NULL
	int lookahead_length; // -1 if its length varies
	int lookahead_eof; // -1 not at eof, 0 - doesn't matter, 1 - at the eof
	
	RecognizedExpressionData() : nfa(0)
	{
		is_special=false;
		expr=NULL;
		lookahead=NULL;
		in_serialized_form=NULL;
		intermediate_local_nfa_state=-1;
		accepting_local_nfa_state=-1;
		lookahead_length=0;
		lookahead_eof=0;
	}
};

struct AssignmentData
{
	Whale::NonterminalOptionStatement *declaration;
	
	enum ValueType {VALUE_TRUE, VALUE_FALSE, VALUE_ID, VALUE_STRING,
		VALUE_NUMBER, VALUE_HEX_NUMBER, VALUE_CODE};
	std::vector<std::pair<char *, ValueType> > values;
	
	AssignmentData()
	{
		declaration=NULL;
	}
};

// information about a single derivation path in the grammar
struct PathData
{
	enum TransitionType { RIGHT=0, NORMAL=1, IN_EXPRESSION };
	
	std::vector<std::pair<Whale::Terminal *, TransitionType> > v;
	TransitionType worst; // should be equal to max(v[i].second)
};

inline std::ostream &operator <<(std::ostream &os, PathData::TransitionType &t)
{
	if(t==PathData::RIGHT)
		os << "Right";
	else if(t==PathData::NORMAL)
		os << "Normal";
	else if(t==PathData::IN_EXPRESSION)
		os << "IN expression";
	else
		assert(false);
	return os;
}

class DolphinData
{
public:
	CharacterSet charset;
	
	std::vector<NonterminalData> nonterminals;
	std::vector<StartConditionData> start_conditions;
	
	std::vector<ActionData> actions;
	std::vector<RecognizedExpressionData> recognized_expressions;
	std::map<const char *, int, NullTerminatedStringCompare> recognized_expression_search;
	
	matrix<PathData> derivation_paths;
	
	struct DataOnAutomatonState
	{
		int number_of_start_condition;
		int number_of_state_in_dfa_for_sc;
		int action_accepted_here; // -1 if none.
		std::set<int> actions_that_need_lookahead_here;
	};
	DFA dfa;
	std::vector<DataOnAutomatonState> data_on_dfa_states;
	
	std::vector<int> symbol_to_symbol_class;
	int number_of_symbol_classes;
	
	struct StateOfFinalAutomaton
	{
		int action_accepted_here; // -1 if none.
		std::set<int> actions_that_need_lookahead_here;
		std::vector<int> transitions;
	};
	DFA::Partition dfa_partition;
	std::vector<StateOfFinalAutomaton> final_automaton;
	
        std::map<char *, AssignmentData, NullTerminatedStringCompare> assignments;
	Variables variables;
	
	std::string file_name;
	
	struct Tables
	{
		// Transition table: The first layer of compression.
		std::vector<int> state_to_layer1; // or -1 for null lines.
		std::vector<int> layer1_to_state; // to one of those states.
		int null_line_number;
		
		// Transition table: The second layer of compression.
		std::vector<int> layer1_to_layer2;
		std::vector<int> layer2_to_layer1;
		std::vector<int> layer1_to_exception_location; // -1 for no exception.
		std::vector<std::vector<int> > layer1_to_exception_data;
		
		// Transition table: final table of lines, either layer 1 or 2.
		std::vector<int> line_to_offset_in_table_of_lines;
		std::vector<int> compressed_table_of_lines;
		
		// Indices in table of intermediate (lookahead) states.
		std::vector<int> offset_in_table_of_lookahead_states; // [action]
		std::vector<int> number_of_lookahead_states; // [action]
		
		// The table of intermediate (lookahead) states.
		std::vector<int> compressed_table_of_lookahead_states;
		
		std::vector<int> start_condition_to_dfa_state_table;
		
		Tables() { null_line_number=-1; }
	};
	
	Tables tables;
	
	int find_nonterminal(const char *s)
	{
		for(int i=0; i<nonterminals.size(); i++)
			if(!strcmp(s, nonterminals[i].name))
				return i;
		return -1;
	}
	int find_start_condition(const char *s)
	{
		for(int i=0; i<start_conditions.size(); i++)
			if(!strcmp(s, start_conditions[i].name))
				return i;
		return -1;
	}
	
	DolphinData();
};

extern DolphinData data;

inline int sgn(int x)
{
	return (x<0 ? -1 : (!x ? 0 : 1));
}

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -