📄 dolphin.h
字号:
#ifndef __DOLPHIN__DOLPHIN_H
#define __DOLPHIN__DOLPHIN_H
#define _RWSTD_BOUNDS_CHECKING
#define DOLPHIN_VERSION "0.2.10"
#define DOLPHIN_RELEASE_DATE "17 April, 2002"
#define SHORT_COPYRIGHT_NOTICE "Dolphin " DOLPHIN_VERSION ", a lexical analyzer generator. " "(C) Alexander Okhotin, 1999-2002.\n"
#define LONG_COPYRIGHT_NOTICE "Dolphin " DOLPHIN_VERSION ", a lexical analyzer generator.\n" "Copyright (C) Alexander Okhotin <okhotin@cs.queensu.ca>, 1999-2002.\n"
#define COPYRIGHT_NOTICE_FOR_GENERATED_FILES "\t\x22" "A lexical analyzer generated by Dolphin " DOLPHIN_VERSION " (" DOLPHIN_RELEASE_DATE ").\\n\x22\n" "\t\x22" "Dolphin is (C) Alexander Okhotin <okhotin@cs.queensu.ca>, 1999-2002.\\n\x22"
#define FIRST_LINE_FOR_GENERATED_FILES "\n/* A lexical analyzer generated by Dolphin */\n"
#define COMMAND_LINE_SYNTAX "Command line syntax: dolphin filename.dlp\n"
#if '\xff'<'\x0'
#error This program requires char to be unsigned.
#endif
#include "assert.h"
#include <string.h>
#include <vector>
#include <set>
#include <map>
#include <string>
#include "parser.h"
#include "matrix.h"
#include "nfa-to-dfa.h"
#include "utilities.h"
#include "variables.h"
#include "charset.h"
#include "intervals.h"
struct NonterminalData
{
char *name;
std::vector<Whale::NonterminalRuleStatement *> rules;
// if it is _directly_ used in some IN expression
bool is_used_in_IN_expressions;
std::vector<Whale::Terminal *> where_it_is_used_in_IN_expressions;
// if the definition of this nonterminal contains some operation
// (like concatenation) that renders it unusable in IN expressions.
bool can_be_used_in_IN_expressions;
std::vector<Whale::Terminal *> locations_of_offending_rules;
// if this nonterminal can generate only one-character strings and if
// all such derivations have already been precalculated, then
// then "expanded" points to the set of those symbols, otherwise ==NULL.
UnionOfIntervals<int> *expanded;
bool expression_length_calculated;
int expression_length;
NonterminalData()
{
name=NULL;
is_used_in_IN_expressions=false;
can_be_used_in_IN_expressions=true;
// set_is_calculated=false;
expanded=NULL;
expression_length_calculated=false;
}
};
struct StartConditionData
{
char *name;
Whale::Terminal *declaration;
NFA nfa; // nfa for this start condition.
DFA dfa; // dfa for this start condition.
int offset_of_local_states_in_main_dfa; // add this to the number of
// a state in our dfa to get the number of
// the same state in the main dfa
// indexed by the number of state in our dfa:
std::vector<int> expressions_accepted_in_local_dfa_states; // ==number of recognized expression or -1 meaning none.
std::vector<std::set<int> > expressions_that_need_lookahead_in_local_dfa_states;
std::vector<bool> lookahead_states_in_dfa;
StartConditionData() : nfa(0)
{
name=NULL;
declaration=NULL;
}
};
struct ActionData
{
bool is_special; // true for 'eof' and 'error' expressions.
Whale::NonterminalActionStatement *declaration;
std::set<int> start_condition_numbers;
int recognized_expression_number;
ActionData()
{
is_special=false;
declaration=NULL;
}
};
struct RecognizedExpressionData
{
bool is_special; // true for 'eof' and 'error' expressions.
Whale::NonterminalExpression *expr, *lookahead;
char *in_serialized_form;
NFA nfa; // nfa just for this expression.
// expr (accepting state) ==> ...
// or
// expr1 (intermediate state) / expr2 (accepting state) ==> ...
int intermediate_local_nfa_state, accepting_local_nfa_state; // numbers of states in this->nfa
std::vector<int> offset_of_our_nfa_states_in_nfa_for_start_conditions; // indexed by start condition numbers
std::vector<int> action_numbers;
// for each start condition. -1 means none.
std::vector<int> start_condition_to_action_number;
// if lookahead!=NULL
int lookahead_length; // -1 if its length varies
int lookahead_eof; // -1 not at eof, 0 - doesn't matter, 1 - at the eof
RecognizedExpressionData() : nfa(0)
{
is_special=false;
expr=NULL;
lookahead=NULL;
in_serialized_form=NULL;
intermediate_local_nfa_state=-1;
accepting_local_nfa_state=-1;
lookahead_length=0;
lookahead_eof=0;
}
};
struct AssignmentData
{
Whale::NonterminalOptionStatement *declaration;
enum ValueType {VALUE_TRUE, VALUE_FALSE, VALUE_ID, VALUE_STRING,
VALUE_NUMBER, VALUE_HEX_NUMBER, VALUE_CODE};
std::vector<std::pair<char *, ValueType> > values;
AssignmentData()
{
declaration=NULL;
}
};
// information about a single derivation path in the grammar
struct PathData
{
enum TransitionType { RIGHT=0, NORMAL=1, IN_EXPRESSION };
std::vector<std::pair<Whale::Terminal *, TransitionType> > v;
TransitionType worst; // should be equal to max(v[i].second)
};
inline std::ostream &operator <<(std::ostream &os, PathData::TransitionType &t)
{
if(t==PathData::RIGHT)
os << "Right";
else if(t==PathData::NORMAL)
os << "Normal";
else if(t==PathData::IN_EXPRESSION)
os << "IN expression";
else
assert(false);
return os;
}
class DolphinData
{
public:
CharacterSet charset;
std::vector<NonterminalData> nonterminals;
std::vector<StartConditionData> start_conditions;
std::vector<ActionData> actions;
std::vector<RecognizedExpressionData> recognized_expressions;
std::map<const char *, int, NullTerminatedStringCompare> recognized_expression_search;
matrix<PathData> derivation_paths;
struct DataOnAutomatonState
{
int number_of_start_condition;
int number_of_state_in_dfa_for_sc;
int action_accepted_here; // -1 if none.
std::set<int> actions_that_need_lookahead_here;
};
DFA dfa;
std::vector<DataOnAutomatonState> data_on_dfa_states;
std::vector<int> symbol_to_symbol_class;
int number_of_symbol_classes;
struct StateOfFinalAutomaton
{
int action_accepted_here; // -1 if none.
std::set<int> actions_that_need_lookahead_here;
std::vector<int> transitions;
};
DFA::Partition dfa_partition;
std::vector<StateOfFinalAutomaton> final_automaton;
std::map<char *, AssignmentData, NullTerminatedStringCompare> assignments;
Variables variables;
std::string file_name;
struct Tables
{
// Transition table: The first layer of compression.
std::vector<int> state_to_layer1; // or -1 for null lines.
std::vector<int> layer1_to_state; // to one of those states.
int null_line_number;
// Transition table: The second layer of compression.
std::vector<int> layer1_to_layer2;
std::vector<int> layer2_to_layer1;
std::vector<int> layer1_to_exception_location; // -1 for no exception.
std::vector<std::vector<int> > layer1_to_exception_data;
// Transition table: final table of lines, either layer 1 or 2.
std::vector<int> line_to_offset_in_table_of_lines;
std::vector<int> compressed_table_of_lines;
// Indices in table of intermediate (lookahead) states.
std::vector<int> offset_in_table_of_lookahead_states; // [action]
std::vector<int> number_of_lookahead_states; // [action]
// The table of intermediate (lookahead) states.
std::vector<int> compressed_table_of_lookahead_states;
std::vector<int> start_condition_to_dfa_state_table;
Tables() { null_line_number=-1; }
};
Tables tables;
int find_nonterminal(const char *s)
{
for(int i=0; i<nonterminals.size(); i++)
if(!strcmp(s, nonterminals[i].name))
return i;
return -1;
}
int find_start_condition(const char *s)
{
for(int i=0; i<start_conditions.size(); i++)
if(!strcmp(s, start_conditions[i].name))
return i;
return -1;
}
DolphinData();
};
extern DolphinData data;
inline int sgn(int x)
{
return (x<0 ? -1 : (!x ? 0 : 1));
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -