⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 process.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
📖 第 1 页 / 共 3 页
字号:

#include "dolphin.h"

#include <stdio.h>
#include <iostream>
#include <typeinfo>
using namespace std;

#include "assert.h"
#include "process.h"
#include "matrix.h"
#include "serialize.h"
#include "stl.h"
using namespace Whale;

//#define EXTRACT_FIRST_AND_LAST_TERMINAL_FUNCTIONS
//#define DEBUG_DERIVATION_PATHS_MATRIX
//#define PRINT_SERIALIZED_EXPRESSIONS

bool process_expression_proc(NonterminalExpression *expr, int nn, bool flag);
bool process_expression_proc(NonterminalExpressionS *expr, int nn, bool flag);
bool process_expression_proc(NonterminalExpressionC *expr, int nn);
bool check_whether_terminal_symbol_is_in_range(int tn, Terminal *location);
bool check_whether_terminal_symbol_is_in_range(vector<int> &v, Terminal *location);
bool decode_escape_sequences(char *s, vector<int> &v);
bool get_character_from_the_string_that_is_expected_to_be_exactly_one_character_long(NonterminalExpressionS *expr_s, int nn, int &result);

void print_a_number_of_terminal_locations(ostream &os, vector<Terminal *> &a, char *word)
{
	for(int k=0; k<a.size(); k++)
	{
		if(k) os << (k<a.size()-1 ? ", " : " and ");
		os << word << " ";
		print_terminal_location(os, a[k]);
	}
}

void print_derivation_path(ostream &os, PathData &path, int initial,
	PathData::TransitionType bracket_brace_threshold=PathData::IN_EXPRESSION)
{
	int n=path.v.size();
	os << data.nonterminals[initial].name;
	for(int i=0; i<n; i++)
	{
		os << " ";
		os << (path.v[i].second<=bracket_brace_threshold ? "[" : "{");
		print_terminal_location(os, path.v[i].first);
		os << (path.v[i].second<=bracket_brace_threshold ? "]" : "}");
		os << " " << path.v[i].first->text;
	}
}

void print_a_number_of_expressions(ostream &os, set<int> &expressions)
{
	int pos=0;
	for(set<int>::iterator p=expressions.begin(); p!=expressions.end(); p++)
	{
		if(pos)
		{
			if(pos<expressions.size()-1)
				os << ", ";
			else
				os << " and ";
		}
		pos++;

		os << data.recognized_expressions[*p].in_serialized_form;
	}
}

void print_a_number_of_start_conditions(ostream &os, set<int> &start_conditions)
{
	int pos=0;
	for(set<int>::iterator p=start_conditions.begin(); p!=start_conditions.end(); p++)
	{
		if(pos)
		{
			if(pos<start_conditions.size()-1)
				os << ", ";
			else
				os << " and ";
		}
		pos++;
		
		os << data.start_conditions[*p].name;
	}
}

#ifdef EXTRACT_FIRST_AND_LAST_TERMINAL_FUNCTIONS
Terminal *extract_first_terminal(Symbol *s)
{
	vector<Terminal *> v;
	s->extract_terminals(v);
	if(v.size())
		return v[0];
	else
		return NULL;
}

Terminal *extract_last_terminal(Symbol *s)
{
	vector<Terminal *> v;
	s->extract_terminals(v);
	if(v.size())
		return v[v.size()-1];
	else
		return NULL;
}
#endif

void print_derivation_paths()
{
	int n=data.nonterminals.size();
	for(int i=0; i<n; i++)
		for(int j=0; j<n; j++)
		{
			PathData &m_ij=data.derivation_paths[i][j];
			if(m_ij.v.size())
			{
				cout << "m[" << i << "][" << j << "] (" << m_ij.worst << ") = ";
				print_derivation_path(cout, m_ij, i);
				cout << "\n";
			}
		}
}

// expr points to a tree containing a subexpression.
// nn is the number of nonterminal in the left side of the rule that contains
// expr as a subexpression. If we are processing an expression that belongs to
//   a recognized expression, not a grammar rule, than nn==-1.
// flag==true means that a rightmost recursion may take place through this
//   subexpression.
// returns true if no errors were found, false otherwise.
bool process_expression_proc(NonterminalExpression *expr, int nn, bool flag)
{
	if(typeid(*expr)==typeid(NonterminalExpressionDisjunction))
	{
		NonterminalExpressionDisjunction &expr_d=*dynamic_cast<NonterminalExpressionDisjunction *>(expr);
		bool result1=process_expression_proc(expr_d.expr1, nn, flag);
		bool result2=process_expression_proc(expr_d.expr2, nn, flag);
		return result1 && result2;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionConjunction))
	{
		NonterminalExpressionConjunction &expr_c=*dynamic_cast<NonterminalExpressionConjunction *>(expr);
		bool result1=process_expression_proc(expr_c.expr1, nn, false);
		bool result2=process_expression_proc(expr_c.expr2, nn, false);
		return result1 && result2;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionConcatenation))
	{
		NonterminalExpressionConcatenation &expr_cat=*dynamic_cast<NonterminalExpressionConcatenation *>(expr);
		bool result1=process_expression_proc(expr_cat.expr1, nn, false);
		bool result2=process_expression_proc(expr_cat.expr2, nn, flag);
		if(nn>=0)
			data.nonterminals[nn].can_be_used_in_IN_expressions=false;
		return result1 && result2;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionComplement))
	{
		NonterminalExpressionComplement &expr_com=*dynamic_cast<NonterminalExpressionComplement *>(expr);
		bool result=process_expression_proc(expr_com.expr, nn, false);
		if(nn>=0)
			data.nonterminals[nn].can_be_used_in_IN_expressions=false;
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionOmittable))
	{
		NonterminalExpressionOmittable &expr_om=*dynamic_cast<NonterminalExpressionOmittable *>(expr);
		bool result=process_expression_proc(expr_om.expr, nn, flag);
		if(nn>=0)
			data.nonterminals[nn].can_be_used_in_IN_expressions=false;
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionInParentheses))
	{
		NonterminalExpressionInParentheses &expr_p=*dynamic_cast<NonterminalExpressionInParentheses *>(expr);
		bool result=process_expression_proc(expr_p.expr, nn, flag);
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionIteration))
	{
		NonterminalExpressionIteration &expr_it=*dynamic_cast<NonterminalExpressionIteration *>(expr);
		bool result=process_expression_proc(expr_it.expr, nn, false);
		if(nn>=0)
			data.nonterminals[nn].can_be_used_in_IN_expressions=false;
		if(!strcmp(expr_it.sign->text, "*"))
			expr_it.reflexive=true;
		else if(!strcmp(expr_it.sign->text, "+"))
			expr_it.reflexive=false;
		else
			assert(false);
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionCondition))
	{
		NonterminalExpressionCondition &expr_c=*dynamic_cast<NonterminalExpressionCondition *>(expr);
		bool result=process_expression_proc(expr_c.condition, nn);
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionRange))
	{
		NonterminalExpressionRange &expr_r=*dynamic_cast<NonterminalExpressionRange *>(expr);
		bool result1=get_character_from_the_string_that_is_expected_to_be_exactly_one_character_long(expr_r.first_expr, nn, expr_r.first);
		bool result2=get_character_from_the_string_that_is_expected_to_be_exactly_one_character_long(expr_r.last_expr, nn, expr_r.last);
		if(result1 && result2)
		{
			if(expr_r.first>expr_r.last)
			{
				cout << "Invalid 'range' expression at ";
				print_terminal_location(cout, expr_r.range_kw);
				cout << ": symbol at ";
				print_terminal_location(cout, expr_r.first_expr->symbol);
				cout << " is greater than symbol at ";
				print_terminal_location(cout, expr_r.last_expr->symbol);
				cout << ".\n";
				return false;
			}
			else
			{
			//	***** REMOVE IT: *****
			//	for(int j=expr_r.first; j<=expr_r.last; j++)
			//		data.charset.new_symbol(j);
				
				return true;
			}
		}
		else
			return false;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionContains))
	{
		NonterminalExpressionContains &expr_c=*dynamic_cast<NonterminalExpressionContains *>(expr);
		bool result=process_expression_proc(expr_c.expr, nn, false);
		if(nn>=0)
			data.nonterminals[nn].can_be_used_in_IN_expressions=false;
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionEpsilon))
	{
//		NonterminalExpressionEpsilon &expr_eps=*dynamic_cast<NonterminalExpressionEpsilon *>(expr);
		if(nn>=0)
			data.nonterminals[nn].can_be_used_in_IN_expressions=false;
		return true;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionSharpSign))
		return true;
	else if(typeid(*expr)==typeid(NonterminalExpressionSymbol))
	{
		NonterminalExpressionSymbol &expr_s=*dynamic_cast<NonterminalExpressionSymbol *>(expr);
		bool result=process_expression_proc(expr_s.expr, nn, flag);
		return result;
	}
	else
	{
		assert(false);
		return false; // to please the compiler
	}
}

bool process_expression_proc(NonterminalExpressionS *expr_s, int nn, bool flag)
{
	Terminal *t=expr_s->symbol;
	char *s=t->text;
	if(typeid(*t)==typeid(TerminalId))
	{
		int nn2=data.find_nonterminal(s);
		expr_s->is_nts=true;
		expr_s->nn=nn2;
		if(nn2==-1)
		{
			cout << "Reference to undefined nonterminal '" << s << "' at ";
			print_terminal_location(cout, t);
			cout << ".\n";
			return false;
		}
		
		if(nn>=0)
		{
			PathData &path=data.derivation_paths[nn][nn2];
			if(!path.v.size())
			{
			#ifdef DEBUG_DERIVATION_PATHS_MATRIX
				cout << "creating\n";
			#endif
				path.worst=(flag ? PathData::RIGHT : PathData::NORMAL);
				path.v.push_back(make_pair(t, path.worst));
			}
			else if(path.v.size() && path.worst==PathData::RIGHT && !flag)
			{
			#ifdef DEBUG_DERIVATION_PATHS_MATRIX
				cout << "replacing\n";
			#endif
				path.v.clear();
				path.worst=PathData::NORMAL;
				path.v.push_back(make_pair(t, path.worst));
			}
		#ifdef DEBUG_DERIVATION_PATHS_MATRIX
			else
				cout << "ignoring\n";
		#endif
		}
		
		return true;
	}
	else if(typeid(*t)==typeid(TerminalString))
	{
		expr_s->is_nts=false;
		bool result=decode_escape_sequences(s, expr_s->s);
		if(!result)
		{
			cout << "Ill-formed escape sequences in string at ";
			print_terminal_location(cout, t);
			cout << ".\n";
		}
		else
		{
			if(nn>=0)
			{
				if(expr_s->s.size()!=1)
					data.nonterminals[nn].can_be_used_in_IN_expressions=false;
			}
		}
		result=(check_whether_terminal_symbol_is_in_range(expr_s->s, t) && result);
		return result;
	}
	else if(typeid(*t)==typeid(TerminalNumber))
	{
		int tn=atoi(s);
		
		expr_s->is_nts=false;
		expr_s->s.push_back(tn);
		
		return check_whether_terminal_symbol_is_in_range(tn, t);
	}
	else if(typeid(*t)==typeid(TerminalHexNumber))
	{
		assert(s[0]=='0' && tolower(s[1])=='x' && s[2]);
		int tn;
		sscanf(s+2, "%x", &tn);
		
		expr_s->is_nts=false;
		expr_s->s.push_back(tn);
		
		return check_whether_terminal_symbol_is_in_range(tn, t);
	}
	else
	{
		assert(false);
		return false; // to please the compiler
	}
}

bool process_expression_proc(NonterminalExpressionC *expr, int nn)
{
	if(typeid(*expr)==typeid(NonterminalExpressionC_Disjunction))
	{
		NonterminalExpressionC_Disjunction &expr_d=*dynamic_cast<NonterminalExpressionC_Disjunction *>(expr);
		bool result1=process_expression_proc(expr_d.expr1, nn);
		bool result2=process_expression_proc(expr_d.expr2, nn);
		return result1 && result2;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_Conjunction))
	{
		NonterminalExpressionC_Conjunction &expr_c=*dynamic_cast<NonterminalExpressionC_Conjunction *>(expr);
		bool result1=process_expression_proc(expr_c.expr1, nn);
		bool result2=process_expression_proc(expr_c.expr2, nn);
		return result1 && result2;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_Complement))
	{
		NonterminalExpressionC_Complement &expr_com=*dynamic_cast<NonterminalExpressionC_Complement *>(expr);
		bool result=process_expression_proc(expr_com.expr, nn);
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_InParentheses))
	{
		NonterminalExpressionC_InParentheses &expr_p=*dynamic_cast<NonterminalExpressionC_InParentheses *>(expr);
		bool result=process_expression_proc(expr_p.expr, nn);
		return result;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_Comparison))
	{
		NonterminalExpressionC_Comparison &expr_cmp=*dynamic_cast<NonterminalExpressionC_Comparison *>(expr);
		
		NonterminalExpressionS &left_symbol=*dynamic_cast<NonterminalExpressionS *>(expr_cmp.left);
		Terminal *left_location=left_symbol.symbol;
		bool left_is_c=!strcmp(left_location->text, "c");
		
		NonterminalExpressionS &right_symbol=*dynamic_cast<NonterminalExpressionS *>(expr_cmp.right);
		Terminal *right_location=right_symbol.symbol;
		bool right_is_c=!strcmp(right_location->text, "c");
		
		if(left_is_c && right_is_c)
		{
			cout << "Expressions at ";
			print_terminal_location(cout, left_location);
			cout << " and at ";
			print_terminal_location(cout, right_location);
			cout << " cannot both be 'c'.\n";
			return false;
		}
		else if(!left_is_c && !right_is_c)
		{
			cout << "Either the expression at ";
			print_terminal_location(cout, left_location);
			cout << ", or the expression at ";
			print_terminal_location(cout, right_location);
			cout << " should be 'c'.\n";
			return false;
		}
		
		if(!strcmp(expr_cmp.comparison_operator->text, "=="))
			expr_cmp.actual_operation=NonterminalExpressionC_Comparison::EQ;
		else if(!strcmp(expr_cmp.comparison_operator->text, "!="))
			expr_cmp.actual_operation=NonterminalExpressionC_Comparison::NE;
		else if(!strcmp(expr_cmp.comparison_operator->text, "<"))
			expr_cmp.actual_operation=NonterminalExpressionC_Comparison::LT;
		else if(!strcmp(expr_cmp.comparison_operator->text, "<="))
			expr_cmp.actual_operation=NonterminalExpressionC_Comparison::LE;
		else if(!strcmp(expr_cmp.comparison_operator->text, ">"))
			expr_cmp.actual_operation=NonterminalExpressionC_Comparison::GT;
		else if(!strcmp(expr_cmp.comparison_operator->text, ">="))
			expr_cmp.actual_operation=NonterminalExpressionC_Comparison::GE;
		else
			assert(false);
		
		if(left_is_c)
			return get_character_from_the_string_that_is_expected_to_be_exactly_one_character_long(&right_symbol, nn, expr_cmp.symbol);
		else if(right_is_c)
		{
			expr_cmp.actual_operation=NonterminalExpressionC_Comparison::swap_operands(expr_cmp.actual_operation);
			return get_character_from_the_string_that_is_expected_to_be_exactly_one_character_long(&left_symbol, nn, expr_cmp.symbol);
		}
		else
		{
			assert(false);
			return false;
		}
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_In))
	{
		NonterminalExpressionC_In &expr_in=*dynamic_cast<NonterminalExpressionC_In *>(expr);
		if(strcmp(expr_in.c->symbol->text, "c"))
		{
			cout << "Expecting 'c' at ";
			print_terminal_location(cout, expr_in.c->symbol);
			cout << ".\n";
			return false;
		}
		expr_in.nn=data.find_nonterminal(expr_in.symbol->text);
		if(expr_in.nn==-1)
		{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -