⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 expand.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
字号:

// To expand some expression E means
//
// i. to prove that L(E) is a subset of Sigma
//	and
// ii. to compute L(E), i.e. build the set of all terminals generated by E.

// To expand some nonterminal A means to expand the body of its definition.

#include "expand.h"
#include "dolphin.h"
#include "stl.h"
using namespace std;
using namespace Whale;

//#define DEBUG_EXPANDED_NONTERMINALS

UnionOfIntervals<int> expand_proc(NonterminalExpression *expr);
void expand_proc_ii(NonterminalExpression *expr);
UnionOfIntervals<int> expand_proc(NonterminalExpressionS *expr);
UnionOfIntervals<int> expand_proc(NonterminalExpressionC *expr);

UnionOfIntervals<int> expand_proc(NonterminalExpression *expr)
{
	if(typeid(*expr)==typeid(NonterminalExpressionDisjunction))
	{
		NonterminalExpressionDisjunction &expr_d=*dynamic_cast<NonterminalExpressionDisjunction *>(expr);
		return expand_proc(expr_d.expr1) | expand_proc(expr_d.expr2);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionConjunction))
	{
		NonterminalExpressionConjunction &expr_c=*dynamic_cast<NonterminalExpressionConjunction *>(expr);
		return expand_proc(expr_c.expr1) & expand_proc(expr_c.expr2);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionConcatenation))
		assert(false);
	else if(typeid(*expr)==typeid(NonterminalExpressionComplement))
		assert(false);
	else if(typeid(*expr)==typeid(NonterminalExpressionOmittable))
		assert(false);
	else if(typeid(*expr)==typeid(NonterminalExpressionInParentheses))
	{
		NonterminalExpressionInParentheses &expr_p=*dynamic_cast<NonterminalExpressionInParentheses *>(expr);
		return expand_proc(expr_p.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionIteration))
		assert(false);
	else if(typeid(*expr)==typeid(NonterminalExpressionCondition))
	{
		NonterminalExpressionCondition &expr_cond=*dynamic_cast<NonterminalExpressionCondition *>(expr);
		
		return expand_proc(expr_cond.condition);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionRange))
	{
		NonterminalExpressionRange &expr_r=*dynamic_cast<NonterminalExpressionRange *>(expr);
		
		return UnionOfIntervals<int>(expr_r.first, expr_r.last);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionContains))
		assert(false);
	else if(typeid(*expr)==typeid(NonterminalExpressionEpsilon))
		assert(false);
	else if(typeid(*expr)==typeid(NonterminalExpressionSharpSign))
	{
		return UnionOfIntervals<int>(numeric_limits<int>::min(), numeric_limits<int>::max());
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionSymbol))
	{
		NonterminalExpressionSymbol &expr_s=*dynamic_cast<NonterminalExpressionSymbol *>(expr);
		return expand_proc(expr_s.expr);
	}
	else
		assert(false);
	
	return UnionOfIntervals<int>(); // to please the compiler
}

void expand_proc_ii(NonterminalExpression *expr)
{
	if(typeid(*expr)==typeid(NonterminalExpressionDisjunction))
	{
		NonterminalExpressionDisjunction &expr_d=*dynamic_cast<NonterminalExpressionDisjunction *>(expr);
		expand_proc_ii(expr_d.expr1);
		expand_proc_ii(expr_d.expr2);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionConjunction))
	{
		NonterminalExpressionConjunction &expr_c=*dynamic_cast<NonterminalExpressionConjunction *>(expr);
		expand_proc_ii(expr_c.expr1);
		expand_proc_ii(expr_c.expr2);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionConcatenation))
	{
		NonterminalExpressionConcatenation &expr_cat=*dynamic_cast<NonterminalExpressionConcatenation *>(expr);
		expand_proc_ii(expr_cat.expr1);
		expand_proc_ii(expr_cat.expr2);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionComplement))
	{
		NonterminalExpressionComplement &expr_com=*dynamic_cast<NonterminalExpressionComplement *>(expr);
		expand_proc_ii(expr_com.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionOmittable))
	{
		NonterminalExpressionOmittable &expr_om=*dynamic_cast<NonterminalExpressionOmittable *>(expr);
		expand_proc_ii(expr_om.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionInParentheses))
	{
		NonterminalExpressionInParentheses &expr_p=*dynamic_cast<NonterminalExpressionInParentheses *>(expr);
		expand_proc_ii(expr_p.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionIteration))
	{
		NonterminalExpressionIteration &expr_it=*dynamic_cast<NonterminalExpressionIteration *>(expr);
		expand_proc_ii(expr_it.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionCondition))
	{
		NonterminalExpressionCondition &expr_cond=*dynamic_cast<NonterminalExpressionCondition *>(expr);
		expr->expanded=new UnionOfIntervals<int>(expand_proc(expr_cond.condition));
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionRange))
	{
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionContains))
	{
		NonterminalExpressionContains &expr_cont=*dynamic_cast<NonterminalExpressionContains *>(expr);
		expand_proc_ii(expr_cont.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionEpsilon))
	{
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionSharpSign))
	{
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionSymbol))
	{
	}
	else
		assert(false);
}

UnionOfIntervals<int> expand_proc(NonterminalExpressionS *expr)
{
	if(expr->is_nts)
	{
		NonterminalData &nonterminal=data.nonterminals[expr->nn];
		assert(nonterminal.expanded);
		return *nonterminal.expanded;
	}
	else
	{
		assert(expr->s.size()==1);
		return UnionOfIntervals<int>(expr->s[0]);
	}
}

UnionOfIntervals<int> expand_proc(NonterminalExpressionC *expr)
{
	if(typeid(*expr)==typeid(NonterminalExpressionC_Disjunction))
	{
		NonterminalExpressionC_Disjunction &expr_d=*dynamic_cast<NonterminalExpressionC_Disjunction *>(expr);
		return expand_proc(expr_d.expr1) | expand_proc(expr_d.expr2);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_Conjunction))
	{
		NonterminalExpressionC_Conjunction &expr_c=*dynamic_cast<NonterminalExpressionC_Conjunction *>(expr);
		return expand_proc(expr_c.expr1) & expand_proc(expr_c.expr2);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_Complement))
	{
		NonterminalExpressionC_Complement &expr_com=*dynamic_cast<NonterminalExpressionC_Complement *>(expr);
		return ~expand_proc(expr_com.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_InParentheses))
	{
		NonterminalExpressionC_InParentheses &expr_p=*dynamic_cast<NonterminalExpressionC_InParentheses *>(expr);
		return expand_proc(expr_p.expr);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_Comparison))
	{
		NonterminalExpressionC_Comparison &expr_cmp=*dynamic_cast<NonterminalExpressionC_Comparison *>(expr);
		
		if(expr_cmp.actual_operation==NonterminalExpressionC_Comparison::EQ)
			return UnionOfIntervals<int>(expr_cmp.symbol);
		else if(expr_cmp.actual_operation==NonterminalExpressionC_Comparison::NE)
			return ~UnionOfIntervals<int>(expr_cmp.symbol);
		else if(expr_cmp.actual_operation==NonterminalExpressionC_Comparison::LT)
			return UnionOfIntervals<int>(numeric_limits<int>::min(), expr_cmp.symbol-1);
		else if(expr_cmp.actual_operation==NonterminalExpressionC_Comparison::LE)
			return UnionOfIntervals<int>(numeric_limits<int>::min(), expr_cmp.symbol);
		else if(expr_cmp.actual_operation==NonterminalExpressionC_Comparison::GT)
			return UnionOfIntervals<int>(expr_cmp.symbol+1, numeric_limits<int>::max());
		else if(expr_cmp.actual_operation==NonterminalExpressionC_Comparison::GE)
			return UnionOfIntervals<int>(expr_cmp.symbol, numeric_limits<int>::max());
		else
			assert(false);
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_In))
	{
		NonterminalExpressionC_In &expr_in=*dynamic_cast<NonterminalExpressionC_In *>(expr);
		NonterminalData &nonterminal=data.nonterminals[expr_in.nn];
		assert(nonterminal.expanded);
		return *nonterminal.expanded;
	}
	else if(typeid(*expr)==typeid(NonterminalExpressionC_Constant))
	{
		NonterminalExpressionC_Constant &expr_const=*dynamic_cast<NonterminalExpressionC_Constant *>(expr);
		
		if(expr_const.value) // true
			return UnionOfIntervals<int>(numeric_limits<int>::min(), numeric_limits<int>::max());
		else // false
			return UnionOfIntervals<int>();
	}
	else
		assert(false);
	
	return UnionOfIntervals<int>(); // to please the compiler
}

bool expand_what_can_be_expanded()
{
	int n=data.nonterminals.size();
	
	// making a set of all nonterminals derivable from 'in' expressions.
	bool *nonterminals_in_question=new bool[n];
	int number_of_nonterminals_in_question=0;
	for(int i=0; i<n; i++)
	{
		bool value=false;
		
		if(data.nonterminals[i].is_used_in_IN_expressions)
			value=true;
		else
			for(int j=0; j<n; j++)
				if(j!=i && data.derivation_paths[j][i].v.size()
					&& data.nonterminals[j].is_used_in_IN_expressions)
				{
					value=true;
					break;
				}
		
		nonterminals_in_question[i]=value;
		if(value) number_of_nonterminals_in_question++;
	}

#ifdef DEBUG_EXPANDED_NONTERMINALS
	if(number_of_nonterminals_in_question)
		cout << "Expanding " << number_of_nonterminals_in_question << " nonterminals\n";
#endif
	
	// expanding (replacing with sets of terminals) all nonterminals
	// one by one.
	while(number_of_nonterminals_in_question)
	{
		// searching for a nonterminal that is ready to be expanded
		// (i.e. all nonterminals referenced by this one have already
		// been expanded)
		
		int nn=-1;
		for(int i=0; i<n; i++)
		{
			if(!nonterminals_in_question[i]) continue;
			
			bool it_is_ready=true;
			for(int j=0; j<n; j++)
				if(nonterminals_in_question[j] &&
					data.derivation_paths[i][j].v.size())
				{
					it_is_ready=false;
					break;
				}
			if(it_is_ready)
			{
				nn=i;
				break;
			}
		}
		
		if(nn==-1)
		{
			cout << "\n\n\texpand_what_can_be_expanded():";
			cout << "Found a previously undetected error in grammar.\n";
			cout << "Something's wrong around nonterminal" << (number_of_nonterminals_in_question==1 ? " " : "s ");
			int p=0;
			for(int i=0; i<n; i++)
				if(nonterminals_in_question[i])
				{
					if(p==number_of_nonterminals_in_question-1)
						cout << " and ";
					else if(p)
						cout << ", ";
					p++;
					cout << data.nonterminals[i].name;
				}
			cout << ".\n\n";
			assert(false);
		}
		
	#ifdef DEBUG_EXPANDED_NONTERMINALS
		cout << "processing nonterminal " << data.nonterminals[nn].name << "\n";
	#endif
		
		assert(data.nonterminals[nn].rules.size()==1);
		data.nonterminals[nn].expanded=new UnionOfIntervals<int>(expand_proc(data.nonterminals[nn].rules[0]->right));
		
	#ifdef DEBUG_EXPANDED_NONTERMINALS
		cout << *(data.nonterminals[nn].expanded) << "\n";
	#endif
		
		assert(nonterminals_in_question[nn]);
		nonterminals_in_question[nn]=false;
		number_of_nonterminals_in_question--;
	}
	
	// All nonterminals used in 'in' expressions have been expanded.
	//
	// Now making a pass through all rules and actions (except rules for
	// those nonterminals that have already been expanded), expanding the
	// following:
	// i. All 'condition' statements;
	// ii. All subexpressions that can be expanded;
	// iii. All nonterminals that can be expanded.
	for(int i=0; i<n; i++)
		if(!data.nonterminals[i].expanded)
			expand_proc_ii(data.nonterminals[i].rules[0]->right);
	for(int i=0; i<data.recognized_expressions.size(); i++)
	{
		RecognizedExpressionData &re=data.recognized_expressions[i];
		if(re.is_special) continue;
		
		expand_proc_ii(re.expr);
		if(re.lookahead)
			expand_proc_ii(re.lookahead);
	}
	
	return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -