⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 process.cpp

📁 Full support for extended regular expressions (those with intersection and complement); Support for
💻 CPP
📖 第 1 页 / 共 3 页
字号:
			if(typeid(*action_statement.expr)==typeid(NonterminalPairOfExpressions))
			{
				NonterminalPairOfExpressions &poe=*dynamic_cast<NonterminalPairOfExpressions *>(action_statement.expr);
				
				if(!process_expression_proc(poe.expr, -1, true)) { flag=false; continue; }
				string str=serialize_expression(poe.expr);
				
				if(poe.lookahead)
				{
					if(!process_expression_proc(poe.lookahead, -1, true)) { flag=false; continue; }
					str+=" / "+serialize_expression(poe.lookahead);
				}
				if(poe.eof)
				{
					if(!poe.lookahead)
						str+=" / ";
					else
						str+=" ";
					if(poe.not_eof) str+="~";
					str+="eof";
				}
				
				s=strdup(str.c_str());
				is_special=false;
			}
			else if(typeid(*action_statement.expr)==typeid(TerminalKwEof))
			{
				s=strdup("eof");
				is_special=true;
			}
			else if(typeid(*action_statement.expr)==typeid(TerminalKwError))
			{
				s=strdup("error");
				is_special=true;
			}
			
			int ren;
			if(!data.recognized_expression_search.count(s))
			{
				ren=data.recognized_expressions.size();
				
				RecognizedExpressionData re;
				if(!is_special)
				{
					NonterminalPairOfExpressions &poe=*dynamic_cast<NonterminalPairOfExpressions *>(action_statement.expr);
					re.expr=poe.expr;
					re.lookahead=poe.lookahead;
					if(poe.eof)
						re.lookahead_eof=(poe.not_eof ? -1 : 1);
					else
						re.lookahead_eof=0;
				}
				else
				{
					re.expr=NULL;
					re.lookahead=NULL;
					re.lookahead_eof=0;
				}
				re.is_special=is_special;
				re.in_serialized_form=s;
				data.recognized_expressions.push_back(re);
				data.recognized_expression_search[re.in_serialized_form]=ren;
				
			#ifdef PRINT_SERIALIZED_EXPRESSIONS
				cout << s << "\n";
			#endif
			}
			else
			{
				ren=data.recognized_expression_search[s];
				delete s;
			}
			
			ActionData action;
			action.is_special=is_special;
			action.declaration=&action_statement;
			action.recognized_expression_number=ren;
			
			if(action_statement.start_conditions)
			{
				NonterminalStartConditionsExpression *sce=action_statement.start_conditions;
				NonterminalStartConditionsExpressionList *scel=dynamic_cast<NonterminalStartConditionsExpressionList *>(sce);
				NonterminalStartConditionsExpressionAsterisk *scea=dynamic_cast<NonterminalStartConditionsExpressionAsterisk *>(sce);
				assert(!scel || !scea);
				if(scel)
				{
					for(int k=0; k<scel->names.size(); k++)
					{
						Terminal *t=scel->names[k];
						int sc=data.find_start_condition(t->text);
						if(sc==-1)
						{
							cout << "Reference to undefined start condition '";
							cout << t->text << "' at ";
							print_terminal_location(cout, t);
							cout << ".\n";
							flag=false;
						}
						else if(action.start_condition_numbers.count(sc))
						{
							// duplicate declaration.
							// looking back to report error.
							for(int l=0; l<k; l++)
								if(!strcmp(scel->names[l]->text, t->text))
								{
									cout << "Warning: duplicate start condition ignored at ";
									print_terminal_location(cout, t);
									cout << " (earlier occurence at ";
									print_terminal_location(cout, scel->names[l]);
									cout << ").\n";
								}
						}
						else
							action.start_condition_numbers.insert(sc);
					}
				}
				else if(scea)
					for(int j=0; j<data.start_conditions.size(); j++)
						action.start_condition_numbers.insert(j);
				else
					assert(false);
			}
			else
			{
				// using default set of start conditions.
				
			#if 0
				// <INITIAL>
				action.start_condition_numbers.insert(0);
			#else
				// <*>
				for(int j=0; j<data.start_conditions.size(); j++)
					action.start_condition_numbers.insert(j);
			#endif
			}
			
			int action_number=data.actions.size();
			data.actions.push_back(action);
			
			data.recognized_expressions[ren].action_numbers.push_back(action_number);
			
			if(typeid(*action_statement.action)==typeid(NonterminalActionCodeII))
			{
				NonterminalActionCodeII &a_code2=*dynamic_cast<NonterminalActionCodeII *>(action_statement.action);
				char *s=a_code2.code->text;
				int k=(s[0]=='{' ? 1 : 2);
				a_code2.s=string(s+k, s+strlen(s)-k);
			}
			else if(typeid(*action_statement.action)==typeid(NonterminalActionReturn))
			{
			}
			else
				assert(false);
		}
	}
	
	if(!flag) return false;
	
	// at this point we have information on all one-step derivations stored
	// in data.derivation_paths. Performing a transitive closure of this
	// matrix.
	for(;;)
	{
		bool repeat_flag=false;
		
		for(int i=0; i<n; i++)
			for(int j=0; j<n; j++)
			{
				if(i==j) continue;
				PathData &m_ij=data.derivation_paths[i][j];
				if(!m_ij.v.size()) continue;
				
				for(int k=0; k<n; k++)
				{
					if(k==j) continue;
					PathData &m_jk=data.derivation_paths[j][k];
					if(!m_jk.v.size()) continue;
					
					PathData &m_ik=data.derivation_paths[i][k];
					
					if(!m_ik.v.size())
					{
						// Case 1. m_ik is empty ==>
						// just merge m_ij and m_jk.
						
					#ifdef DEBUG_DERIVATION_PATHS_MATRIX
						cout << "Case 1: " << i << ", " << j << ", " << k << "\n";
					#endif
						copy(m_ij.v.begin(), m_ij.v.end(), back_inserter(m_ik.v));
						copy(m_jk.v.begin(), m_jk.v.end(), back_inserter(m_ik.v));
						m_ik.worst=max(m_ij.worst, m_jk.worst);
						repeat_flag=true;
					}
					else if(m_ik.v.size() && m_ik.worst<max(m_ij.worst, m_jk.worst))
					{
						// Case 2. m_ik already contains
						// a recursion path, but a worse
						// one is actually possible ==>
						// replace a better path with a
						// worse.
						
					#ifdef DEBUG_DERIVATION_PATHS_MATRIX
						cout << "Case 2: " << i << ", " << j << ", " << k << "\n";
					#endif
						m_ik.v.clear();
						copy(m_ij.v.begin(), m_ij.v.end(), back_inserter(m_ik.v));
						copy(m_jk.v.begin(), m_jk.v.end(), back_inserter(m_ik.v));
						m_ik.worst=max(m_ij.worst, m_jk.worst);
						repeat_flag=true;
					}
				}
			}
		if(!repeat_flag)
			break;
	#ifdef DEBUG_DERIVATION_PATHS_MATRIX
		cout << "repeat\n";
	#endif
	}
	
#ifdef DEBUG_DERIVATION_PATHS_MATRIX
	print_derivation_paths();
#endif
	
	for(int i=0; i<n; i++)
	{
		PathData &m_ii=data.derivation_paths[i][i];
		if(!m_ii.v.size()) continue;
		
		if(m_ii.v[0].second==PathData::IN_EXPRESSION)
		{
			PathData path=m_ii;
			cout << "Recursive derivation path (";
			print_derivation_path(cout, path, i);
			cout << ") contains 'in' expression.\n";
			flag=false;
		}
	}
	
	if(!flag) return false;
	
	for(int i=0; i<n; i++)
	{
		NonterminalData &nonterminal=data.nonterminals[i];
		if(!nonterminal.is_used_in_IN_expressions) continue;
		
		int nn2;
		if(data.derivation_paths[i][i].v.size())
		{
			nn2=i;
		}
		else
		{
			nn2=-1;
//			int l=(1 << (sizeof(int)*8-2));
			for(int j=0; j<n; j++)
				if(data.derivation_paths[j][j].v.size()
					&& data.derivation_paths[i][j].v.size())
				{
					nn2=j;
					break;
				}
			if(nn2==-1) continue;
		}
		
		int k=nonterminal.where_it_is_used_in_IN_expressions.size();
		assert(k);
		cout << "Nonterminal " << nonterminal.name << ", used in 'in' expression" << (k==1 ? " " : "s ");
		print_a_number_of_terminal_locations(cout, nonterminal.where_it_is_used_in_IN_expressions, "at");
		if(i!=nn2)
		{
			cout << ", can expand (the derivation path is ";
			print_derivation_path(cout, data.derivation_paths[i][nn2], i);
			cout << ") to nonterminal " << data.nonterminals[nn2].name;
			cout << ", which ";
		}
		else
			cout << ", ";
		cout << "is self-embedding (consider a recursive derivation path ";
		print_derivation_path(cout, data.derivation_paths[nn2][nn2], nn2);
		cout << ").\n";
		flag=false;
	}
	
	if(!flag) return false;
	
	for(int i=0; i<n; i++)
	{
		NonterminalData &nonterminal=data.nonterminals[i];
		if(!nonterminal.is_used_in_IN_expressions) continue;
		
		int nn2;
		if(!nonterminal.can_be_used_in_IN_expressions)
			nn2=i;
		else
		{
			nn2=-1;
			int l=(1 << (sizeof(int)*8-2));
			for(int j=0; j<n; j++)
			{
				int this_l=data.derivation_paths[i][j].v.size();
				if(!data.nonterminals[j].is_used_in_IN_expressions
					&& !data.nonterminals[j].can_be_used_in_IN_expressions
					&& this_l>0 && this_l<l)
				{
					nn2=j;
					l=this_l;
				}
			}
			if(nn2==-1) continue;
		}
		
		int k=nonterminal.where_it_is_used_in_IN_expressions.size();
		assert(k);
		cout << "Nonterminal " << nonterminal.name << ", used in 'in' expression" << (k==1 ? " " : "s ");
		print_a_number_of_terminal_locations(cout, nonterminal.where_it_is_used_in_IN_expressions, "at");
		if(i!=nn2)
		{
			cout << ", can expand (the derivation path is ";
			print_derivation_path(cout, data.derivation_paths[i][nn2], i);
			cout << ") to nonterminal " << data.nonterminals[nn2].name;
			cout << ", which ";
		}
		else
			cout << ", ";
		
		int k2=data.nonterminals[nn2].locations_of_offending_rules.size();
		assert(k2);
		cout << "can possibly expand to a string that is other than one character long (offending rule" << (k2==1 ? " " : "s ");
		print_a_number_of_terminal_locations(cout, data.nonterminals[nn2].locations_of_offending_rules, "at");
		cout << ").\n";
		
		flag=false;
	}
	
	if(!flag) return false;
	
	// ensuring that there are no recursive symbols except strictly
	// right recursive.
	set<int> already_reported;
	for(int i=0; i<n; i++)
	{
		PathData &m_ii=data.derivation_paths[i][i];
		
		if(m_ii.v.size() && m_ii.worst>PathData::RIGHT &&
			!already_reported.count(i))
		{
			cout << "Recursion made up by derivation path (";
			print_derivation_path(cout, m_ii, i, PathData::RIGHT);
			cout << ") is rendered invalid by transitions marked "
				"with braces.\n";
			
			for(int j=0; j<m_ii.v.size(); j++)
			{
				int nn=data.find_nonterminal(m_ii.v[j].first->text);
				assert(nn!=-1);
				already_reported.insert(nn);
			}
			
			flag=false;
		}
	}
	
	if(!flag) return false;
	
	// looking for collisions between different actions for one expression.
	for(int i=0; i<data.recognized_expressions.size(); i++)
	{
		RecognizedExpressionData &re=data.recognized_expressions[i];
//		cout << "Recognized expression " << re.in_serialized_form << ": "
//			<< re.action_numbers << "\n";
		
		bool conflicts_found=false;
		set<int> conflicting_actions;
		set<int> start_conditions_involved;
		
		for(int j=0; j<data.start_conditions.size(); j++)
		{
			set<int> local_conflicting_actions;
			for(int k=0; k<re.action_numbers.size(); k++)
				if(data.actions[re.action_numbers[k]].start_condition_numbers.count(j))
					local_conflicting_actions.insert(re.action_numbers[k]);
			
			if(!local_conflicting_actions.size())
				re.start_condition_to_action_number.push_back(-1);
			else if(local_conflicting_actions.size()==1)
				re.start_condition_to_action_number.push_back(*local_conflicting_actions.begin());
			else
			{
				conflicts_found=true;
				union_assign(conflicting_actions, local_conflicting_actions);
				start_conditions_involved.insert(j);
				re.start_condition_to_action_number.push_back(-1); // doesn't matter
			}
		}
		
		if(conflicts_found)
		{
			assert(conflicting_actions.size()>1);
			bool in_all_start_conditions=(start_conditions_involved.size()==data.start_conditions.size());
			
			if(in_all_start_conditions)
				cout << "Multiple actions ";
			else
				cout << "Actions ";
			cout << "for expression ";
			cout << re.in_serialized_form;
			cout << ", defined at ";
			
			vector<Terminal *> v;
			for(set<int>::iterator p=conflicting_actions.begin(); p!=conflicting_actions.end(); p++)
				v.push_back(data.actions[*p].declaration->arrow);
			print_a_number_of_terminal_locations(cout, v, "at");
			
                        if(in_all_start_conditions)
                        	cout << ".\n";
			else
			{
				bool more_than_one_sc=(start_conditions_involved.size()>1);
				cout << " overlap in start condition";
				if(more_than_one_sc) cout << "s";
				cout << " ";
				print_a_number_of_start_conditions(cout, start_conditions_involved);
				cout << ".\n";
			}
			
			flag=false;
		}
	}
	
	if(!flag) return false;
	
	// The grammar seems to be correct. For each nonterminal, merging all
	// its rules into one.
	for(int i=0; i<n; i++)
	{
		NonterminalData &nonterminal=data.nonterminals[i];
		while(nonterminal.rules.size()>1)
		{
			int p2=nonterminal.rules.size()-1;
			
			NonterminalExpressionDisjunction *new_node=new NonterminalExpressionDisjunction;
			new_node->expr1=nonterminal.rules[0]->right;
			new_node->expr2=nonterminal.rules[p2]->right;
			nonterminal.rules[0]->right=new_node;
			
			// A few bytes of memory left hanging won't do any harm.
			nonterminal.rules.pop_back();
		}
	}
	
	return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -