📄 nfatodfaconverter.java

📁 ANTLR(ANother Tool for Language Recognition)它是这样的一种工具
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
	 *  if COLLAPSE_ALL_INCIDENT_EDGES, however, try to merge all edges from	 *  d to targetState; this means merging their labels.  Another optimization	 *  is to reduce to a single EOT edge any set of edges from d to targetState	 *  where there exists an EOT state.  EOT is like the wildcard so don't	 *  bother to test any other edges.  Example:	 *	 *  NUM_INT	 *    : '1'..'9' ('0'..'9')* ('l'|'L')?     *    | '0' ('x'|'X') ('0'..'9'|'a'..'f'|'A'..'F')+ ('l'|'L')?     *    | '0' ('0'..'7')* ('l'|'L')?	 *    ;	 *	 *  The normal decision to predict alts 1, 2, 3 is:	 *	 *  if ( (input.LA(1)>='1' && input.LA(1)<='9') ) {     *       alt7=1;     *  }     *  else if ( input.LA(1)=='0' ) {     *      if ( input.LA(2)=='X'||input.LA(2)=='x' ) {     *          alt7=2;     *      }     *      else if ( (input.LA(2)>='0' && input.LA(2)<='7') ) {     *           alt7=3;     *      }     *      else if ( input.LA(2)=='L'||input.LA(2)=='l' ) {     *           alt7=3;     *      }     *      else {     *           alt7=3;     *      }     *  }     *  else error	 *     *  Clearly, alt 3 is predicted with extra work since it tests 0..7	 *  and [lL] before finally realizing that any character is actually	 *  ok at k=2.	 *	 *  A better decision is as follows:     *	 *  if ( (input.LA(1)>='1' && input.LA(1)<='9') ) {	 *      alt7=1;	 *  }	 *  else if ( input.LA(1)=='0' ) {	 *      if ( input.LA(2)=='X'||input.LA(2)=='x' ) {	 *          alt7=2;	 *      }	 *      else {	 *          alt7=3;	 *      }	 *  }	 *	 *  The DFA originally has 3 edges going to the state the predicts alt 3,	 *  but upon seeing the EOT edge (the "else"-clause), this method	 *  replaces the old merged label (which would have (0..7|l|L)) with EOT.	 *  The code generator then leaves alt 3 predicted with a simple else-	 *  clause. :)	 *	 *  The only time the EOT optimization makes no sense is in the Tokens	 *  rule.  We want EOT to truly mean you have matched an entire token	 *  so don't bother actually rewinding to execute that rule unless there	 *  are actions in that rule.  For now, since I am not preventing	 *  backtracking from Tokens rule, I will simply allow the optimization.	 */	protected static int addTransition(DFAState d,									   Label label,									   DFAState targetState,									   Map targetToLabelMap)	{		//System.out.println(d.stateNumber+"-"+label.toString(dfa.nfa.grammar)+"->"+targetState.stateNumber);		int n = 0;		if ( DFAOptimizer.COLLAPSE_ALL_PARALLEL_EDGES ) {			// track which targets we've hit			Integer tI = Utils.integer(targetState.stateNumber);			Transition oldTransition = (Transition)targetToLabelMap.get(tI);			if ( oldTransition!=null ) {				//System.out.println("extra transition to "+tI+" upon "+label.toString(dfa.nfa.grammar));				// already seen state d to target transition, just add label				// to old label unless EOT				if ( label.getAtom()==Label.EOT ) {					// merge with EOT means old edge can go away					oldTransition.label = new Label(Label.EOT);				}				else {					// don't add anything to EOT, it's essentially the wildcard					if ( oldTransition.label.getAtom()!=Label.EOT ) {						// ok, not EOT, add in this label to old label						oldTransition.label.add(label);					}					//System.out.println("label updated to be "+oldTransition.label.toString(dfa.nfa.grammar));				}			}			else {				// make a transition from d to t upon 'a'				n = 1;				label = (Label)label.clone(); // clone in case we alter later				int transitionIndex = d.addTransition(targetState, label);				Transition trans = d.getTransition(transitionIndex);				// track target/transition pairs				targetToLabelMap.put(tI, trans);			}		}		else {			n = 1;			d.addTransition(targetState, label);		}		return n;	}	/** For all NFA states (configurations) merged in d,	 *  compute the epsilon closure; that is, find all NFA states reachable	 *  from the NFA states in d via purely epsilon transitions.	 */	public void closure(DFAState d) {		if ( debug ) {			System.out.println("closure("+d+")");		}		Set configs = new HashSet();		// Because we are adding to the configurations in closure		// must clone initial list so we know when to stop doing closure		// TODO: expensive, try to get around this alloc / copy		configs.addAll(d.getNFAConfigurations());		// for each NFA configuration in d (abort if we detect non-LL(*) state)		Iterator iter = configs.iterator();		while (!d.abortedDueToMultipleRecursiveAlts && iter.hasNext() ) {			NFAConfiguration c = (NFAConfiguration)iter.next();			if ( c.singleAtomTransitionEmanating ) {				continue; // ignore NFA states w/o epsilon transitions			}			//System.out.println("go do reach for NFA state "+c.state);			// figure out reachable NFA states from each of d's nfa states			// via epsilon transitions			closure(dfa.nfa.getState(c.state),					c.alt,					c.context,					c.semanticContext,					d,					false);		}		d.closureBusy = null; // wack all that memory used during closure	}	/** Where can we get from NFA state p traversing only epsilon transitions?	 *  Add new NFA states + context to DFA state d.  Also add semantic	 *  predicates to semantic context if collectPredicates is set.  We only	 *  collect predicates at hoisting depth 0, meaning before any token/char	 *  have been recognized.  This corresponds, during analysis, to the	 *  initial DFA start state construction closure() invocation.	 *	 *  There are four cases of interest (the last being the usual transition):	 *	 *   1. Traverse an edge that takes us to the start state of another	 *      rule, r.  We must push this state so that if the DFA	 *      conversion hits the end of rule r, then it knows to continue	 *      the conversion at state following state that "invoked" r. By	 *      construction, there is a single transition emanating from a rule	 *      ref node.	 *	 *   2. Reach an NFA state associated with the end of a rule, r, in the	 *      grammar from which it was built.  We must add an implicit (i.e.,	 *      don't actually add an epsilon transition) epsilon transition	 *      from r's end state to the NFA state following the NFA state	 *      that transitioned to rule r's start state.  Because there are	 *      many states that could reach r, the context for a rule invocation	 *      is part of a call tree not a simple stack.  When we fall off end	 *      of rule, "pop" a state off the call tree and add that state's	 *      "following" node to d's NFA configuration list.  The context	 *      for this new addition will be the new "stack top" in the call tree.	 *	 *   3. Like case 2, we reach an NFA state associated with the end of a	 *      rule, r, in the grammar from which NFA was built.  In this case,	 *      however, we realize that during this NFA->DFA conversion, no state	 *      invoked the current rule's NFA.  There is no choice but to add	 *      all NFA states that follow references to r's start state.  This is	 *      analogous to computing the FOLLOW(r) in the LL(k) world.  By	 *      construction, even rule stop state has a chain of nodes emanating	 *      from it that points to every possible following node.  This case	 *      is conveniently handled then by the 4th case.	 *	 *   4. Normal case.  If p can reach another NFA state q, then add	 *      q to d's configuration list, copying p's context for q's context.	 *      If there is a semantic predicate on the transition, then AND it	 *      with any existing semantic context.	 *	 *   Current state p is always added to d's configuration list as it's part	 *   of the closure as well.	 *	 *  When is a closure operation in a cycle condition?  While it is	 *  very possible to have the same NFA state mentioned twice	 *  within the same DFA state, there are two situations that	 *  would lead to nontermination of closure operation:	 *	 *  o   Whenever closure reaches a configuration where the same state	 *      with same or a suffix context already exists.  This catches	 *      the IF-THEN-ELSE tail recursion cycle and things like	 *	 *      a : A a | B ;	 *	 *      the context will be $ (empty stack).	 *	 *      We have to check	 *      larger context stacks because of (...)+ loops.  For	 *      example, the context of a (...)+ can be nonempty if the	 *      surrounding rule is invoked by another rule:	 *	 *      a : b A | X ;	 *      b : (B|)+ ;  // nondeterministic by the way	 *	 *      The context of the (B|)+ loop is "invoked from item	 *      a : . b A ;" and then the empty alt of the loop can reach back	 *      to itself.  The context stack will have one "return	 *      address" element and so we must check for same state, same	 *      context for arbitrary context stacks.	 *	 *      Idea: If we've seen this configuration before during closure, stop.	 *      We also need to avoid reaching same state with conflicting context.	 *      Ultimately analysis would stop and we'd find the conflict, but we	 *      should stop the computation.  Previously I only checked for	 *      exact config.  Need to check for same state, suffix context	 * 		not just exact context.	 *	 *  o   Whenever closure reaches a configuration where state p	 *      is present in its own context stack.  This means that	 *      p is a rule invocation state and the target rule has	 *      been called before.  NFAContext.MAX_RECURSIVE_INVOCATIONS	 *      (See the comment there also) determines how many times	 *      it's possible to recurse; clearly we cannot recurse forever.	 *      Some grammars such as the following actually require at	 *      least one recursive call to correctly compute the lookahead:	 *	 *      a : L ID R	 *        | b	 *        ;	 *      b : ID	 *        | L a R	 *        ;	 *	 *      Input L ID R is ambiguous but to figure this out, ANTLR	 *      needs to go a->b->a->b to find the L ID sequence.	 *	 *      Do not allow closure to add a configuration that would	 *      allow too much recursion.	 *	 *      This case also catches infinite left recursion.	 */	public void closure(NFAState p,						int alt,						NFAContext context,						SemanticContext semanticContext,						DFAState d,						boolean collectPredicates)	{		if ( debug ){			System.out.println("closure at NFA state "+p.stateNumber+"|"+							   alt+" filling DFA state "+d.stateNumber+" with context "+context							   );		}		if ( d.abortedDueToMultipleRecursiveAlts ) {			// keep walking back out, we're in the process of terminating			// this closure operation on NFAState p contained with DFAState d			return;		}		/* NOTE SURE WE NEED THIS FAILSAFE NOW 11/8/2006 and it complicates		   MY ALGORITHM TO HAVE TO ABORT ENTIRE DFA CONVERSION		if ( DFA.MAX_TIME_PER_DFA_CREATION>0 &&			 System.currentTimeMillis() - d.dfa.conversionStartTime >=			 DFA.MAX_TIME_PER_DFA_CREATION )		{			// report and back your way out; we've blown up somehow			terminateConversion = true;			dfa.probe.reportEarlyTermination();			return;		}		*/		NFAConfiguration proposedNFAConfiguration =				new NFAConfiguration(p.stateNumber,						alt,						context,						semanticContext);		// Avoid infinite recursion		if ( closureIsBusy(d, proposedNFAConfiguration) ) {			if ( debug ) {				System.out.println("avoid visiting exact closure computation NFA config: "+proposedNFAConfiguration);				System.out.println("state is "+d.dfa.decisionNumber+"."+d);			}			return;		}		// set closure to be busy for this NFA configuration		d.closureBusy.add(proposedNFAConfiguration);		// p itself is always in closure		d.addNFAConfiguration(p, proposedNFAConfiguration);		// Case 1: are we a reference to another rule?		Transition transition0 = p.transition(0);		if ( transition0 instanceof RuleClosureTransition ) {			int depth = context.recursionDepthEmanatingFromState(p.stateNumber);			// Detect recursion by more than a single alt, which indicates			// that the decision's lookahead language is non-regular; terminate			if ( depth == 1 && d.dfa.getUserMaxLookahead()==0 ) { // k=* only			//if ( depth >= NFAContext.MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK ) {				d.dfa.recursiveAltSet.add(alt); // indicate that this alt is recursive				if ( d.dfa.recursiveAltSet.size()>1 ) {					//System.out.println("recursive alts: "+d.dfa.recursiveAltSet.toString());					d.abortedDueToMultipleRecursiveAlts = true;					return;				}				/*				System.out.println("alt "+alt+" in rule "+p.enclosingRule+" dec "+d.dfa.decisionNumber+					" ctx: "+context);				System.out.println("d="+d);				*/			}			// Detect an attempt to recurse too high			// if this context has hit the max recursions for p.stateNumber,			// don't allow it to enter p.stateNumber again			if ( depth >= NFAContext.MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK ) {				/*				System.out.println("OVF state "+d);				System.out.println("proposed "+proposedNFAConfiguration);				*/
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -