📄 nfatodfaconverter.java

📁 antlr最新版本V3源代码
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
			// reporting must be able to compute the path from			// start to the error state with infinite recursion			dfa.setState(d.stateNumber, existingState);			return existingState;		}		// if not there, then examine new state.		// resolve syntactic conflicts by choosing a single alt or        // by using semantic predicates if present.        resolveNonDeterminisms(d);        // If deterministic, don't add this state; it's an accept state        // Just return as a valid DFA state		int alt = d.getUniquelyPredictedAlt();		if ( alt!=NFA.INVALID_ALT_NUMBER ) { // uniquely predicts an alt?			d = convertToAcceptState(d, alt);			/*			System.out.println("convert to accept; DFA "+d.dfa.decisionNumber+" state "+d.stateNumber+" uniquely predicts alt "+				d.getUniquelyPredictedAlt());				*/		}		else {            // unresolved, add to work list to continue NFA conversion            work.add(d);        }        return d;    }	protected DFAState convertToAcceptState(DFAState d, int alt) {		// only merge stop states if they are deterministic and no		// recursion problems and only if they have the same gated pred		// context!		// Later, the error reporting may want to trace the path from		// the start state to the nondet state		if ( DFAOptimizer.MERGE_STOP_STATES &&			d.getNonDeterministicAlts()==null &&			!d.abortedDueToRecursionOverflow &&			!d.abortedDueToMultipleRecursiveAlts )		{			// check to see if we already have an accept state for this alt			// [must do this after we resolve nondeterminisms in general]			DFAState acceptStateForAlt = dfa.getAcceptState(alt);			if ( acceptStateForAlt!=null ) {				// we already have an accept state for alt;				// Are their gate sem pred contexts the same?				// For now we assume a braindead version: both must not				// have gated preds or share exactly same single gated pred.				// The equals() method is only defined on Predicate contexts not				// OR etc...				SemanticContext gatedPreds = d.getGatedPredicatesInNFAConfigurations();				SemanticContext existingStateGatedPreds =					acceptStateForAlt.getGatedPredicatesInNFAConfigurations();				if ( (gatedPreds==null && existingStateGatedPreds==null) ||				     ((gatedPreds!=null && existingStateGatedPreds!=null) &&					  gatedPreds.equals(existingStateGatedPreds)) )				{					// make this d.statenumber point at old DFA state					dfa.setState(d.stateNumber, acceptStateForAlt);					dfa.removeState(d);    // remove this state from unique DFA state set					d = acceptStateForAlt; // use old accept state; throw this one out					return d;				}				// else consider it a new accept state; fall through.			}			d.setAcceptState(true); // new accept state for alt			dfa.setAcceptState(alt, d);			return d;		}		d.setAcceptState(true); // new accept state for alt		dfa.setAcceptState(alt, d);		return d;	}	/** If > 1 NFA configurations within this DFA state have identical	 *  NFA state and context, but differ in their predicted	 *  TODO update for new context suffix stuff 3-9-2005	 *  alternative then a single input sequence predicts multiple alts.	 *  The NFA decision is therefore syntactically indistinguishable	 *  from the left edge upon at least one input sequence.  We may	 *  terminate the NFA to DFA conversion for these paths since no	 *  paths emanating from those NFA states can possibly separate	 *  these conjoined twins once interwined to make things	 *  deterministic (unless there are semantic predicates; see below).	 *	 *  Upon a nondeterministic set of NFA configurations, we should	 *  report a problem to the grammar designer and resolve the issue	 *  by aribitrarily picking the first alternative (this usually	 *  ends up producing the most natural behavior).  Pick the lowest	 *  alt number and just turn off all NFA configurations	 *  associated with the other alts. Rather than remove conflicting	 *  NFA configurations, I set the "resolved" bit so that future	 *  computations will ignore them.  In this way, we maintain the	 *  complete DFA state with all its configurations, but prevent	 *  future DFA conversion operations from pursuing undesirable	 *  paths.  Remember that we want to terminate DFA conversion as	 *  soon as we know the decision is deterministic *or*	 *  nondeterministic.	 *	 *  [BTW, I have convinced myself that there can be at most one	 *  set of nondeterministic configurations in a DFA state.  Only NFA	 *  configurations arising from the same input sequence can appear	 *  in a DFA state.  There is no way to have another complete set	 *  of nondeterministic NFA configurations without another input	 *  sequence, which would reach a different DFA state.  Therefore,	 *  the two nondeterministic NFA configuration sets cannot collide	 *  in the same DFA state.]	 *	 *  Consider DFA state {(s|1),(s|2),(s|3),(t|3),(v|4)} where (s|a)	 *  is state 's' and alternative 'a'.  Here, configuration set	 *  {(s|1),(s|2),(s|3)} predicts 3 different alts.  Configurations	 *  (s|2) and (s|3) are "resolved", leaving {(s|1),(t|3),(v|4)} as	 *  items that must still be considered by the DFA conversion	 *  algorithm in DFA.findNewDFAStatesAndAddDFATransitions().	 *	 *  Consider the following grammar where alts 1 and 2 are no	 *  problem because of the 2nd lookahead symbol.  Alts 3 and 4 are	 *  identical and will therefore reach the rule end NFA state but	 *  predicting 2 different alts (no amount of future lookahead	 *  will render them deterministic/separable):	 *	 *  a : A B	 *    | A C	 *    | A	 *    | A	 *    ;	 *	 *  Here is a (slightly reduced) NFA of this grammar:	 *	 *  (1)-A->(2)-B->(end)-EOF->(8)	 *   |              ^	 *  (2)-A->(3)-C----|	 *   |              ^	 *  (4)-A->(5)------|	 *   |              ^	 *  (6)-A->(7)------|	 *	 *  where (n) is NFA state n.  To begin DFA conversion, the start	 *  state is created:	 *	 *  {(1|1),(2|2),(4|3),(6|4)}	 *	 *  Upon A, all NFA configurations lead to new NFA states yielding	 *  new DFA state:	 *	 *  {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)}	 *	 *  where the configurations with state end in them are added	 *  during the epsilon closure operation.  State end predicts both	 *  alts 3 and 4.  An error is reported, the latter configuration is	 *  flagged as resolved leaving the DFA state as:	 *	 *  {(2|1),(3|2),(5|3),(7|4|resolved),(end|3),(end|4|resolved)}	 *	 *  As NFA configurations are added to a DFA state during its	 *  construction, the reachable set of labels is computed.  Here	 *  reachable is {B,C,EOF} because there is at least one NFA state	 *  in the DFA state that can transition upon those symbols.	 *	 *  The final DFA looks like:	 *	 *  {(1|1),(2|2),(4|3),(6|4)}	 *              |	 *              v	 *  {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)} -B-> (end|1)	 *              |                        |	 *              C                        ----EOF-> (8,3)	 *              |	 *              v	 *           (end|2)	 *	 *  Upon AB, alt 1 is predicted.  Upon AC, alt 2 is predicted.	 *  Upon A EOF, alt 3 is predicted.  Alt 4 is not a viable	 *  alternative.	 *	 *  The algorithm is essentially to walk all the configurations	 *  looking for a conflict of the form (s|i) and (s|j) for i!=j.	 *  Use a hash table to track state+context pairs for collisions	 *  so that we have O(n) to walk the n configurations looking for	 *  a conflict.  Upon every conflict, track the alt number so	 *  we have a list of all nondeterministically predicted alts. Also	 *  track the minimum alt.  Next go back over the configurations, setting	 *  the "resolved" bit for any that have an alt that is a member of	 *  the nondeterministic set.  This will effectively remove any alts	 *  but the one we want from future consideration.	 *	 *  See resolveWithSemanticPredicates()	 *	 *  AMBIGUOUS TOKENS	 *	 *  With keywords and ID tokens, there is an inherit ambiguity in that	 *  "int" can be matched by ID also.  Each lexer rule has an EOT	 *  transition emanating from it which is used whenever the end of	 *  a rule is reached and another token rule did not invoke it.  EOT	 *  is the only thing that can be seen next.  If two rules are identical	 *  like "int" and "int" then the 2nd def is unreachable and you'll get	 *  a warning.  We prevent a warning though for the keyword/ID issue as	 *  ID is still reachable.  This can be a bit weird.  '+' rule then a	 *  '+'|'+=' rule will fail to match '+' for the 2nd rule.	 *	 *  If all NFA states in this DFA state are targets of EOT transitions,	 *  (and there is more than one state plus no unique alt is predicted)	 *  then DFA conversion will leave this state as a dead state as nothing	 *  can be reached from this state.  To resolve the ambiguity, just do	 *  what flex and friends do: pick the first rule (alt in this case) to	 *  win.  This means you should put keywords before the ID rule.	 *  If the DFA state has only one NFA state then there is no issue:	 *  it uniquely predicts one alt. :)  Problem	 *  states will look like this during conversion:	 *	 *  DFA 1:{9|1, 19|2, 14|3, 20|2, 23|2, 24|2, ...}-<EOT>->5:{41|3, 42|2}	 *	 *  Worse, when you have two identical literal rules, you will see 3 alts	 *  in the EOT state (one for ID and one each for the identical rules).	 */	public void resolveNonDeterminisms(DFAState d) {		if ( debug ) {			System.out.println("resolveNonDeterminisms "+d.toString());		}		boolean conflictingLexerRules = false;		Set nondeterministicAlts = d.getNonDeterministicAlts();		if ( debug && nondeterministicAlts!=null ) {			System.out.println("nondet alts="+nondeterministicAlts);		}		// CHECK FOR AMBIGUOUS EOT (if |allAlts|>1 and EOT state, resolve)		// grab any config to see if EOT state; any other configs must		// transition on EOT to get to this DFA state as well so all		// states in d must be targets of EOT.  These are the end states		// created in NFAFactory.build_EOFState		NFAConfiguration anyConfig;		Iterator itr = d.nfaConfigurations.iterator();        anyConfig = (NFAConfiguration)itr.next();		NFAState anyState = dfa.nfa.getState(anyConfig.state);		// if d is target of EOT and more than one predicted alt		// indicate that d is nondeterministic on all alts otherwise		// it looks like state has no problem		if ( anyState.isEOTTargetState() ) {			Set allAlts = d.getAltSet();			// is more than 1 alt predicted?			if ( allAlts!=null && allAlts.size()>1 ) {				nondeterministicAlts = allAlts;				// track Tokens rule issues differently than other decisions				if ( d.dfa.isTokensRuleDecision() ) {					dfa.probe.reportLexerRuleNondeterminism(d,allAlts);					//System.out.println("Tokens rule DFA state "+d+" nondeterministic");					conflictingLexerRules = true;				}			}		}		// if no problems return unless we aborted work on d to avoid inf recursion		if ( !d.abortedDueToRecursionOverflow && nondeterministicAlts==null ) {			return; // no problems, return		}		// if we're not a conflicting lexer rule and we didn't abort, report ambig		// We should get a report for abort so don't give another		if ( !d.abortedDueToRecursionOverflow && !conflictingLexerRules ) {			// TODO: with k=x option set, this is called twice for same state			dfa.probe.reportNondeterminism(d, nondeterministicAlts);			// TODO: how to turn off when it's only the FOLLOW that is			// conflicting.  This used to shut off even alts i,j < n			// conflict warnings. :(			/*			if ( dfa.isGreedy() ) {				// if nongreedy then they have said to let it fall out of loop				// don't report the problem				dfa.probe.reportNondeterminism(d);			}			else {				// TODO: remove when sure it's cool				dfa.probe.reportNondeterminism(d);				System.out.println("temp warning: warning suppressed for nongreedy loop");			}			*/		}		// ATTEMPT TO RESOLVE WITH SEMANTIC PREDICATES		boolean resolved =			tryToResolveWithSemanticPredicates(d, nondeterministicAlts);		if ( resolved ) {			d.resolvedWithPredicates = true;			dfa.probe.reportNondeterminismResolvedWithSemanticPredicate(d);			return;		}		// RESOLVE SYNTACTIC CONFLICT BY REMOVING ALL BUT ONE ALT		resolveByChoosingFirstAlt(d, nondeterministicAlts);		//System.out.println("state "+d.stateNumber+" resolved to alt "+winningAlt);	}	protected int resolveByChoosingFirstAlt(DFAState d, Set nondeterministicAlts) {		int winningAlt = 0;		if ( dfa.isGreedy() ) {			winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts);		}		else {			// If nongreedy, the exit alt shout win, but only if it's			// involved in the nondeterminism!			/*			System.out.println("resolving exit alt for decision="+				dfa.decisionNumber+" state="+d);			System.out.println("nondet="+nondeterministicAlts);			System.out.println("exit alt "+exitAlt);			*/			int exitAlt = dfa.getNumberOfAlts();			if ( nondeterministicAlts.contains(Utils.integer(exitAlt)) ) {				// if nongreedy and exit alt is one of those nondeterministic alts				// predicted, resolve in favor of what follows block				winningAlt = resolveByPickingExitAlt(d,nondeterministicAlts);			}			else {				winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts);			}		}		return winningAlt;	}	/** Turn off all configurations associated with the	 *  set of incoming nondeterministic alts except the min alt number.	 *  There may be many alts among the configurations but only turn off	 *  the ones with problems (other than the min alt of course).
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -