📄 nfatodfaconverter.java
字号:
/* [The "BSD licence"] Copyright (c) 2005-2006 Terence Parr All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/package org.antlr.analysis;import org.antlr.misc.IntSet;import org.antlr.misc.OrderedHashSet;import org.antlr.misc.Utils;import java.util.*;/** Code that embodies the NFA conversion to DFA. */public class NFAToDFAConverter { /** A list of DFA states we still need to process during NFA conversion */ protected List work = new LinkedList(); /** While converting NFA, we must track states that * reference other rule's NFAs so we know what to do * at the end of a rule. We need to know what context invoked * this rule so we can know where to continue looking for NFA * states. I'm tracking a context tree (record of rule invocation * stack trace) for each alternative that could be predicted. */ protected NFAContext[] contextTrees; /** We are converting which DFA? */ protected DFA dfa; public static boolean debug = false; /** Should ANTLR launch multiple threads to convert NFAs to DFAs? * With a 2-CPU box, I note that it's about the same single or * multithreaded. Both CPU meters are going even when single-threaded * so I assume the GC is killing us. Could be the compiler. When I * run java -Xint mode, I get about 15% speed improvement with multiple * threads. */ public static boolean SINGLE_THREADED_NFA_CONVERSION = true; public NFAToDFAConverter(DFA dfa) { this.dfa = dfa; NFAState nfaStartState = dfa.getNFADecisionStartState(); int nAlts = dfa.getNumberOfAlts(); initContextTrees(nAlts); } public void convert() { dfa.conversionStartTime = System.currentTimeMillis(); // create the DFA start state dfa.startState = computeStartState(); // while more DFA states to check, process them while ( work.size()>0 && !dfa.probe.analysisAborted() && !dfa.probe.nonLLStarDecision && !dfa.nfa.grammar.NFAToDFAConversionExternallyAborted() ) { DFAState d = (DFAState) work.get(0); if ( dfa.nfa.grammar.getWatchNFAConversion() ) { System.out.println("convert DFA state "+d.stateNumber+ " ("+d.getNFAConfigurations().size()+" nfa states)"); } int k = dfa.getUserMaxLookahead(); if ( k>0 && k==d.getLookaheadDepth() ) { // we've hit max lookahead, make this a stop state //System.out.println("stop state @k="+k+" (terminated early)"); resolveNonDeterminisms(d); // Check to see if we need to add any semantic predicate transitions if ( d.isResolvedWithPredicates() ) { addPredicateTransitions(d); } else { d.setAcceptState(true); // must convert to accept state at k } } else { findNewDFAStatesAndAddDFATransitions(d); } work.remove(0); // done with it; remove from work list } // walk all accept states and find the synpreds // I used to do this in the code generator, but that is too late. // This converter tries to avoid computing DFA for decisions in // syntactic predicates that are not ever used such as those // created by autobacktrack mode. int nAlts = dfa.getNumberOfAlts(); for (int i=1; i<=nAlts; i++) { DFAState a = dfa.getAcceptState(i); if ( a!=null ) { Set synpreds = a.getSyntacticPredicatesInNFAConfigurations(); if ( synpreds!=null ) { // add all the predicates we find (should be just one, right?) for (Iterator it = synpreds.iterator(); it.hasNext();) { SemanticContext semctx = (SemanticContext) it.next(); // System.out.println("synpreds: "+semctx); dfa.nfa.grammar.synPredUsedInDFA(dfa, semctx); } } } } } /** From this first NFA state of a decision, create a DFA. * Walk each alt in decision and compute closure from the start of that * rule, making sure that the closure does not include other alts within * that same decision. The idea is to associate a specific alt number * with the starting closure so we can trace the alt number for all states * derived from this. At a stop state in the DFA, we can return this alt * number, indicating which alt is predicted. * * If this DFA is derived from an loop back NFA state, then the first * transition is actually the exit branch of the loop. Rather than make * this alternative one, let's make this alt n+1 where n is the number of * alts in this block. This is nice to keep the alts of the block 1..n; * helps with error messages. * * I handle nongreedy in findNewDFAStatesAndAddDFATransitions * when nongreedy and EOT transition. Make state with EOT emanating * from it the accept state. */ protected DFAState computeStartState() { NFAState alt = dfa.decisionNFAStartState; DFAState startState = dfa.newState(); int i = 0; int altNum = 1; while ( alt!=null ) { // find the set of NFA states reachable without consuming // any input symbols for each alt. Keep adding to same // overall closure that will represent the DFA start state, // but track the alt number NFAContext initialContext = contextTrees[i]; // if first alt is derived from loopback/exit branch of loop, // make alt=n+1 for n alts instead of 1 if ( i==0 && dfa.getNFADecisionStartState().decisionStateType==NFAState.LOOPBACK ) { int numAltsIncludingExitBranch = dfa.nfa.grammar .getNumberOfAltsForDecisionNFA(dfa.decisionNFAStartState); altNum = numAltsIncludingExitBranch; closure((NFAState)alt.transition(0).target, altNum, initialContext, SemanticContext.EMPTY_SEMANTIC_CONTEXT, startState, true); altNum = 1; // make next alt the first } else { closure((NFAState)alt.transition(0).target, altNum, initialContext, SemanticContext.EMPTY_SEMANTIC_CONTEXT, startState, true); altNum++; } i++; // move to next alternative if ( alt.transition(1)==null ) { break; } alt = (NFAState)alt.transition(1).target; } // now DFA start state has the complete closure for the decision // but we have tracked which alt is associated with which // NFA states. dfa.addState(startState); // make sure dfa knows about this state work.add(startState); return startState; } /** From this node, add a d--a-->t transition for all * labels 'a' where t is a DFA node created * from the set of NFA states reachable from any NFA * state in DFA state d. */ protected void findNewDFAStatesAndAddDFATransitions(DFAState d) { //System.out.println("work on DFA state "+d); OrderedHashSet labels = d.getReachableLabels(); /* System.out.println("reachable="+labels.toString()); System.out.println("|reachable|/|nfaconfigs|="+ labels.size()+"/"+d.getNFAConfigurations().size()+"="+ labels.size()/(float)d.getNFAConfigurations().size()); */ // normally EOT is the "default" clause and decisions just // choose that last clause when nothing else matches. DFA conversion // continues searching for a unique sequence that predicts the // various alts or until it finds EOT. So this rule // // DUH : ('x'|'y')* "xy!"; // // does not need a greedy indicator. The following rule works fine too // // A : ('x')+ ; // // When the follow branch could match what is in the loop, by default, // the nondeterminism is resolved in favor of the loop. You don't // get a warning because the only way to get this condition is if // the DFA conversion hits the end of the token. In that case, // we're not *sure* what will happen next, but it could be anything. // Anyway, EOT is the default case which means it will never be matched // as resolution goes to the lowest alt number. Exit branches are // always alt n+1 for n alts in a block. // // When a loop is nongreedy and we find an EOT transition, the DFA // state should become an accept state, predicting exit of loop. It's // just reversing the resolution of ambiguity. // TODO: should this be done in the resolveAmbig method? Label EOTLabel = new Label(Label.EOT); boolean containsEOT = labels.contains(EOTLabel); if ( !dfa.isGreedy() && containsEOT ) { convertToEOTAcceptState(d); return; // no more work to do on this accept state } // if in filter mode for lexer, want to match shortest not longest // string so if we see an EOT edge emanating from this state, then // convert this state to an accept state. This only counts for // The Tokens rule as all other decisions must continue to look for // longest match. // [Taking back out a few days later on Jan 17, 2006. This could // be an option for the future, but this was wrong soluion for // filtering.] /* if ( dfa.nfa.grammar.type==Grammar.LEXER && containsEOT ) { String filterOption = (String)dfa.nfa.grammar.getOption("filter"); boolean filterMode = filterOption!=null && filterOption.equals("true"); if ( filterMode && d.dfa.isTokensRuleDecision() ) { DFAState t = reach(d, EOTLabel); if ( t.getNFAConfigurations().size()>0 ) { convertToEOTAcceptState(d); //System.out.println("state "+d+" has EOT target "+t.stateNumber); return; } } } */ int numberOfEdgesEmanating = 0; Map targetToLabelMap = new HashMap(); // for each label that could possibly emanate from NFAStates of d // (abort if we find any closure operation on a configuration of d // that finds multiple alts with recursion, non-LL(*), as we cannot // trust any reach operations from d since we are blind to some // paths. Leave state a dead-end and try to resolve with preds) for (int i=0; !d.abortedDueToMultipleRecursiveAlts && i<labels.size(); i++) { Label label = (Label)labels.get(i); DFAState t = reach(d, label); if ( debug ) { System.out.println("DFA state after reach "+d+"-" + label.toString(dfa.nfa.grammar)+"->"+t); } if ( t==null ) { // nothing was reached by label due to conflict resolution // EOT also seems to be in here occasionally probably due // to an end-of-rule state seeing it even though we'll pop // an invoking state off the state; don't bother to conflict // as this labels set is a covering approximation only. continue; } if ( t.getUniqueAlt()==NFA.INVALID_ALT_NUMBER ) { // Only compute closure if a unique alt number is not known. // If a unique alternative is mentioned among all NFA // configurations then there is no possibility of needing to look // beyond this state; also no possibility of a nondeterminism. // This optimization May 22, 2006 just dropped -Xint time // for analysis of Java grammar from 11.5s to 2s! Wow. closure(t); // add any NFA states reachable via epsilon } /* System.out.println("DFA state after closure "+d+"-"+ label.toString(dfa.nfa.grammar)+ "->"+t); */ // add if not in DFA yet even if its closure aborts due to non-LL(*); // getting to the state is ok, we just can't see where to go next--it's // a blind alley. DFAState targetState = addDFAStateToWorkList(t); numberOfEdgesEmanating += addTransition(d, label, targetState, targetToLabelMap); // lookahead of target must be one larger than d's k targetState.setLookaheadDepth(d.getLookaheadDepth() + 1); // closure(t) might have aborted, but addDFAStateToWorkList will try // to resolve t with predicates. If that fails, must give an error // Note: this is tested on the target of d not d. if ( t.abortedDueToMultipleRecursiveAlts && !t.isResolvedWithPredicates() ) { // no predicates to resolve non-LL(*) decision, report t.dfa.probe.reportNonLLStarDecision(t.dfa);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -