📄 dfa.java
字号:
* no cycles can be created. If we're doing fixed k lookahead * don't updated uniqueStates, just return incoming state, which * indicates it's a new state. */ protected DFAState addState(DFAState d) { /* if ( decisionNumber==14 ) { System.out.println("addState: "+d.stateNumber); } */ if ( getUserMaxLookahead()>0 ) { return d; } // does a DFA state exist already with everything the same // except its state number? DFAState existing = (DFAState)uniqueStates.get(d); if ( existing != null ) { /* System.out.println("state "+d.stateNumber+" exists as state "+ existing.stateNumber); */ // already there...get the existing DFA state return existing; } // if not there, then add new state. uniqueStates.put(d,d); numberOfStates++; return d; } public void removeState(DFAState d) { DFAState it = (DFAState)uniqueStates.remove(d); if ( it!=null ) { numberOfStates--; } } public Map getUniqueStates() { return uniqueStates; } /** What is the max state number ever created? This may be beyond * getNumberOfStates(). */ public int getMaxStateNumber() { return states.size()-1; } public DFAState getState(int stateNumber) { return (DFAState)states.get(stateNumber); } public void setState(int stateNumber, DFAState d) { states.set(stateNumber, d); } /** Is the DFA reduced? I.e., does every state have a path to an accept * state? If not, don't delete as we need to generate an error indicating * which paths are "dead ends". Also tracks list of alts with no accept * state in the DFA. Must call verify() first before this makes sense. */ public boolean isReduced() { return reduced; } /** Is this DFA cyclic? That is, are there any loops? If not, then * the DFA is essentially an LL(k) predictor for some fixed, max k value. * We can build a series of nested IF statements to match this. In the * presence of cycles, we need to build a general DFA and interpret it * to distinguish between alternatives. */ public boolean isCyclic() { return cyclic && getUserMaxLookahead()==0; } public boolean canInlineDecision() { // TODO: and ! too big return CodeGenerator.GEN_ACYCLIC_DFA_INLINE && !isCyclic() && !probe.isNonLLStarDecision(); } /** Is this DFA derived from the NFA for the Tokens rule? */ public boolean isTokensRuleDecision() { if ( nfa.grammar.type!=Grammar.LEXER ) { return false; } NFAState nfaStart = getNFADecisionStartState(); NFAState TokensRuleStart = nfa.grammar.getRuleStartState(Grammar.ARTIFICIAL_TOKENS_RULENAME); NFAState TokensDecisionStart = (NFAState)TokensRuleStart.transition(0).target; return nfaStart == TokensDecisionStart; } /** The user may specify a max, acyclic lookahead for any decision. No * DFA cycles are created when this value, k, is greater than 0. * If this decision has no k lookahead specified, then try the grammar. */ public int getUserMaxLookahead() { if ( user_k>=0 ) { // cache for speed return user_k; } GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber); Object k = blockAST.getOption("k"); if ( k==null ) { user_k = nfa.grammar.getGrammarMaxLookahead(); return user_k; } if (k instanceof Integer) { Integer kI = (Integer)k; user_k = kI.intValue(); } else { // must be String "*" if ( k.equals("*") ) { user_k = 0; } } return user_k; } public boolean getAutoBacktrackMode() { String autoBacktrack = (String)decisionNFAStartState.getAssociatedASTNode().getOption("backtrack"); if ( autoBacktrack==null ) { autoBacktrack = (String)nfa.grammar.getOption("backtrack"); } return autoBacktrack!=null&&autoBacktrack.equals("true"); } public void setUserMaxLookahead(int k) { this.user_k = k; } /** Return k if decision is LL(k) for some k else return max int */ public int getMaxLookaheadDepth() { if ( isCyclic() ) { return Integer.MAX_VALUE; } return max_k; } /** Return a list of Integer alt numbers for which no lookahead could * be computed or for which no single DFA accept state predicts those * alts. Must call verify() first before this makes sense. */ public List getUnreachableAlts() { return unreachableAlts; } /** Once this DFA has been built, need to verify that: * * 1. it's reduced * 2. all alts have an accept state * * Elsewhere, in the NFA converter, we need to verify that: * * 3. alts i and j have disjoint lookahead if no sem preds * 4. if sem preds, nondeterministic alts must be sufficiently covered */ public void verify() { if ( !probe.nonLLStarDecision ) { // avoid if non-LL(*) doesStateReachAcceptState(startState); } } /** figure out if this state eventually reaches an accept state and * modify the instance variable 'reduced' to indicate if we find * at least one state that cannot reach an accept state. This implies * that the overall DFA is not reduced. This algorithm should be * linear in the number of DFA states. * * The algorithm also tracks which alternatives have no accept state, * indicating a nondeterminism. * * Also computes whether the DFA is cyclic. * * TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt */ protected boolean doesStateReachAcceptState(DFAState d) { if ( d.isAcceptState() ) { // accept states have no edges emanating from them so we can return d.setAcceptStateReachable(REACHABLE_YES); // this alt is uniquely predicted, remove from nondeterministic list int predicts = d.getUniquelyPredictedAlt(); unreachableAlts.remove(Utils.integer(predicts)); return true; } // avoid infinite loops d.setAcceptStateReachable(REACHABLE_BUSY); boolean anEdgeReachesAcceptState = false; // Visit every transition, track if at least one edge reaches stop state // Cannot terminate when we know this state reaches stop state since // all transitions must be traversed to set status of each DFA state. for (int i=0; i<d.getNumberOfTransitions(); i++) { Transition t = d.transition(i); DFAState edgeTarget = (DFAState)t.target; int targetStatus = edgeTarget.getAcceptStateReachable(); if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing cyclic = true; continue; } if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work anEdgeReachesAcceptState = true; continue; } if ( targetStatus==REACHABLE_NO ) { // avoid unnecessary work continue; } // target must be REACHABLE_UNKNOWN (i.e., unvisited) if ( doesStateReachAcceptState(edgeTarget) ) { anEdgeReachesAcceptState = true; // have to keep looking so don't break loop // must cover all states even if we find a path for this state } } if ( anEdgeReachesAcceptState ) { d.setAcceptStateReachable(REACHABLE_YES); } else { /* if ( d.getNumberOfTransitions()==0 ) { probe.reportDanglingState(d); } */ d.setAcceptStateReachable(REACHABLE_NO); reduced = false; } return anEdgeReachesAcceptState; } public NFAState getNFADecisionStartState() { return decisionNFAStartState; } public DFAState getAcceptState(int alt) { return altToAcceptState[alt]; } public void setAcceptState(int alt, DFAState acceptState) { altToAcceptState[alt] = acceptState; } public String getDescription() { return description; } public int getDecisionNumber() { return decisionNFAStartState.getDecisionNumber(); } /** What GrammarAST node (derived from the grammar) is this DFA * associated with? It will point to the start of a block or * the loop back of a (...)+ block etc... */ public GrammarAST getDecisionASTNode() { return decisionNFAStartState.getAssociatedASTNode(); } public boolean isGreedy() { GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber); String v = (String)blockAST.getOption("greedy"); if ( v!=null && v.equals("false") ) { return false; } return true; } public DFAState newState() { DFAState n = new DFAState(this); n.stateNumber = stateCounter; stateCounter++; states.setSize(n.stateNumber+1); states.set(n.stateNumber, n); // track state num to state return n; } public int getNumberOfStates() { if ( getUserMaxLookahead()>0 ) { // if using fixed lookahead then uniqueSets not set return states.size(); } return numberOfStates; } public int getNumberOfAlts() { return nAlts; } public boolean analysisAborted() { return probe.analysisAborted(); } protected void initAltRelatedInfo() { unreachableAlts = new LinkedList(); for (int i = 1; i <= nAlts; i++) { unreachableAlts.add(Utils.integer(i)); } altToAcceptState = new DFAState[nAlts+1]; } public String toString() { FASerializer serializer = new FASerializer(nfa.grammar); if ( startState==null ) { return ""; } return serializer.serialize(startState, false); } /** EOT (end of token) is a label that indicates when the DFA conversion * algorithm would "fall off the end of a lexer rule". It normally * means the default clause. So for ('a'..'z')+ you would see a DFA * with a state that has a..z and EOT emanating from it. a..z would * jump to a state predicting alt 1 and EOT would jump to a state * predicting alt 2 (the exit loop branch). EOT implies anything other * than a..z. If for some reason, the set is "all char" such as with * the wildcard '.', then EOT cannot match anything. For example, * * BLOCK : '{' (.)* '}' * * consumes all char until EOF when greedy=true. When all edges are * combined for the DFA state after matching '}', you will find that * it is all char. The EOT transition has nothing to match and is * unreachable. The findNewDFAStatesAndAddDFATransitions() method * must know to ignore the EOT, so we simply remove it from the * reachable labels. Later analysis will find that the exit branch * is not predicted by anything. For greedy=false, we leave only * the EOT label indicating that the DFA should stop immediately * and predict the exit branch. The reachable labels are often a * set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}] * due to DFA conversion so must construct a pure set to see if * it is same as Label.ALLCHAR. * * Only do this for Lexers. * * If EOT coexists with ALLCHAR: * 1. If not greedy, modify the labels parameter to be EOT * 2. If greedy, remove EOT from the labels set protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels) { Label eot = new Label(Label.EOT); if ( !labels.containsKey(eot) ) { return false; } System.out.println("### contains EOT"); boolean containsAllChar = false; IntervalSet completeVocab = new IntervalSet(); int n = labels.size(); for (int i=0; i<n; i++) { Label rl = (Label)labels.get(i); if ( !rl.equals(eot) ) { completeVocab.addAll(rl.getSet()); } } System.out.println("completeVocab="+completeVocab); if ( completeVocab.equals(Label.ALLCHAR) ) { System.out.println("all char"); containsAllChar = true; } return containsAllChar; } */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -