⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nfafactory.java

📁 antlr最新版本V3源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* [The "BSD licence"] Copyright (c) 2005-2006 Terence Parr All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright    notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright    notice, this list of conditions and the following disclaimer in the    documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products    derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/package org.antlr.tool;import org.antlr.analysis.*;import org.antlr.misc.IntSet;import org.antlr.misc.IntervalSet;import java.util.Iterator;import java.util.List;/** Routines to construct StateClusters from EBNF grammar constructs. *  No optimization is done to remove unnecessary epsilon edges. * *  TODO: add an optimization that reduces number of states and transitions *  will help with speed of conversion and make it easier to view NFA.  For *  example, o-A->o-->o-B->o should be o-A->o-B->o */public class NFAFactory {	/** This factory is attached to a specifc NFA that it is building.     *  The NFA will be filled up with states and transitions.     */	NFA nfa = null;	String currentRuleName = null;    /** Used to assign state numbers */    protected int stateCounter = 0;	public NFAFactory(NFA nfa) {        nfa.setFactory(this);		this.nfa = nfa;	}    public NFAState newState() {        NFAState n = new NFAState(nfa);        int state = stateCounter;        n.stateNumber = state;        stateCounter++;        nfa.addState(n);		n.setEnclosingRuleName(currentRuleName);        return n;    }    public int getNumberOfStates() {        return stateCounter;    }	/** Optimize an alternative (list of grammar elements).	 *	 *  Walk the chain of elements (which can be complicated loop blocks...)	 *  and throw away any epsilon transitions used to link up simple elements.	 *	 *  This only removes 195 states from the java.g's NFA, but every little	 *  bit helps.  Perhaps I can improve in the future.	 */	public void optimizeAlternative(StateCluster alt) {		NFAState s = alt.left;		while ( s!=alt.right ) {			// if it's a block element, jump over it and continue			if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) {				s = nfa.getState(s.endOfBlockStateNumber);				continue;			}			Transition t = s.transition(0);			if ( t instanceof RuleClosureTransition ) {				s = ((RuleClosureTransition)t).getFollowState();				continue;			}			if ( t.label.isEpsilon() && s.getNumberOfTransitions()==1 ) {				// bypass epsilon transition and point to what the epsilon's				// target points to unless that epsilon transition points to				// a block or loop etc..  Also don't collapse epsilons that				// point at the last node of the alt				NFAState epsilonTarget = (NFAState)t.target;				if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER &&					 epsilonTarget.transition(0)!=null )				{					s.setTransition0(epsilonTarget.transition(0));					/*					System.out.println("### opt "+s.stateNumber+"->"+									   epsilonTarget.transition(0).target.stateNumber);					*/				}			}			s = (NFAState)t.target;		}	}	/** From label A build Graph o-A->o */	public StateCluster build_Atom(int label) {		NFAState left = newState();		NFAState right = newState();		transitionBetweenStates(left, right, label);		StateCluster g = new StateCluster(left, right);		return g;	}    /** From set build single edge graph o->o-set->o.  To conform to     *  what an alt block looks like, must have extra state on left.     */	public StateCluster build_Set(IntSet set) {        //NFAState start = newState();        NFAState left = newState();        //transitionBetweenStates(start, left, Label.EPSILON);        NFAState right = newState();        Transition e = new Transition(new Label(set),right);        left.addTransition(e);		StateCluster g = new StateCluster(left, right);        return g;	}    /** Can only complement block of simple alts; can complement build_Set()     *  result, that is.  Get set and complement, replace old with complement.    public StateCluster build_AlternativeBlockComplement(StateCluster blk) {        State s0 = blk.left;        IntSet set = getCollapsedBlockAsSet(s0);        if ( set!=null ) {            // if set is available, then structure known and blk is a set            set = nfa.grammar.complement(set);            Label label = s0.transition(0).target.transition(0).label;            label.setSet(set);        }        return blk;    }	 */    public StateCluster build_Range(int a, int b) {        NFAState left = newState();        NFAState right = newState();        Transition e = new Transition(new Label(IntervalSet.of(a,b)),right);        left.addTransition(e);        StateCluster g = new StateCluster(left, right);        return g;    }	/** From char 'c' build StateCluster o-intValue(c)->o	 */	public StateCluster build_CharLiteralAtom(String charLiteral) {        int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteral);		return build_Atom(c);	}	/** From char 'c' build StateCluster o-intValue(c)->o	 *  can include unicode spec likes '\u0024' later.  Accepts	 *  actual unicode 16-bit now, of course, by default.     *  TODO not supplemental char clean!	 */	public StateCluster build_CharRange(String a, String b) {		int from = Grammar.getCharValueFromGrammarCharLiteral(a);		int to = Grammar.getCharValueFromGrammarCharLiteral(b);		return build_Range(from, to);	}    /** For a non-lexer, just build a simple token reference atom.     *  For a lexer, a string is a sequence of char to match.  That is,     *  "fog" is treated as 'f' 'o' 'g' not as a single transition in     *  the DFA.  Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states     *  for n characters.     */    public StateCluster build_StringLiteralAtom(String stringLiteral) {        if ( nfa.grammar.type==Grammar.LEXER ) {			StringBuffer chars =				Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteral);            NFAState first = newState();            NFAState last = null;            NFAState prev = first;            for (int i=0; i<chars.length(); i++) {                int c = chars.charAt(i);                NFAState next = newState();                transitionBetweenStates(prev, next, c);                prev = last = next;            }            return  new StateCluster(first, last);        }        // a simple token reference in non-Lexers        int tokenType = nfa.grammar.getTokenType(stringLiteral);        return build_Atom(tokenType);    }    /** For reference to rule r, build     *     *  o-e->(r)  o     *     *  where (r) is the start of rule r and the trailing o is not linked     *  to from rule ref state directly (it's done thru the transition(0)     *  RuleClosureTransition.     *     *  If the rule r is just a list of tokens, it's block will be just     *  a set on an edge o->o->o-set->o->o->o, could inline it rather than doing     *  the rule reference, but i'm not doing this yet as I'm not sure     *  it would help much in the NFA->DFA construction.     *     *  TODO add to codegen: collapse alt blks that are sets into single matchSet     */    public StateCluster build_RuleRef(int ruleIndex, NFAState ruleStart) {        /*        System.out.println("building ref to rule "+ruleIndex+": "+                nfa.getGrammar().getRuleName(ruleIndex));        */        NFAState left = newState();        // left.setDescription("ref to "+ruleStart.getDescription());        NFAState right = newState();        // right.setDescription("NFAState following ref to "+ruleStart.getDescription());        Transition e = new RuleClosureTransition(ruleIndex,ruleStart,right);        left.addTransition(e);        StateCluster g = new StateCluster(left, right);        return g;    }    /** From an empty alternative build StateCluster o-e->o */    public StateCluster build_Epsilon() {        NFAState left = newState();        NFAState right = newState();        transitionBetweenStates(left, right, Label.EPSILON);        StateCluster g = new StateCluster(left, right);        return g;    }    /** Build what amounts to an epsilon transition with a semantic     *  predicate action.  The pred is a pointer into the AST of     *  the SEMPRED token.     */    public StateCluster build_SemanticPredicate(GrammarAST pred) {		// don't count syn preds		if ( !pred.getText().toUpperCase()			    .startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) )		{			nfa.grammar.numberOfSemanticPredicates++;		}		NFAState left = newState();        NFAState right = newState();        Transition e = new Transition(new Label(pred), right);        left.addTransition(e);        StateCluster g = new StateCluster(left, right);        return g;    }	/** add an EOF transition to any rule end NFAState that points to nothing     *  (i.e., for all those rules not invoked by another rule).  These     *  are start symbols then.	 *	 *  Return the number of grammar entry points; i.e., how many rules are	 *  not invoked by another rule (they can only be invoked from outside).	 *  These are the start rules.     */    public int build_EOFStates(List rules) {		int numberUnInvokedRules = 0;        for (Iterator iterator = rules.iterator(); iterator.hasNext();) {			Rule r = (Rule) iterator.next();			String ruleName = r.name;			NFAState endNFAState = nfa.grammar.getRuleStopState(ruleName);            // Is this rule a start symbol?  (no follow links)            if ( endNFAState.transition(0)==null ) {                // if so, then don't let algorithm fall off the end of                // the rule, make it hit EOF/EOT.				/*				if ( nfa.grammar.type==Grammar.LEXER ) {					return; // 11/28/2005: try having only Tokens with EOT transition				}                if ( nfa.grammar.type!=Grammar.LEXER ||					 ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) )				{					build_EOFState(endNFAState);				}				*/				build_EOFState(endNFAState);				// track how many rules have been invoked by another rule				numberUnInvokedRules++;            }        }		return numberUnInvokedRules;    }    /** set up an NFA NFAState that will yield eof tokens or,     *  in the case of a lexer grammar, an EOT token when the conversion     *  hits the end of a rule.     */    private void build_EOFState(NFAState endNFAState) {		NFAState end = newState();        int label = Label.EOF;        if ( nfa.grammar.type==Grammar.LEXER ) {            label = Label.EOT;			end.setEOTTargetState(true);        }        /*		System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+						   " loop on end of state "+endNFAState.getDescription()+						   " to state "+end.stateNumber);        */		Transition toEnd = new Transition(label, end);        endNFAState.addTransition(toEnd);    }    /** From A B build A-e->B (that is, build an epsilon arc from right     *  of A to left of B).     *     *  As a convenience, return B if A is null or return A if B is null.     */    public StateCluster build_AB(StateCluster A, StateCluster B) {        if ( A==null ) {            return B;        }        if ( B==null ) {            return A;        }		transitionBetweenStates(A.right, B.left, Label.EPSILON);		StateCluster g = new StateCluster(A.left, B.right);        return g;    }	/** From a set ('a'|'b') build     *     *  o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)	 */	public StateCluster build_AlternativeBlockFromSet(StateCluster set) {		if ( set==null ) {			return null;		}		// single alt, no decision, just return only alt state cluster

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -