📄 llkanalyzer.java

📁 SRI international 发布的OAA框架软件
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	 *		c : b C;
	 * </pre>
	 * Here, when computing the look of rule b from rule a,
	 * we want only {B,EPSILON_TYPE} so that look(b A) will
	 * be {B,A} not {B,A,C}.
	 * <p>
	 * if the end block is not locked and the FOLLOW is
	 * wanted, the algorithm must compute the lookahead
	 * of what follows references to this rule.  If
	 * end block is locked, FOLLOW will return an empty set
	 * with a cycle to the rule associated with this end block.
	 */
	public Lookahead look(int k, RuleEndElement end) {
		if ( DEBUG_ANALYZER )
			System.out.println("lookRuleBlockEnd("+k+"); noFOLLOW="+
							   end.noFOLLOW+"; lock is "+end.lock[k]);
		if ( /*lexicalAnalysis ||*/ end.noFOLLOW ) {
			Lookahead p = new Lookahead();
			p.setEpsilon();
			p.epsilonDepth = BitSet.of(k);
			return p;
		}
		Lookahead p = FOLLOW(k,end);
		return p;
	}

	/**Compute the lookahead contributed by a rule reference.
	 *
	 * <p>
	 * When computing ruleref lookahead, we don't want the FOLLOW
	 * computation done if an empty path exists for the rule.
	 * The FOLLOW is too loose of a set...we want only to
	 * include the "local" FOLLOW or what can follow this
	 * particular ref to the node.  In other words, we use
	 * context information to reduce the complexity of the
	 * analysis and strengthen the parser.
	 *
	 * The noFOLLOW flag is used as a means of restricting
	 * the FOLLOW to a "local" FOLLOW.  This variable is
	 * orthogonal to the <tt>lock</tt> variable that prevents
	 * infinite recursion.  noFOLLOW does not care about what k is.
	 */
	public Lookahead look(int k, RuleRefElement rr) {
		if ( DEBUG_ANALYZER ) System.out.println("lookRuleRef("+k+","+rr+")");
		RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
		if ( rs==null || !rs.defined ) {
			tool.error("no definition of rule "+rr.targetRule, grammar.getFilename(), rr.getLine());
			return new Lookahead();
		}
		RuleBlock rb = rs.getBlock();
		RuleEndElement end = rb.endNode;
		boolean saveEnd = end.noFOLLOW;
		end.noFOLLOW = true;
		// go off to the rule and get the lookahead (w/o FOLLOW)
		Lookahead p = look(k, rr.targetRule);
		if ( DEBUG_ANALYZER ) System.out.println("back from rule ref to "+rr.targetRule);
		// restore state of end block
		end.noFOLLOW = saveEnd;
		
		// check for infinite recursion.  If a cycle is returned: trouble!
		if ( p.cycle!=null ) {
			tool.error("infinite recursion to rule "+p.cycle+" from rule "+
					   rr.enclosingRuleName, grammar.getFilename(), rr.getLine());
		}

		// is the local FOLLOW required?
		if ( p.containsEpsilon() ) {
			if ( DEBUG_ANALYZER )
				System.out.println("rule ref to "+
								   rr.targetRule+" has eps, depth: "+p.epsilonDepth);
	
			// remove epsilon
			p.resetEpsilon();
			// fset.clear(EPSILON_TYPE);
			
			// for each lookahead depth that saw epsilon
			int[] depths = p.epsilonDepth.toArray();
			p.epsilonDepth = null;		// clear all epsilon stuff
			for (int i=0; i<depths.length; i++) {
				int rk = k - (k-depths[i]);
				Lookahead q = rr.next.look(rk);	// see comments in Lookahead
				p.combineWith(q);
			}
			// note: any of these look() computations for local follow can
			// set EPSILON in the set again if the end of this rule is found.
		}

		return p;
	}

	public Lookahead look(int k, StringLiteralElement atom) {
		if ( DEBUG_ANALYZER ) System.out.println("lookStringLiteral("+k+","+atom+")");
		if ( lexicalAnalysis ) {
			// need more lookahead than string can provide?
			if ( k > atom.processedAtomText.length() ) {
				return atom.next.look(k - atom.processedAtomText.length());
			}
			else {
				// get char at lookahead depth k, from the processed literal text
				return Lookahead.of(atom.processedAtomText.charAt(k-1));
			}
		}
		else {
			// Skip until analysis hits k==1 
			if ( k>1 ) {
				return atom.next.look(k-1);
			}
			Lookahead l = Lookahead.of(atom.getType());
			if (atom.not) {
				// Invert the lookahead set against the token vocabulary
				int maxToken = grammar.tokenManager.maxTokenType();
				l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
			}
			return l;
		}
	}

	/**The lookahead of a (...)=> block is the lookahead of
	 * what follows the block.  By definition, the syntactic
	 * predicate block defies static analysis (you want to try it
	 * out at run-time).  The LOOK of (a)=>A B is A for LL(1)
	 * ### is this even called?
	 */
	public Lookahead look(int k, SynPredBlock blk) {
		if ( DEBUG_ANALYZER ) System.out.println("look=>("+k+","+blk+")");
		return blk.next.look(k);
	}

	public Lookahead look(int k, TokenRangeElement r) {
		if ( DEBUG_ANALYZER ) System.out.println("lookTokenRange("+k+","+r+")");
		// Skip until analysis hits k==1 
		if ( k>1 ) {
			return r.next.look(k-1);
		}
		BitSet p = BitSet.of(r.begin);
		for (int i=r.begin+1; i<=r.end; i++) {
			p.add(i);
		}
		return new Lookahead(p);
	}

	public Lookahead look(int k, TreeElement t) {
		if (DEBUG_ANALYZER)
			System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
		if (k > 1) {
			return t.next.look(k - 1);
		}
		Lookahead l = null;
		if (t.root instanceof WildcardElement) {
			l = t.root.look(1); // compute FIRST set minus previous rows
		} else {
			l = Lookahead.of(t.root.getType());
			if (t.root.not) {
				// Invert the lookahead set against the token vocabulary
				int maxToken = grammar.tokenManager.maxTokenType();
				l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
			}
		}
		return l;
	}

	public Lookahead look(int k, WildcardElement wc) {
		if ( DEBUG_ANALYZER ) System.out.println("look(" + k + "," + wc + ")");
		
		// Skip until analysis hits k==1 
		if ( k>1 ) {
			return wc.next.look(k-1);
		}

		BitSet b;
		if ( lexicalAnalysis ) {
			// Copy the character vocabulary
			b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
		}
		else {
			b = new BitSet(1);
			// Invert the lookahead set against the token vocabulary
			int maxToken = grammar.tokenManager.maxTokenType();
			b.notInPlace(Token.MIN_USER_TYPE, maxToken);
			if ( DEBUG_ANALYZER ) System.out.println("look(" + k + "," + wc + ") after not: "+b);
		}

		// Remove prediction sets from competing alternatives
		// removeCompetingPredictionSets(b, wc);

		return new Lookahead(b);
	}

	/** The (...)* element is the combined lookahead of the alternatives and what can
	 *  follow the loop.
	 */
	public Lookahead look(int k, ZeroOrMoreBlock blk) {
		if ( DEBUG_ANALYZER ) System.out.println("look*("+k+","+blk+")");
		Lookahead p = look(k, (AlternativeBlock)blk);
		Lookahead q = blk.next.look(k);
		p.combineWith(q);
		return p;
	}

	/**Compute the combined lookahead for all productions of a rule.
	 * If the lookahead returns with epsilon, at least one epsilon
	 * path exists (one that consumes no tokens).  The noFOLLOW
	 * flag being set for this endruleblk, indicates that the
	 * a rule ref invoked this rule.
	 *
	 * Currently only look(RuleRef) calls this.  There is no need
	 * for the code generator to call this.
	 */
	public Lookahead look(int k, String rule) {
		if ( DEBUG_ANALYZER ) System.out.println("lookRuleName("+k+","+rule+")");
		RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
		RuleBlock rb = rs.getBlock();
		
		if ( rb.lock[k] ) {
			if ( DEBUG_ANALYZER )
				System.out.println("infinite recursion to rule "+rb.getRuleName());
			return new Lookahead(rule);
		}

		// have we computed it before?
		if ( rb.cache[k]!=null ) {
			if ( DEBUG_ANALYZER ) {
				System.out.println("found depth "+k+" result in FIRST "+rule+" cache: "+
								   rb.cache[k].toString(",", charFormatter, grammar));
			}
			return (Lookahead)rb.cache[k].clone();
		}

		rb.lock[k] = true;
		Lookahead p = look(k, (RuleBlock)rb);
		rb.lock[k] = false;

		// cache results
		rb.cache[k] = (Lookahead)p.clone();
		if ( DEBUG_ANALYZER ) {
			System.out.println("saving depth "+k+" result in FIRST "+rule+" cache: "+
							   rb.cache[k].toString(",", charFormatter, grammar));
		}
		return p;
	}

	/** If the first k-1 sets are singleton sets, the appoximate
	 *  lookahead analysis is equivalent to full lookahead analysis.
	 */
	public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) {
		// first k-1 sets degree 1?
		for (int i=1; i<=k-1; i++) {
			BitSet look = bset[i].fset;
			if ( look.degree()>1 ) {
				return false;
			}
		}
		return true;
	}

	/** Remove the prediction sets from preceding alternatives
	 * and follow set, but *only* if this element is the first element 
	 * of the alternative.  The class members currenBlock and
	 * currentBlock.analysisAlt must be set correctly.
	 * @param b The prediction bitset to be modified
	 * @el The element of interest
	 */
	private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) {
		// Only do this if the element is the first element of the alt, 
		// because we are making an implicit assumption that k==1.
		GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
		// if element is #(. blah) then check to see if el is root
		if ( head instanceof TreeElement ) {
			if ( ((TreeElement) head).root != el ) {
				return;
			}
		}
		else if ( el != head ) {
			return;
		}
		for (int i = 0; i < currentBlock.analysisAlt; i++) {
			AlternativeElement e = currentBlock.getAlternativeAt(i).head;
			b.subtractInPlace(e.look(1).fset);
		}
	}

	/** Remove the prediction sets from preceding alternatives
	 * The class members currenBlock must be set correctly.
	 * Remove prediction sets from 1..k.
	 * @param look The prediction lookahead to be modified
	 * @el The element of interest
	 * @k  How deep into lookahead to modify
	 */
	private void removeCompetingPredictionSetsFromWildcard(Lookahead[] look, AlternativeElement el, int k) {
		for (int d = 1; d <= k; d++) {
			for (int i = 0; i < currentBlock.analysisAlt; i++) {
				AlternativeElement e = currentBlock.getAlternativeAt(i).head;
				look[d].fset.subtractInPlace(e.look(d).fset);
			}
		}
	}

	/** reset the analyzer so it looks like a new one */
	private void reset() {
		grammar = null;
		DEBUG_ANALYZER = false;
		currentBlock = null;
		lexicalAnalysis = false;
	}

	/** Set the grammar for the analyzer */
	public void setGrammar(Grammar g) {
		if (grammar != null) {
			reset();
		}
		grammar = g;

		// Is this lexical?
		lexicalAnalysis = (grammar instanceof LexerGrammar);
		DEBUG_ANALYZER = grammar.analyzerDebug;
	}

	public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer)
	{
		if (
			blk instanceof ZeroOrMoreBlock ||
			blk instanceof OneOrMoreBlock ||
			blk instanceof SynPredBlock
			) {
			return false;
		}
		// Cannot invert an empty subrule
		if (blk.alternatives.size() == 0) {
			return false;
		}
		// The block must only contain alternatives with a single element,
		// where each element is a char, token, char range, or token range.
		for (int i = 0; i < blk.alternatives.size(); i++) {
			Alternative alt = blk.getAlternativeAt(i);
			// Cannot have anything interesting in the alternative ...
			if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
				return false;
			}
			// ... and there must be one simple element
			AlternativeElement elt = alt.head;
			if (
				!(
				  elt instanceof CharLiteralElement ||
				  elt instanceof TokenRefElement ||
				  elt instanceof CharRangeElement ||
				  elt instanceof TokenRangeElement ||
				  (elt instanceof StringLiteralElement && !forLexer)
				  ) ||
				!(elt.next instanceof BlockEndElement) ||
				elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
				) {
				return false;
			}
		}
		return true;
	}
}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -