📄 llkanalyzer.java

📁 SRI international 发布的OAA框架软件
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
			else {
				alt.lookaheadDepth = Math.max(alt.lookaheadDepth,k);
				blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth,k);
			}
		}
		return det;
    }

	/**Compute the lookahead set of whatever follows references to
	 * the rule associated witht the FOLLOW block.
	 */
	public Lookahead FOLLOW(int k, RuleEndElement end) {
		// what rule are we trying to compute FOLLOW of?
		RuleBlock rb = (RuleBlock)end.block;
		// rule name is different in lexer
		String rule;
		if (lexicalAnalysis) {
			rule = CodeGenerator.lexerRuleName(rb.getRuleName());
		} else {
			rule = rb.getRuleName();
		}

		if ( DEBUG_ANALYZER ) System.out.println("FOLLOW("+k+","+rule+")");

		// are we in the midst of computing this FOLLOW already?
		if ( end.lock[k] ) {
			if ( DEBUG_ANALYZER ) System.out.println("FOLLOW cycle to "+rule);
			return new Lookahead(rule);
		}

		// Check to see if there is cached value
		if ( end.cache[k]!=null ) {
			if ( DEBUG_ANALYZER ) {
				System.out.println("cache entry FOLLOW("+k+") for "+rule+": "+end.cache[k].toString(",", charFormatter, grammar));
			}
			// if the cache is a complete computation then simply return entry 
			if ( end.cache[k].cycle==null ) {
				return (Lookahead)end.cache[k].clone();
			}
			// A cache entry exists, but it is a reference to a cyclic computation.
			RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
			RuleEndElement re = rs.getBlock().endNode;
			// The other entry may not exist because it is still being
			// computed when this cycle cache entry was found here.
			if ( re.cache[k]==null ) {
				// return the cycle...that's all we can do at the moment.
				return (Lookahead)end.cache[k].clone();
			}
			else {
				// replace this cache entry with the entry from the referenced computation.
				// Eventually, this percolates a complete (no cycle reference) cache entry
				// to this node (or at least gets it closer and closer).  This is not
				// crucial, but makes cache lookup faster as we might have to look up
				// lots of cycle references before finding a complete reference.
				end.cache[k] = re.cache[k];
				// Return the cache entry associated with the cycle reference.
				return (Lookahead)re.cache[k].clone();
			}
		}

		end.lock[k] = true;	// prevent FOLLOW computation cycles

		Lookahead p = new Lookahead();

		RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);

		// Walk list of references to this rule to compute FOLLOW
		for (int i=0; i<rs.numReferences(); i++) {
			RuleRefElement rr = rs.getReference(i);
			if ( DEBUG_ANALYZER ) System.out.println("next["+rule+"] is "+rr.next.toString());
			Lookahead q = rr.next.look(k);
			if ( DEBUG_ANALYZER ) System.out.println("FIRST of next["+rule+"] ptr is "+q.toString());
			/* If there is a cycle then if the cycle is to the rule for
			 * this end block, you have a cycle to yourself.  Remove the
			 * cycle indication--the lookahead is complete.
			 */
			if ( q.cycle!=null && q.cycle.equals(rule) ) {
				q.cycle = null;	// don't want cycle to yourself!
			}
			// add the lookahead into the current FOLLOW computation set
			p.combineWith(q);
			if ( DEBUG_ANALYZER ) System.out.println("combined FOLLOW["+rule+"] is "+p.toString());
		}

		end.lock[k] = false; // we're not doing FOLLOW anymore

		// if no rules follow this, it can be a start symbol or called by a start sym.
		// set the follow to be end of file.
		if ( p.fset.nil() && p.cycle==null ) {
			if ( grammar instanceof TreeWalkerGrammar ) {
				// Tree grammars don't see EOF, they see end of sibling list or
				// "NULL TREE LOOKAHEAD".
				p.fset.add(Token.NULL_TREE_LOOKAHEAD);
			}
			else if ( grammar instanceof LexerGrammar ) {
				// Lexical grammars use Epsilon to indicate that the end of rule has been hit
				// EOF would be misleading; any character can follow a token rule not just EOF
				// as in a grammar (where a start symbol is followed by EOF).  There is no
				// sequence info in a lexer between tokens to indicate what is the last token
				// to be seen.
				// p.fset.add(EPSILON_TYPE);
				p.setEpsilon();
			}	
			else {
				p.fset.add(Token.EOF_TYPE);
			}
		}

		// Cache the result of the FOLLOW computation
		if ( DEBUG_ANALYZER ) {
			System.out.println("saving FOLLOW("+k+") for "+rule+": "+p.toString(",", charFormatter, grammar));
		}
		end.cache[k] = (Lookahead)p.clone();

		return p;
	}

	private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
		Lookahead p;
		Alternative a = blk.getAlternativeAt(alt);
		AlternativeElement e = a.head;
		//System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
		if ( a.cache[k]==null ) {
			p = e.look(k);
			a.cache[k] = p;
		}
		else {
			p = a.cache[k];
		}
		return p;
	}

	/**Actions are ignored */
	public Lookahead look(int k, ActionElement action) {
		if ( DEBUG_ANALYZER ) System.out.println("lookAction("+k+","+action+")");
		return action.next.look(k);
	}

	/**Combine the lookahead computed for each alternative */
	public Lookahead look(int k, AlternativeBlock blk) {
		if ( DEBUG_ANALYZER ) System.out.println("lookAltBlk(" + k + "," + blk + ")");
		AlternativeBlock saveCurrentBlock = currentBlock;
		currentBlock = blk;
		Lookahead p = new Lookahead();
		for (int i=0; i<blk.alternatives.size(); i++) {
			if ( DEBUG_ANALYZER ) System.out.println("alt " + i + " of "+blk);
			// must set analysis alt
			currentBlock.analysisAlt = i;
			Alternative alt = blk.getAlternativeAt(i);
			AlternativeElement elem = alt.head;
			if ( DEBUG_ANALYZER ) {
				if ( alt.head == alt.tail ) {
					System.out.println("alt " + i + " is empty");
				}
			}
			Lookahead q = elem.look(k);
			p.combineWith(q);
		}
		if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
			// Invert the lookahead set
			if (lexicalAnalysis) {
				BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
				int[] elems = p.fset.toArray();
				for (int j = 0; j < elems.length; j++) {
					b.remove(elems[j]);
				}
				p.fset = b;
			} else {
				p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
			}
		}
		currentBlock = saveCurrentBlock;
		return p;
	}

	/**Compute what follows this place-holder node and possibly
	 * what begins the associated loop unless the
	 * node is locked.
	 * <p>
	 * if we hit the end of a loop, we have to include
	 * what tokens can begin the loop as well.  If the start
	 * node is locked, then we simply found an empty path
	 * through this subrule while analyzing it.  If the
	 * start node is not locked, then this node was hit
	 * during a FOLLOW operation and the FIRST of this
	 * block must be included in that lookahead computation.
	 */
	public Lookahead look(int k, BlockEndElement end) {
		if ( DEBUG_ANALYZER ) System.out.println("lookBlockEnd("+k+", "+end.block+"); lock is "+end.lock[k]);
		if ( end.lock[k] ) {
			// computation in progress => the tokens we would have
			// computed (had we not been locked) will be included
			// in the set by that computation with the lock on this
			// node.
			return new Lookahead();
		}

		Lookahead p;

		/* Hitting the end of a loop means you can see what begins the loop */
		if ( end.block instanceof ZeroOrMoreBlock ||
			 end.block instanceof OneOrMoreBlock ) {
			// compute what can start the block,
			// but lock end node so we don't do it twice in same
			// computation.
			end.lock[k] = true;
			p = look(k, end.block);
			end.lock[k] = false;
		}
		else {
			p = new Lookahead();
		}

		/* Tree blocks do not have any follow because they are children
		 * of what surrounds them.  For example, A #(B C) D results in
		 * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
		 * indicates that nothing can follow the last node of tree #(B C)
		 */
		if (end.block instanceof TreeElement) {
			p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
		}
		
		/* Syntactic predicates such as ( (A)? )=> have no follow per se.
		 * We cannot accurately say what would be matched following a
		 * syntactic predicate (you MIGHT be ok if you said it was whatever
		 * followed the alternative predicted by the predicate).  Hence,
		 * (like end-of-token) we return Epsilon to indicate "unknown
		 * lookahead."
		 */
		else if ( end.block instanceof SynPredBlock ) {
			p.setEpsilon();
		}	

		// compute what can follow the block
		else {
			Lookahead q = end.block.next.look(k);
			p.combineWith(q);
		}
		
		return p;
	}

	/**Return this char as the lookahead if k=1.
	 * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
	 * <p>
	 * If the atom has the <tt>not</tt> flag on, then
	 * create the set complement of the tokenType
	 * which is the set of all characters referenced
	 * in the grammar with this char turned off.
	 * Also remove characters from the set that
	 * are currently allocated for predicting
	 * previous alternatives.  This avoids ambiguity
	 * messages and is more properly what is meant.
	 * ( 'a' | ~'a' ) implies that the ~'a' is the
	 * "else" clause.
	 * <p>
	 * NOTE: we do <b>NOT</b> include exit path in
	 * the exclusion set. E.g.,
	 * ( 'a' | ~'a' )* 'b'
	 * should exit upon seeing a 'b' during the loop.
	 */
	public Lookahead look(int k, CharLiteralElement atom) {
		if ( DEBUG_ANALYZER ) System.out.println("lookCharLiteral("+k+","+atom+")");
		// Skip until analysis hits k==1 
		if ( k>1 ) {
			return atom.next.look(k-1);
		}
		if ( lexicalAnalysis) {
			if (atom.not) {
				BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
				if ( DEBUG_ANALYZER ) System.out.println("charVocab is "+b.toString());
				// remove stuff predicted by preceding alts and follow of block
				removeCompetingPredictionSets(b, atom);
				if ( DEBUG_ANALYZER ) System.out.println("charVocab after removal of prior alt lookahead "+b.toString());
				// now remove element that is stated not to be in the set
				b.clear(atom.getType());
				return new Lookahead(b);
			} else {
				return Lookahead.of(atom.getType());
			}
		}
		else {
			// Should have been avoided by MakeGrammar
			tool.panic("Character literal reference found in parser");
			// ... so we make the compiler happy
			return Lookahead.of(atom.getType());
		}
	}

	public Lookahead look(int k, CharRangeElement r) {
		if ( DEBUG_ANALYZER ) System.out.println("lookCharRange("+k+","+r+")");
		// Skip until analysis hits k==1 
		if ( k>1 ) {
			return r.next.look(k-1);
		}
		BitSet p = BitSet.of(r.begin);
		for (int i=r.begin+1; i<=r.end; i++) {
			p.add(i);
		}
		return new Lookahead(p);
	}

	public Lookahead look(int k, GrammarAtom atom) {
		if ( DEBUG_ANALYZER ) System.out.println("look("+k+","+atom+"["+atom.getType()+"])");
		
		if ( lexicalAnalysis ) {
			// MakeGrammar should have created a rule reference instead
			tool.panic("token reference found in lexer");
		}
		// Skip until analysis hits k==1 
		if ( k>1 ) {
			return atom.next.look(k-1);
		}
		Lookahead l = Lookahead.of(atom.getType());
		if (atom.not) {
			// Invert the lookahead set against the token vocabulary
			int maxToken = grammar.tokenManager.maxTokenType();
			l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
			// remove stuff predicted by preceding alts and follow of block
			removeCompetingPredictionSets(l.fset, atom);
		}
		return l;
	}

	/**The lookahead of a (...)+ block is the combined lookahead of
	 * all alternatives and, if an empty path is found, the lookahead
	 * of what follows the block.
	 */
	public Lookahead look(int k, OneOrMoreBlock blk) {
		if ( DEBUG_ANALYZER ) System.out.println("look+"+k+","+blk+")");
		Lookahead p = look(k, (AlternativeBlock)blk);
		return p;
	}

	/**Combine the lookahead computed for each alternative.
	 * Lock the node so that no other computation may come back
	 * on itself--infinite loop.  This also implies infinite left-recursion
	 * in the grammar (or an error in this algorithm ;)).
	 */
	public Lookahead look(int k, RuleBlock blk) {
		if ( DEBUG_ANALYZER ) System.out.println("lookRuleBlk("+k+","+blk+")");
		Lookahead p = look(k, (AlternativeBlock)blk);
		return p;
	}

	/**If not locked or noFOLLOW set, compute FOLLOW of a rule.
	 * <p>
	 * TJP says 8/12/99: not true anymore:
	 * Lexical rules never compute follow.  They set epsilon and
	 * the code generator gens code to check for any character.
	 * The code generator must remove the tokens used to predict
	 * any previous alts in the same block.
	 * <p>
	 * When the last node of a rule is reached and noFOLLOW,
	 * it implies that a "local" FOLLOW will be computed
	 * after this call.  I.e.,
	 * <pre>
	 *		a : b A;
	 *		b : B | ;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -