📄 llkanalyzer.java
字号:
else {
alt.lookaheadDepth = Math.max(alt.lookaheadDepth,k);
blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth,k);
}
}
return det;
}
/**Compute the lookahead set of whatever follows references to
* the rule associated witht the FOLLOW block.
*/
public Lookahead FOLLOW(int k, RuleEndElement end) {
// what rule are we trying to compute FOLLOW of?
RuleBlock rb = (RuleBlock)end.block;
// rule name is different in lexer
String rule;
if (lexicalAnalysis) {
rule = CodeGenerator.lexerRuleName(rb.getRuleName());
} else {
rule = rb.getRuleName();
}
if ( DEBUG_ANALYZER ) System.out.println("FOLLOW("+k+","+rule+")");
// are we in the midst of computing this FOLLOW already?
if ( end.lock[k] ) {
if ( DEBUG_ANALYZER ) System.out.println("FOLLOW cycle to "+rule);
return new Lookahead(rule);
}
// Check to see if there is cached value
if ( end.cache[k]!=null ) {
if ( DEBUG_ANALYZER ) {
System.out.println("cache entry FOLLOW("+k+") for "+rule+": "+end.cache[k].toString(",", charFormatter, grammar));
}
// if the cache is a complete computation then simply return entry
if ( end.cache[k].cycle==null ) {
return (Lookahead)end.cache[k].clone();
}
// A cache entry exists, but it is a reference to a cyclic computation.
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
RuleEndElement re = rs.getBlock().endNode;
// The other entry may not exist because it is still being
// computed when this cycle cache entry was found here.
if ( re.cache[k]==null ) {
// return the cycle...that's all we can do at the moment.
return (Lookahead)end.cache[k].clone();
}
else {
// replace this cache entry with the entry from the referenced computation.
// Eventually, this percolates a complete (no cycle reference) cache entry
// to this node (or at least gets it closer and closer). This is not
// crucial, but makes cache lookup faster as we might have to look up
// lots of cycle references before finding a complete reference.
end.cache[k] = re.cache[k];
// Return the cache entry associated with the cycle reference.
return (Lookahead)re.cache[k].clone();
}
}
end.lock[k] = true; // prevent FOLLOW computation cycles
Lookahead p = new Lookahead();
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
// Walk list of references to this rule to compute FOLLOW
for (int i=0; i<rs.numReferences(); i++) {
RuleRefElement rr = rs.getReference(i);
if ( DEBUG_ANALYZER ) System.out.println("next["+rule+"] is "+rr.next.toString());
Lookahead q = rr.next.look(k);
if ( DEBUG_ANALYZER ) System.out.println("FIRST of next["+rule+"] ptr is "+q.toString());
/* If there is a cycle then if the cycle is to the rule for
* this end block, you have a cycle to yourself. Remove the
* cycle indication--the lookahead is complete.
*/
if ( q.cycle!=null && q.cycle.equals(rule) ) {
q.cycle = null; // don't want cycle to yourself!
}
// add the lookahead into the current FOLLOW computation set
p.combineWith(q);
if ( DEBUG_ANALYZER ) System.out.println("combined FOLLOW["+rule+"] is "+p.toString());
}
end.lock[k] = false; // we're not doing FOLLOW anymore
// if no rules follow this, it can be a start symbol or called by a start sym.
// set the follow to be end of file.
if ( p.fset.nil() && p.cycle==null ) {
if ( grammar instanceof TreeWalkerGrammar ) {
// Tree grammars don't see EOF, they see end of sibling list or
// "NULL TREE LOOKAHEAD".
p.fset.add(Token.NULL_TREE_LOOKAHEAD);
}
else if ( grammar instanceof LexerGrammar ) {
// Lexical grammars use Epsilon to indicate that the end of rule has been hit
// EOF would be misleading; any character can follow a token rule not just EOF
// as in a grammar (where a start symbol is followed by EOF). There is no
// sequence info in a lexer between tokens to indicate what is the last token
// to be seen.
// p.fset.add(EPSILON_TYPE);
p.setEpsilon();
}
else {
p.fset.add(Token.EOF_TYPE);
}
}
// Cache the result of the FOLLOW computation
if ( DEBUG_ANALYZER ) {
System.out.println("saving FOLLOW("+k+") for "+rule+": "+p.toString(",", charFormatter, grammar));
}
end.cache[k] = (Lookahead)p.clone();
return p;
}
private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
Lookahead p;
Alternative a = blk.getAlternativeAt(alt);
AlternativeElement e = a.head;
//System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
if ( a.cache[k]==null ) {
p = e.look(k);
a.cache[k] = p;
}
else {
p = a.cache[k];
}
return p;
}
/**Actions are ignored */
public Lookahead look(int k, ActionElement action) {
if ( DEBUG_ANALYZER ) System.out.println("lookAction("+k+","+action+")");
return action.next.look(k);
}
/**Combine the lookahead computed for each alternative */
public Lookahead look(int k, AlternativeBlock blk) {
if ( DEBUG_ANALYZER ) System.out.println("lookAltBlk(" + k + "," + blk + ")");
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
Lookahead p = new Lookahead();
for (int i=0; i<blk.alternatives.size(); i++) {
if ( DEBUG_ANALYZER ) System.out.println("alt " + i + " of "+blk);
// must set analysis alt
currentBlock.analysisAlt = i;
Alternative alt = blk.getAlternativeAt(i);
AlternativeElement elem = alt.head;
if ( DEBUG_ANALYZER ) {
if ( alt.head == alt.tail ) {
System.out.println("alt " + i + " is empty");
}
}
Lookahead q = elem.look(k);
p.combineWith(q);
}
if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
// Invert the lookahead set
if (lexicalAnalysis) {
BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
int[] elems = p.fset.toArray();
for (int j = 0; j < elems.length; j++) {
b.remove(elems[j]);
}
p.fset = b;
} else {
p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
}
}
currentBlock = saveCurrentBlock;
return p;
}
/**Compute what follows this place-holder node and possibly
* what begins the associated loop unless the
* node is locked.
* <p>
* if we hit the end of a loop, we have to include
* what tokens can begin the loop as well. If the start
* node is locked, then we simply found an empty path
* through this subrule while analyzing it. If the
* start node is not locked, then this node was hit
* during a FOLLOW operation and the FIRST of this
* block must be included in that lookahead computation.
*/
public Lookahead look(int k, BlockEndElement end) {
if ( DEBUG_ANALYZER ) System.out.println("lookBlockEnd("+k+", "+end.block+"); lock is "+end.lock[k]);
if ( end.lock[k] ) {
// computation in progress => the tokens we would have
// computed (had we not been locked) will be included
// in the set by that computation with the lock on this
// node.
return new Lookahead();
}
Lookahead p;
/* Hitting the end of a loop means you can see what begins the loop */
if ( end.block instanceof ZeroOrMoreBlock ||
end.block instanceof OneOrMoreBlock ) {
// compute what can start the block,
// but lock end node so we don't do it twice in same
// computation.
end.lock[k] = true;
p = look(k, end.block);
end.lock[k] = false;
}
else {
p = new Lookahead();
}
/* Tree blocks do not have any follow because they are children
* of what surrounds them. For example, A #(B C) D results in
* a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
* indicates that nothing can follow the last node of tree #(B C)
*/
if (end.block instanceof TreeElement) {
p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
}
/* Syntactic predicates such as ( (A)? )=> have no follow per se.
* We cannot accurately say what would be matched following a
* syntactic predicate (you MIGHT be ok if you said it was whatever
* followed the alternative predicted by the predicate). Hence,
* (like end-of-token) we return Epsilon to indicate "unknown
* lookahead."
*/
else if ( end.block instanceof SynPredBlock ) {
p.setEpsilon();
}
// compute what can follow the block
else {
Lookahead q = end.block.next.look(k);
p.combineWith(q);
}
return p;
}
/**Return this char as the lookahead if k=1.
* <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
* <p>
* If the atom has the <tt>not</tt> flag on, then
* create the set complement of the tokenType
* which is the set of all characters referenced
* in the grammar with this char turned off.
* Also remove characters from the set that
* are currently allocated for predicting
* previous alternatives. This avoids ambiguity
* messages and is more properly what is meant.
* ( 'a' | ~'a' ) implies that the ~'a' is the
* "else" clause.
* <p>
* NOTE: we do <b>NOT</b> include exit path in
* the exclusion set. E.g.,
* ( 'a' | ~'a' )* 'b'
* should exit upon seeing a 'b' during the loop.
*/
public Lookahead look(int k, CharLiteralElement atom) {
if ( DEBUG_ANALYZER ) System.out.println("lookCharLiteral("+k+","+atom+")");
// Skip until analysis hits k==1
if ( k>1 ) {
return atom.next.look(k-1);
}
if ( lexicalAnalysis) {
if (atom.not) {
BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
if ( DEBUG_ANALYZER ) System.out.println("charVocab is "+b.toString());
// remove stuff predicted by preceding alts and follow of block
removeCompetingPredictionSets(b, atom);
if ( DEBUG_ANALYZER ) System.out.println("charVocab after removal of prior alt lookahead "+b.toString());
// now remove element that is stated not to be in the set
b.clear(atom.getType());
return new Lookahead(b);
} else {
return Lookahead.of(atom.getType());
}
}
else {
// Should have been avoided by MakeGrammar
tool.panic("Character literal reference found in parser");
// ... so we make the compiler happy
return Lookahead.of(atom.getType());
}
}
public Lookahead look(int k, CharRangeElement r) {
if ( DEBUG_ANALYZER ) System.out.println("lookCharRange("+k+","+r+")");
// Skip until analysis hits k==1
if ( k>1 ) {
return r.next.look(k-1);
}
BitSet p = BitSet.of(r.begin);
for (int i=r.begin+1; i<=r.end; i++) {
p.add(i);
}
return new Lookahead(p);
}
public Lookahead look(int k, GrammarAtom atom) {
if ( DEBUG_ANALYZER ) System.out.println("look("+k+","+atom+"["+atom.getType()+"])");
if ( lexicalAnalysis ) {
// MakeGrammar should have created a rule reference instead
tool.panic("token reference found in lexer");
}
// Skip until analysis hits k==1
if ( k>1 ) {
return atom.next.look(k-1);
}
Lookahead l = Lookahead.of(atom.getType());
if (atom.not) {
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
// remove stuff predicted by preceding alts and follow of block
removeCompetingPredictionSets(l.fset, atom);
}
return l;
}
/**The lookahead of a (...)+ block is the combined lookahead of
* all alternatives and, if an empty path is found, the lookahead
* of what follows the block.
*/
public Lookahead look(int k, OneOrMoreBlock blk) {
if ( DEBUG_ANALYZER ) System.out.println("look+"+k+","+blk+")");
Lookahead p = look(k, (AlternativeBlock)blk);
return p;
}
/**Combine the lookahead computed for each alternative.
* Lock the node so that no other computation may come back
* on itself--infinite loop. This also implies infinite left-recursion
* in the grammar (or an error in this algorithm ;)).
*/
public Lookahead look(int k, RuleBlock blk) {
if ( DEBUG_ANALYZER ) System.out.println("lookRuleBlk("+k+","+blk+")");
Lookahead p = look(k, (AlternativeBlock)blk);
return p;
}
/**If not locked or noFOLLOW set, compute FOLLOW of a rule.
* <p>
* TJP says 8/12/99: not true anymore:
* Lexical rules never compute follow. They set epsilon and
* the code generator gens code to check for any character.
* The code generator must remove the tokens used to predict
* any previous alts in the same block.
* <p>
* When the last node of a rule is reached and noFOLLOW,
* it implies that a "local" FOLLOW will be computed
* after this call. I.e.,
* <pre>
* a : b A;
* b : B | ;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -