📄 baserecognizer.java
字号:
package org.antlr.runtime;

import java.util.*;

/** A generic recognizer that can handle recognizers generated from
 *  lexer, parser, and tree grammars.  This is all the parsing
 *  support code essentially; most of it is error recovery stuff and
 *  backtracking.
 */
public abstract class BaseRecognizer {
    // Memoization sentinel: the rule was attempted at this start index and failed.
    public static final int MEMO_RULE_FAILED = -2;
    // Memoization sentinel: no result recorded for this start index.
    public static final int MEMO_RULE_UNKNOWN = -1;
    // Initial capacity of the 'following' stack; grows 2x when exceeded.
    public static final int INITIAL_FOLLOW_STACK_SIZE = 100;

    // copies from Token object for convenience in actions
    public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
    public static final int HIDDEN = Token.HIDDEN_CHANNEL;

    /** Track the set of token types that can follow any rule invocation.
     *  Stack grows upwards.  When it hits the max, it grows 2x in size
     *  and keeps going.
     */
    protected BitSet[] following = new BitSet[INITIAL_FOLLOW_STACK_SIZE];
    // Index of the top of the 'following' stack; -1 means empty.
    protected int _fsp = -1;

    /** This is true when we see an error and before having successfully
     *  matched a token.  Prevents generation of more than one error message
     *  per error.
     */
    protected boolean errorRecovery = false;

    /** The index into the input stream where the last error occurred.
     *  This is used to prevent infinite loops where an error is found
     *  but no token is consumed during recovery...another error is found,
     *  ad nauseam.  This is a failsafe mechanism to guarantee that at least
     *  one token/tree node is consumed for two errors.
     */
    protected int lastErrorIndex = -1;

    /** In lieu of a return value, this indicates that a rule or token
     *  has failed to match.  Reset to false upon valid token match.
     */
    protected boolean failed = false;

    /** If 0, no backtracking is going on.  Safe to exec actions etc...
     *  If >0 then it's the level of backtracking.
     */
    protected int backtracking = 0;

    /** An array[size num rules] of Map<Integer,Integer> that tracks
     *  the stop token index for each rule.  ruleMemo[ruleIndex] is
     *  the memoization table for ruleIndex.  For key ruleStartIndex, you
     *  get back the stop token for associated rule or MEMO_RULE_FAILED.
     *
     *  This is only used if rule memoization is on (which it is by default).
     */
    protected Map[] ruleMemo;

    /** reset the parser's state */
    public void reset() {
        // TODO: implement this!
        //following.setSize(0);
    }

    /** Match current input symbol against ttype.  Upon error, do one token
     *  insertion or deletion if possible.  You can override to not recover
     *  here and bail out of the current production to the normal error
     *  exception catch (at the end of the method) by just throwing
     *  MismatchedTokenException upon input.LA(1)!=ttype.
     */
    public void match(IntStream input, int ttype, BitSet follow)
        throws RecognitionException
    {
        if ( input.LA(1)==ttype ) {
            input.consume();
            errorRecovery = false;
            failed = false;
            return;
        }
        if ( backtracking>0 ) {
            // while guessing (backtracking), record the failure rather than
            // reporting an error or attempting recovery
            failed = true;
            return;
        }
        mismatch(input, ttype, follow);
        return;
    }

    /** Consume the current input symbol unconditionally; always succeeds,
     *  so it also clears the error-recovery and failed flags.
     */
    public void matchAny(IntStream input) {
        errorRecovery = false;
        failed = false;
        input.consume();
    }

    /** factor out what to do upon token mismatch so tree parsers can behave
     *  differently.
     */
    protected void mismatch(IntStream input, int ttype, BitSet follow)
        throws RecognitionException
    {
        MismatchedTokenException mte =
            new MismatchedTokenException(ttype, input);
        recoverFromMismatchedToken(input, mte, ttype, follow);
    }

    /** Report a recognition problem.  Java is not polymorphic on the
     *  argument types so you have to check the type of exception yourself.
     *  That's not very clean but it's better than generating a bunch of
     *  catch clauses in each rule and makes it easy to extend with
     *  more exceptions w/o breaking old code.
     *
     *  This method sets errorRecovery to indicate the parser is recovering
     *  not parsing.  Once in recovery mode, no errors are generated.
     *  To get out of recovery mode, the parser must successfully match
     *  a token (after a resync).  So it will go:
     *
     *  1. error occurs
     *  2. enter recovery mode, report error
     *  3. consume until token found in resynch set
     *  4. try to resume parsing
     *  5. next match() will reset errorRecovery mode
     */
    public void reportError(RecognitionException e) {
        // if we've already reported an error and have not matched a token
        // yet successfully, don't report any errors.
        if ( errorRecovery ) {
            //System.err.print("[SPURIOUS] ");
            return;
        }
        errorRecovery = true;
        displayRecognitionError(this.getClass().getName(),
                                this.getTokenNames(),
                                e);
    }

    /** Print a description of recognition error e to System.err, dispatching
     *  on the concrete exception subtype via instanceof checks.  tokenNames
     *  maps token type numbers to display names for the "expecting" messages;
     *  name is the recognizer class name used to render the rule invocation
     *  stack.
     */
    public static void displayRecognitionError(String name,
                                               String[] tokenNames,
                                               RecognitionException e)
    {
        System.err.print(getRuleInvocationStack(e, name)+
                         ": line "+e.line+":"+e.charPositionInLine+" ");
        if ( e instanceof MismatchedTokenException ) {
            MismatchedTokenException mte = (MismatchedTokenException)e;
            String tokenName="<unknown>";
            if ( mte.expecting==Token.EOF ) {
                tokenName = "EOF";
            }
            else {
                tokenName = tokenNames[mte.expecting];
            }
            System.err.println("mismatched token: "+
                               e.token+
                               "; expecting type "+
                               tokenName);
        }
        else if ( e instanceof MismatchedTreeNodeException ) {
            MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
            String tokenName="<unknown>";
            if ( mtne.expecting==Token.EOF ) {
                tokenName = "EOF";
            }
            else {
                tokenName = tokenNames[mtne.expecting];
            }
            System.err.println("mismatched tree node: "+
                               mtne.foundNode+
                               "; expecting type "+
                               tokenName);
        }
        else if ( e instanceof NoViableAltException ) {
            NoViableAltException nvae = (NoViableAltException)e;
            System.err.println(//"decision=<<"+nvae.grammarDecisionDescription+">>"+
                               "state "+nvae.stateNumber+
                               " (decision="+nvae.decisionNumber+
                               ") no viable alt; token="+
                               e.token);
        }
        else if ( e instanceof EarlyExitException ) {
            EarlyExitException eee = (EarlyExitException)e;
            System.err.println("required (...)+ loop (decision="+
                               eee.decisionNumber+
                               ") did not match anything; token="+
                               e.token);
        }
        else if ( e instanceof MismatchedSetException ) {
            MismatchedSetException mse = (MismatchedSetException)e;
            System.err.println("mismatched token: "+
                               e.token+
                               "; expecting set "+mse.expecting);
        }
        else if ( e instanceof MismatchedNotSetException ) {
            MismatchedNotSetException mse = (MismatchedNotSetException)e;
            System.err.println("mismatched token: "+
                               e.token+
                               "; expecting set "+mse.expecting);
        }
        else if ( e instanceof FailedPredicateException ) {
            FailedPredicateException fpe = (FailedPredicateException)e;
            System.err.println("rule "+fpe.ruleName+" failed predicate: {"+
                               fpe.predicateText+"}?");
        }
    }

    /** Recover from an error found on the input stream.  Mostly this is
     *  NoViableAlt exceptions, but could be a mismatched token that
     *  the match() routine could not recover from.
     */
    public void recover(IntStream input, RecognitionException re) {
        if ( lastErrorIndex==input.index() ) {
            // uh oh, another error at same token index; must be a case
            // where LT(1) is in the recovery token set so nothing is
            // consumed; consume a single token so at least to prevent
            // an infinite loop; this is a failsafe.
            input.consume();
        }
        lastErrorIndex = input.index();
        BitSet followSet = computeErrorRecoverySet();
        beginResync();
        consumeUntil(input, followSet);
        endResync();
    }

    /** A hook to listen in on the token consumption during error recovery.
     *  The DebugParser subclasses this to fire events to the listener.
     */
    public void beginResync() {
    }

    public void endResync() {
    }

    /*  Compute the error recovery set for the current rule.  During
     *  rule invocation, the parser pushes the set of tokens that can
     *  follow that rule reference on the stack; this amounts to
     *  computing FIRST of what follows the rule reference in the
     *  enclosing rule.  This local follow set only includes tokens
     *  from within the rule; i.e., the FIRST computation done by
     *  ANTLR stops at the end of a rule.
     *
     *  EXAMPLE
     *
     *  When you find a "no viable alt exception", the input is not
     *  consistent with any of the alternatives for rule r.  The best
     *  thing to do is to consume tokens until you see something that
     *  can legally follow a call to r *or* any rule that called r.
     *  You don't want the exact set of viable next tokens because the
     *  input might just be missing a token--you might consume the
     *  rest of the input looking for one of the missing tokens.
     *
     *  Consider grammar:
     *
     *  a : '[' b ']'
     *    | '(' b ')'
     *    ;
     *  b : c '^' INT ;
     *  c : ID
     *    | INT
     *    ;
     *
     *  At each rule invocation, the set of tokens that could follow
     *  that rule is pushed on a stack.  Here are the various "local"
     *  follow sets:
     *
     *  FOLLOW(b1_in_a) = FIRST(']') = ']'
     *  FOLLOW(b2_in_a) = FIRST(')') = ')'
     *  FOLLOW(c_in_b)  = FIRST('^') = '^'
     *
     *  Upon erroneous input "[]", the call chain is
     *
     *  a -> b -> c
     *
     *  and, hence, the follow context stack is:
     *
     *  depth  local follow set     after call to rule
     *    0         <EOF>                    a (from main())
     *    1          ']'                     b
     *    2          '^'                     c
     *
     *  Notice that ')' is not included, because b would have to have
     *  been called from a different context in rule a for ')' to be
     *  included.
     *
     *  For error recovery, we cannot consider FOLLOW(c)
     *  (context-sensitive or otherwise).  We need the combined set of
     *  all context-sensitive FOLLOW sets--the set of all tokens that
     *  could follow any reference in the call chain.  We need to
     *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
     *  we resync'd to that token, we'd consume until EOF.  We need to
     *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
     *  In this case, for input "[]", LA(1) is in this set so we would
     *  not consume anything and after printing an error rule c would
     *  return normally.  It would not find the required '^' though.
     *  At this point, it gets a mismatched token error and throws an
     *  exception (since LA(1) is not in the viable following token
     *  set).  The rule exception handler tries to recover, but finds
     *  the same recovery set and doesn't consume anything.  Rule b
     *  exits normally returning to rule a.  Now it finds the ']' (and
     *  with the successful match exits errorRecovery mode).
     *
     *  So, you can see that the parser walks up the call chain looking
     *  for the token that was a member of the recovery set.
     *
     *  Errors are not generated in errorRecovery mode.
     *
     *  ANTLR's error recovery mechanism is based upon original ideas:
     *
     *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
     *
     *  and
     *
     *  "A note on error recovery in recursive descent parsers":
     *  http://portal.acm.org/citation.cfm?id=947902.947905
     *
     *  Later, Josef Grosch had some good ideas:
     *
     *  "Efficient and Comfortable Error Recovery in Recursive Descent
     *  Parsers":
     *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
     *
     *  Like Grosch I implemented local FOLLOW sets that are combined
     *  at run-time upon error to avoid overhead during parsing.
     */
    protected BitSet computeErrorRecoverySet() {
        return combineFollows(false);
    }

    // NOTE(review): the javadoc below is truncated mid-sentence -- this source
    // chunk ends here.  The method it documents (presumably
    // computeContextSensitiveRuleFOLLOW) lies outside the visible range;
    // recover the remainder from the original ANTLR 3 runtime source.
    /** Compute the context-sensitive FOLLOW set for current rule.
     *  This is set of token types that can follow a specific rule
     *  reference given a specific call chain.  You get the set of
     *  viable tokens that can possibly come next (lookahead depth 1)
     *  given the current call chain.  Contrast this with the
     *  definition of plain FOLLOW for rule r:
     *
     *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
     *
     *  where x in T* and alpha, beta in V*; T is set of terminals and
     *  V is the set of terminals and nonterminals.  In other words,
     *  FOLLOW(r) is the set of all tokens that can possibly follow
     *  references to r in *any* sentential form (context).  At
     *  runtime, however, we know precisely which context applies as
     *  we have the call chain.  We may compute the exact (rather
     *  than covering superset) set of following tokens.
     *
     *  For example, consider grammar:
     *
     *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
     *       | "return" expr '.'
     *       ;
     *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
     *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
     *       | '(' expr ')'
     *       ;
     *
     *  The FOLLOW sets are all inclusive whereas context-sensitive
     *  FOLLOW sets are precisely what could follow a rule reference.
     *  For input "i=(3);", here is the derivation:
     *
     *  stat => ID '=' expr ';'
     *       => ID '=' atom ('+' atom)* ';'
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -