📄 baserecognizer.java
字号:
package org.antlr.runtime;

import java.util.*;

/** A generic recognizer that can handle recognizers generated from
 *  lexer, parser, and tree grammars.  This is all the parsing
 *  support code essentially; most of it is error recovery stuff and
 *  backtracking.
 */
public abstract class BaseRecognizer {
    // Memoization sentinel: the rule was attempted at this start index and failed.
    public static final int MEMO_RULE_FAILED = -2;
    // Memoization sentinel: no result recorded for this start index.
    public static final int MEMO_RULE_UNKNOWN = -1;
    // Initial capacity of the 'following' stack; grows 2x when exceeded.
    public static final int INITIAL_FOLLOW_STACK_SIZE = 100;

    // copies from Token object for convenience in actions
    public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
    public static final int HIDDEN = Token.HIDDEN_CHANNEL;

    /** Track the set of token types that can follow any rule invocation.
     *  Stack grows upwards.  When it hits the max, it grows 2x in size
     *  and keeps going.
     */
    protected BitSet[] following = new BitSet[INITIAL_FOLLOW_STACK_SIZE];
    // Index of the top of the 'following' stack; -1 means empty.
    protected int _fsp = -1;

    /** This is true when we see an error and before having successfully
     *  matched a token.  Prevents generation of more than one error message
     *  per error.
     */
    protected boolean errorRecovery = false;

    /** The index into the input stream where the last error occurred.
     *  This is used to prevent infinite loops where an error is found
     *  but no token is consumed during recovery...another error is found,
     *  ad nauseam.  This is a failsafe mechanism to guarantee that at least
     *  one token/tree node is consumed for two errors.
     */
    protected int lastErrorIndex = -1;

    /** In lieu of a return value, this indicates that a rule or token
     *  has failed to match.  Reset to false upon valid token match.
     */
    protected boolean failed = false;

    /** If 0, no backtracking is going on.  Safe to exec actions etc...
     *  If >0 then it's the level of backtracking.
     */
    protected int backtracking = 0;

    /** An array[size num rules] of Map<Integer,Integer> that tracks
     *  the stop token index for each rule.  ruleMemo[ruleIndex] is
     *  the memoization table for ruleIndex.  For key ruleStartIndex, you
     *  get back the stop token for associated rule or MEMO_RULE_FAILED.
     *
     *  This is only used if rule memoization is on (which it is by default).
     */
    protected Map[] ruleMemo;

    /** reset the parser's state */
    public void reset() {
        // TODO: implement this!
        //following.setSize(0);
    }

    /** Match current input symbol against ttype.  Upon error, do one token
     *  insertion or deletion if possible.  You can override to not recover
     *  here and bail out of the current production to the normal error
     *  exception catch (at the end of the method) by just throwing
     *  MismatchedTokenException upon input.LA(1)!=ttype.
     */
    public void match(IntStream input, int ttype, BitSet follow)
        throws RecognitionException
    {
        if ( input.LA(1)==ttype ) {
            input.consume();
            errorRecovery = false;
            failed = false;
            return;
        }
        if ( backtracking>0 ) {
            // while guessing (backtracking), record the failure rather than
            // reporting an error or attempting recovery
            failed = true;
            return;
        }
        mismatch(input, ttype, follow);
        return;
    }

    /** Consume the current input symbol unconditionally; always succeeds,
     *  so it also clears the error-recovery and failed flags.
     */
    public void matchAny(IntStream input) {
        errorRecovery = false;
        failed = false;
        input.consume();
    }

    /** factor out what to do upon token mismatch so tree parsers can behave
     *  differently.
     */
    protected void mismatch(IntStream input, int ttype, BitSet follow)
        throws RecognitionException
    {
        MismatchedTokenException mte =
            new MismatchedTokenException(ttype, input);
        recoverFromMismatchedToken(input, mte, ttype, follow);
    }

    /** Report a recognition problem.  Java is not polymorphic on the
     *  argument types so you have to check the type of exception yourself.
     *  That's not very clean but it's better than generating a bunch of
     *  catch clauses in each rule and makes it easy to extend with
     *  more exceptions w/o breaking old code.
     *
     *  This method sets errorRecovery to indicate the parser is recovering
     *  not parsing.  Once in recovery mode, no errors are generated.
     *  To get out of recovery mode, the parser must successfully match
     *  a token (after a resync).  So it will go:
     *
     *  1. error occurs
     *  2. enter recovery mode, report error
     *  3. consume until token found in resynch set
     *  4. try to resume parsing
     *  5. next match() will reset errorRecovery mode
     */
    public void reportError(RecognitionException e) {
        // if we've already reported an error and have not matched a token
        // yet successfully, don't report any errors.
        if ( errorRecovery ) {
            //System.err.print("[SPURIOUS] ");
            return;
        }
        errorRecovery = true;
        displayRecognitionError(this.getClass().getName(),
                                this.getTokenNames(),
                                e);
    }

    /** Print a description of recognition error e to System.err, dispatching
     *  on the concrete exception subtype via instanceof checks.  tokenNames
     *  maps token type numbers to display names for the "expecting" messages;
     *  name is the recognizer class name used to render the rule invocation
     *  stack.
     */
    public static void displayRecognitionError(String name,
                                               String[] tokenNames,
                                               RecognitionException e)
    {
        System.err.print(getRuleInvocationStack(e, name)+
                         ": line "+e.line+":"+e.charPositionInLine+" ");
        if ( e instanceof MismatchedTokenException ) {
            MismatchedTokenException mte = (MismatchedTokenException)e;
            String tokenName="<unknown>";
            if ( mte.expecting==Token.EOF ) {
                tokenName = "EOF";
            }
            else {
                tokenName = tokenNames[mte.expecting];
            }
            System.err.println("mismatched token: "+
                               e.token+
                               "; expecting type "+
                               tokenName);
        }
        else if ( e instanceof MismatchedTreeNodeException ) {
            MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
            String tokenName="<unknown>";
            if ( mtne.expecting==Token.EOF ) {
                tokenName = "EOF";
            }
            else {
                tokenName = tokenNames[mtne.expecting];
            }
            System.err.println("mismatched tree node: "+
                               mtne.foundNode+
                               "; expecting type "+
                               tokenName);
        }
        else if ( e instanceof NoViableAltException ) {
            NoViableAltException nvae = (NoViableAltException)e;
            System.err.println(//"decision=<<"+nvae.grammarDecisionDescription+">>"+
                               "state "+nvae.stateNumber+
                               " (decision="+nvae.decisionNumber+
                               ") no viable alt; token="+
                               e.token);
        }
        else if ( e instanceof EarlyExitException ) {
            EarlyExitException eee = (EarlyExitException)e;
            System.err.println("required (...)+ loop (decision="+
                               eee.decisionNumber+
                               ") did not match anything; token="+
                               e.token);
        }
        else if ( e instanceof MismatchedSetException ) {
            MismatchedSetException mse = (MismatchedSetException)e;
            System.err.println("mismatched token: "+
                               e.token+
                               "; expecting set "+mse.expecting);
        }
        else if ( e instanceof MismatchedNotSetException ) {
            MismatchedNotSetException mse = (MismatchedNotSetException)e;
            System.err.println("mismatched token: "+
                               e.token+
                               "; expecting set "+mse.expecting);
        }
        else if ( e instanceof FailedPredicateException ) {
            FailedPredicateException fpe = (FailedPredicateException)e;
            System.err.println("rule "+fpe.ruleName+" failed predicate: {"+
                               fpe.predicateText+"}?");
        }
    }

    /** Recover from an error found on the input stream.  Mostly this is
     *  NoViableAlt exceptions, but could be a mismatched token that
     *  the match() routine could not recover from.
     */
    public void recover(IntStream input, RecognitionException re) {
        if ( lastErrorIndex==input.index() ) {
            // uh oh, another error at same token index; must be a case
            // where LT(1) is in the recovery token set so nothing is
            // consumed; consume a single token so at least to prevent
            // an infinite loop; this is a failsafe.
            input.consume();
        }
        lastErrorIndex = input.index();
        BitSet followSet = computeErrorRecoverySet();
        beginResync();
        consumeUntil(input, followSet);
        endResync();
    }

    /** A hook to listen in on the token consumption during error recovery.
     *  The DebugParser subclasses this to fire events to the listener.
     */
    public void beginResync() {
    }

    public void endResync() {
    }

    /*  Compute the error recovery set for the current rule.  During
     *  rule invocation, the parser pushes the set of tokens that can
     *  follow that rule reference on the stack; this amounts to
     *  computing FIRST of what follows the rule reference in the
     *  enclosing rule.  This local follow set only includes tokens
     *  from within the rule; i.e., the FIRST computation done by
     *  ANTLR stops at the end of a rule.
     *
     *  EXAMPLE
     *
     *  When you find a "no viable alt exception", the input is not
     *  consistent with any of the alternatives for rule r.  The best
     *  thing to do is to consume tokens until you see something that
     *  can legally follow a call to r *or* any rule that called r.
     *  You don't want the exact set of viable next tokens because the
     *  input might just be missing a token--you might consume the
     *  rest of the input looking for one of the missing tokens.
     *
     *  Consider grammar:
     *
     *  a : '[' b ']'
     *    | '(' b ')'
     *    ;
     *  b : c '^' INT ;
     *  c : ID
     *    | INT
     *    ;
     *
     *  At each rule invocation, the set of tokens that could follow
     *  that rule is pushed on a stack.  Here are the various "local"
     *  follow sets:
     *
     *  FOLLOW(b1_in_a) = FIRST(']') = ']'
     *  FOLLOW(b2_in_a) = FIRST(')') = ')'
     *  FOLLOW(c_in_b)  = FIRST('^') = '^'
     *
     *  Upon erroneous input "[]", the call chain is
     *
     *  a -> b -> c
     *
     *  and, hence, the follow context stack is:
     *
     *  depth  local follow set     after call to rule
     *    0         <EOF>                    a (from main())
     *    1          ']'                     b
     *    2          '^'                     c
     *
     *  Notice that ')' is not included, because b would have to have
     *  been called from a different context in rule a for ')' to be
     *  included.
     *
     *  For error recovery, we cannot consider FOLLOW(c)
     *  (context-sensitive or otherwise).  We need the combined set of
     *  all context-sensitive FOLLOW sets--the set of all tokens that
     *  could follow any reference in the call chain.  We need to
     *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
     *  we resync'd to that token, we'd consume until EOF.  We need to
     *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
     *  In this case, for input "[]", LA(1) is in this set so we would
     *  not consume anything and after printing an error rule c would
     *  return normally.  It would not find the required '^' though.
     *  At this point, it gets a mismatched token error and throws an
     *  exception (since LA(1) is not in the viable following token
     *  set).  The rule exception handler tries to recover, but finds
     *  the same recovery set and doesn't consume anything.  Rule b
     *  exits normally returning to rule a.  Now it finds the ']' (and
     *  with the successful match exits errorRecovery mode).
     *
     *  So, you can see that the parser walks up the call chain looking
     *  for the token that was a member of the recovery set.
     *
     *  Errors are not generated in errorRecovery mode.
     *
     *  ANTLR's error recovery mechanism is based upon original ideas:
     *
     *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
     *
     *  and
     *
     *  "A note on error recovery in recursive descent parsers":
     *  http://portal.acm.org/citation.cfm?id=947902.947905
     *
     *  Later, Josef Grosch had some good ideas:
     *
     *  "Efficient and Comfortable Error Recovery in Recursive Descent
     *  Parsers":
     *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
     *
     *  Like Grosch I implemented local FOLLOW sets that are combined
     *  at run-time upon error to avoid overhead during parsing.
     */
    protected BitSet computeErrorRecoverySet() {
        return combineFollows(false);
    }

    // NOTE(review): the javadoc below is truncated mid-sentence -- this source
    // chunk ends here.  The method it documents (presumably
    // computeContextSensitiveRuleFOLLOW) lies outside the visible range;
    // recover the remainder from the original ANTLR 3 runtime source.
    /** Compute the context-sensitive FOLLOW set for current rule.
     *  This is set of token types that can follow a specific rule
     *  reference given a specific call chain.  You get the set of
     *  viable tokens that can possibly come next (lookahead depth 1)
     *  given the current call chain.  Contrast this with the
     *  definition of plain FOLLOW for rule r:
     *
     *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
     *
     *  where x in T* and alpha, beta in V*; T is set of terminals and
     *  V is the set of terminals and nonterminals.  In other words,
     *  FOLLOW(r) is the set of all tokens that can possibly follow
     *  references to r in *any* sentential form (context).  At
     *  runtime, however, we know precisely which context applies as
     *  we have the call chain.  We may compute the exact (rather
     *  than covering superset) set of following tokens.
     *
     *  For example, consider grammar:
     *
     *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
     *       | "return" expr '.'
     *       ;
     *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
     *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
     *       | '(' expr ')'
     *       ;
     *
     *  The FOLLOW sets are all inclusive whereas context-sensitive
     *  FOLLOW sets are precisely what could follow a rule reference.
     *  For input "i=(3);", here is the derivation:
     *
     *  stat => ID '=' expr ';'
     *       => ID '=' atom ('+' atom)* ';'
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -