BaseRecognizer.java
 *
 *  The FOLLOW sets are all inclusive whereas context-sensitive
 *  FOLLOW sets are precisely what could follow a rule reference.
 *  For the input "i=(3);", here is the derivation:
 *
 *  stat => ID '=' expr ';'
 *       => ID '=' atom ('+' atom)* ';'
 *       => ID '=' '(' expr ')' ('+' atom)* ';'
 *       => ID '=' '(' atom ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ';'
 *
 *  At the "3" token, you'd have a call chain of
 *
 *  stat -> expr -> atom -> expr -> atom
 *
 *  What can follow that specific nested ref to atom?  Exactly ')',
 *  as you can see by looking at the derivation of this specific
 *  input.  Contrast this with FOLLOW(atom) = {'+', ')', ';', '.'}.
 *
 *  You want the exact viable token set when recovering from a
 *  token mismatch.  Upon token mismatch, if LA(1) is a member of
 *  the viable next-token set, then you know there is most likely
 *  a missing token in the input stream.  "Insert" one by simply not
 *  throwing an exception.
 */
protected BitSet computeContextSensitiveRuleFOLLOW() {
    return combineFollows(true);
}

protected BitSet combineFollows(boolean exact) {
    int top = _fsp;
    BitSet followSet = new BitSet();
    for (int i=top; i>=0; i--) {
        BitSet localFollowSet = (BitSet)following[i];
        /*
        System.out.println("local follow depth "+i+"="+
                           localFollowSet.toString(getTokenNames())+")");
        */
        followSet.orInPlace(localFollowSet);
        // an exact FOLLOW stops at the first frame whose local follow set
        // cannot see past its invoking rule (no EOR token present)
        if ( exact && !localFollowSet.member(Token.EOR_TOKEN_TYPE) ) {
            break;
        }
    }
    followSet.remove(Token.EOR_TOKEN_TYPE);
    return followSet;
}

/** Attempt to recover from a single missing or extra token.
 *
 *  EXTRA TOKEN
 *
 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
 *  however, then assume LA(1) is some extra spurious token.  Delete it
 *  and LA(2) as if we were doing a normal match(), which advances the
 *  input.
 *
 *  MISSING TOKEN
 *
 *  If the current token is consistent with what could come after
 *  ttype, then it is ok to "insert" the missing token, else throw an
 *  exception.  For example, the input "i=(3;" is clearly missing the
 *  ')'.  When the parser returns from the nested call to expr, it
 *  will have the call chain:
 *
 *    stat -> expr -> atom
 *
 *  and it will be trying to match the ')' at this point in the
 *  derivation:
 *
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *                          ^
 *  match() will see that ';' doesn't match ')' and report a
 *  mismatched token error.  To recover, it sees that LA(1)==';'
 *  is in the set of tokens that can follow the ')' token
 *  reference in rule atom.  It can assume that you forgot the ')'.
 */
public void recoverFromMismatchedToken(IntStream input,
                                       RecognitionException e,
                                       int ttype,
                                       BitSet follow)
    throws RecognitionException
{
    // if the next token is what we are looking for, then "delete" this token
    if ( input.LA(2)==ttype ) {
        reportError(e);
        /*
        System.err.println("recoverFromMismatchedToken deleting "+input.LT(1)+
                           " since "+input.LT(2)+" is what we want");
        */
        beginResync();
        input.consume(); // simply delete extra token
        endResync();
        input.consume(); // move past ttype token as if all were ok
        return;
    }
    if ( !recoverFromMismatchedElement(input,e,follow) ) {
        throw e;
    }
}

public void recoverFromMismatchedSet(IntStream input,
                                     RecognitionException e,
                                     BitSet follow)
    throws RecognitionException
{
    // TODO do single token deletion like above for Token mismatch
    if ( !recoverFromMismatchedElement(input,e,follow) ) {
        throw e;
    }
}
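/* Illustrative sketch (not part of the runtime): roughly how a generated
 * rule is assumed to drive the recovery above for the missing-')' input
 * "i=(3;".  The code shape and the FOLLOW_* constant names are
 * hypothetical placeholders, not actual generated identifiers.
 *
 *   // the '(' expr ')' alternative of rule atom
 *   match(input, LPAREN, FOLLOW_LPAREN_in_atom);
 *   pushFollow(FOLLOW_expr_in_atom);              // e.g. { ')' }
 *   expr();
 *   _fsp--;                                       // pop the follow set
 *   match(input, RPAREN, FOLLOW_RPAREN_in_atom);  // e.g. { EOR }
 *
 * With the call chain stat -> expr -> atom, the follow stack is assumed
 * to hold { ';' } (expr in stat) and { '+', EOR } (atom in expr).  When
 * match(RPAREN) sees ';', recoverFromMismatchedToken() cannot delete a
 * token (LA(2) is EOF, not ')'), so recoverFromMismatchedElement() expands
 * the EOR in FOLLOW_RPAREN_in_atom via combineFollows(true) into the
 * viable set { '+', ';' }.  Since LA(1)==';' is viable, the ')' is treated
 * as missing and "inserted" by reporting the error and returning without
 * consuming anything.
 */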
/** This code is factored out from mismatched token and mismatched set
 *  recovery.  It handles "single token insertion" error recovery for
 *  both.  No tokens are consumed to recover from insertions.  Return
 *  true if recovery was possible else return false.
 */
protected boolean recoverFromMismatchedElement(IntStream input,
                                               RecognitionException e,
                                               BitSet follow)
{
    if ( follow==null ) {
        // we have no information about the follow; we can only consume
        // a single token and hope for the best
        return false;
    }
    //System.out.println("recoverFromMismatchedElement");
    // compute what can follow this grammar element reference
    if ( follow.member(Token.EOR_TOKEN_TYPE) ) {
        BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW();
        follow = follow.or(viableTokensFollowingThisRule);
        follow.remove(Token.EOR_TOKEN_TYPE);
    }
    // if the current token is consistent with what could come after the set,
    // then it is ok to "insert" the missing token, else throw an exception
    //System.out.println("viable tokens="+follow.toString(getTokenNames())+")");
    if ( follow.member(input.LA(1)) ) {
        //System.out.println("LT(1)=="+input.LT(1)+" is consistent with what follows; inserting...");
        reportError(e);
        return true;
    }
    //System.err.println("nothing to do; throw exception");
    return false;
}

public void consumeUntil(IntStream input, int tokenType) {
    //System.out.println("consumeUntil "+tokenType);
    int ttype = input.LA(1);
    while (ttype != Token.EOF && ttype != tokenType) {
        input.consume();
        ttype = input.LA(1);
    }
}

/** Consume tokens until one matches the given token set */
public void consumeUntil(IntStream input, BitSet set) {
    //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
    int ttype = input.LA(1);
    while (ttype != Token.EOF && !set.member(ttype) ) {
        //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
        input.consume();
        ttype = input.LA(1);
    }
}

/** Push a rule's follow set using our own hardcoded stack */
protected void pushFollow(BitSet fset) {
    if ( (_fsp+1)>=following.length ) {
        // grow the follow stack, preserving all entries currently in use
        BitSet[] f = new BitSet[following.length*2];
        System.arraycopy(following, 0, f, 0, following.length);
        following = f;
    }
    following[++_fsp] = fset;
}

/** Return List<String> of the rules in your parser instance
 *  leading up to a call to this method.  You could override if
 *  you want more details such as the file/line info of where
 *  in the parser java code a rule is invoked.
 *
 *  This is very useful for error messages and for context-sensitive
 *  error recovery.
 */
public List getRuleInvocationStack() {
    String parserClassName = getClass().getName();
    return getRuleInvocationStack(new Throwable(), parserClassName);
}

/** A more general version of getRuleInvocationStack where you can
 *  pass in, for example, a RecognitionException to get its rule
 *  stack trace.  This routine is shared with all recognizers, hence,
 *  static.
 *
 *  TODO: move to a utility class or something; weird having lexer call this
 */
public static List getRuleInvocationStack(Throwable e,
                                          String recognizerClassName)
{
    List rules = new ArrayList();
    StackTraceElement[] stack = e.getStackTrace();
    int i = 0;
    for (i=stack.length-1; i>=0; i--) {
        StackTraceElement t = stack[i];
        if ( t.getClassName().startsWith("org.antlr.runtime.") ) {
            continue; // skip support code such as this method
        }
        if ( t.getMethodName().equals(NEXT_TOKEN_RULE_NAME) ) {
            continue;
        }
        if ( !t.getClassName().equals(recognizerClassName) ) {
            continue; // must not be part of this parser
        }
        rules.add(t.getMethodName());
    }
    return rules;
}

public int getBacktrackingLevel() {
    return backtracking;
}
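/* Illustrative sketch (not part of the runtime): how the stack-trace based
 * rule invocation stack is assumed to look when queried deep in a parse.
 * The parser class and rule names below are hypothetical.
 *
 *   // inside a generated SimpleParser with rules stat, expr, atom
 *   public void atom() throws RecognitionException {
 *       ...
 *       List rules = getRuleInvocationStack();
 *       // frames are walked oldest-to-newest and only methods declared
 *       // on SimpleParser are kept, so the list reads in invocation order:
 *       //   [stat, expr, atom]
 *       System.err.println("error while in "+rules);
 *   }
 *
 * Frames from org.antlr.runtime.* and from the synthetic next-token rule
 * are filtered out, so runtime plumbing never shows up in the list.
 */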
/** Used to print out token names like ID during debugging and
 *  error reporting.  The generated parsers implement a method
 *  that overrides this to point to their String[] tokenNames.
 */
public String[] getTokenNames() {
    return null;
}

/** For debugging and other purposes, you might want the grammar name.
 *  Have ANTLR generate an implementation for this method.
 */
public String getGrammarFileName() {
    return null;
}

/** A convenience method for use most often with template rewrites.
 *  Convert a List<Token> to List<String>.
 */
public List toStrings(List tokens) {
    if ( tokens==null ) return null;
    List strings = new ArrayList(tokens.size());
    for (int i=0; i<tokens.size(); i++) {
        strings.add(((Token)tokens.get(i)).getText());
    }
    return strings;
}

/** Convert a List<RuleReturnScope> to List<StringTemplate> by copying
 *  out the .st property.  Useful when converting from
 *  list labels to template attributes:
 *
 *    a : ids+=rule -> foo(ids={toTemplates($ids)})
 *      ;
 *
 *  TJP: this is not needed anymore.  $ids is a List of templates
 *  when output=template.
 *
public List toTemplates(List retvals) {
    if ( retvals==null ) return null;
    List strings = new ArrayList(retvals.size());
    for (int i=0; i<retvals.size(); i++) {
        strings.add(((RuleReturnScope)retvals.get(i)).getTemplate());
    }
    return strings;
}
*/

/** Given a rule number and a start token index number, return
 *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
 *  the start index.  If this rule has parsed input starting from the
 *  start index before, then return where the rule stopped parsing.
 *  It returns the index of the last token matched by the rule.
 *
 *  For now we use a hashtable and just the slow Object-based one.
 *  Later, we can make a special one for ints and also one that
 *  tosses out data after we commit past input position i.
 */
public int getRuleMemoization(int ruleIndex, int ruleStartIndex) {
    if ( ruleMemo[ruleIndex]==null ) {
        ruleMemo[ruleIndex] = new HashMap();
    }
    Integer stopIndexI =
        (Integer)ruleMemo[ruleIndex].get(new Integer(ruleStartIndex));
    if ( stopIndexI==null ) {
        return MEMO_RULE_UNKNOWN;
    }
    return stopIndexI.intValue();
}

/** Has this rule already parsed input at the current index in the
 *  input stream?  The memo record holds the stop token index or
 *  MEMO_RULE_FAILED; MEMO_RULE_UNKNOWN means the rule has never been
 *  attempted here, and this method then returns false.  If we
 *  attempted but failed to parse properly before, set failed.
 *
 *  This method has a side-effect: if we have seen this input for
 *  this rule and successfully parsed before, then seek ahead to
 *  1 past the stop token matched for this rule last time.
 */
public boolean alreadyParsedRule(IntStream input, int ruleIndex) {
    int stopIndex = getRuleMemoization(ruleIndex, input.index());
    if ( stopIndex==MEMO_RULE_UNKNOWN ) {
        return false;
    }
    if ( stopIndex==MEMO_RULE_FAILED ) {
        //System.out.println("rule "+ruleIndex+" will never succeed");
        failed=true;
    }
    else {
        //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+failed);
        input.seek(stopIndex+1); // jump to one past stop token
    }
    return true;
}

/** Record whether or not this rule parsed the input at this position
 *  successfully.  Use a standard java hashtable for now.
 */
public void memoize(IntStream input,
                    int ruleIndex,
                    int ruleStartIndex)
{
    int stopTokenIndex = failed?MEMO_RULE_FAILED:input.index()-1;
    if ( ruleMemo[ruleIndex]!=null ) {
        ruleMemo[ruleIndex].put(
            new Integer(ruleStartIndex), new Integer(stopTokenIndex)
        );
    }
}
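/* Illustrative sketch (not part of the runtime): roughly how a generated
 * rule is assumed to use the memoization hooks above when memoization is
 * enabled.  The rule name and rule index constant are hypothetical.
 *
 *   public void expr() throws RecognitionException {
 *       int expr_StartIndex = input.index();
 *       try {
 *           if ( backtracking>0 && alreadyParsedRule(input, EXPR_RULE_INDEX) ) {
 *               return;  // already tried here; input has been skipped ahead
 *           }
 *           // ... normal rule body: match tokens, invoke subrules ...
 *       }
 *       finally {
 *           if ( backtracking>0 ) {
 *               // records the stop index, or MEMO_RULE_FAILED when failed==true
 *               memoize(input, EXPR_RULE_INDEX, expr_StartIndex);
 *           }
 *       }
 *   }
 *
 * A later attempt at the same start index then either jumps straight to
 * one past the recorded stop token or sets failed without re-parsing.
 */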
/** Assume failure in case a rule bails out with an exception.
 *  Reset to rule stop index if successful.
public void memoizeFailure(int ruleIndex, int ruleStartIndex) {
    ruleMemo[ruleIndex].put(
        new Integer(ruleStartIndex), MEMO_RULE_FAILED_I
    );
}
*/

/** After successful completion of a rule, record success for this
 *  rule and that it can skip ahead next time it attempts this
 *  rule for this input position.
public void memoizeSuccess(IntStream input,
                           int ruleIndex,
                           int ruleStartIndex)
{
    ruleMemo[ruleIndex].put(
        new Integer(ruleStartIndex), new Integer(input.index()-1)
    );
}
*/

/** Return how many rule/input-index pairs there are in total.
 *  TODO: this includes synpreds. :(
 */
public int getRuleMemoizationCacheSize() {
    int n = 0;
    for (int i = 0; ruleMemo!=null && i < ruleMemo.length; i++) {
        Map ruleMap = ruleMemo[i];
        if ( ruleMap!=null ) {
            n += ruleMap.size(); // how many input indexes are recorded?
        }
    }
    return n;
}

public void traceIn(String ruleName, int ruleIndex, Object inputSymbol) {
    System.out.print("enter "+ruleName+" "+inputSymbol);
    if ( failed ) {
        System.out.println(" failed="+failed);
    }
    if ( backtracking>0 ) {
        System.out.print(" backtracking="+backtracking);
    }
    System.out.println();
}

public void traceOut(String ruleName, int ruleIndex, Object inputSymbol) {
    System.out.print("exit "+ruleName+" "+inputSymbol);
    if ( failed ) {
        System.out.println(" failed="+failed);
    }
    if ( backtracking>0 ) {
        System.out.print(" backtracking="+backtracking);
    }
    System.out.println();
}

/** A syntactic predicate.  Returns true/false depending on whether
 *  the specified grammar fragment matches the current input stream.
 *  This resets the failed instance var afterwards.
public boolean synpred(IntStream input, GrammarFragmentPtr fragment) {
    //int i = input.index();
    //System.out.println("begin backtracking="+backtracking+" @"+i+"="+((CommonTokenStream)input).LT(1));
    backtracking++;
    beginBacktrack(backtracking);
    int start = input.mark();
    try {fragment.invoke();}
    catch (RecognitionException re) {
        System.err.println("impossible: "+re);
    }
    boolean success = !failed;
    input.rewind(start);
    endBacktrack(backtracking, success);
    backtracking--;
    //System.out.println("end backtracking="+backtracking+": "+(failed?"FAILED":"SUCCEEDED")+" @"+input.index()+" should be "+i);
    failed=false;
    return success;
}
*/
}
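/* Illustrative sketch (not part of the runtime): how a generated decision
 * is assumed to consult a syntactic predicate like the commented-out
 * synpred() above.  The rule and predicate names are hypothetical.
 *
 *   // stat : (declaration)=> declaration | expression ;
 *   public void stat() throws RecognitionException {
 *       if ( synpred_declaration(input) ) {  // speculative parse; input rewound
 *           declaration();                   // now parse it "for real"
 *       }
 *       else {
 *           expression();
 *       }
 *   }
 *
 * While backtracking>0, rules suppress error reporting and set the failed
 * flag instead; traceIn()/traceOut() print that state so speculative
 * parses can be followed in the trace output.
 */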