lexer.java

来自「ANTLR(ANother Tool for Language Recognit」· Java 代码 · 共 320 行
JAVA
320 行
/* [The "BSD licence"] Copyright (c) 2005-2006 Terence Parr All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright    notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright    notice, this list of conditions and the following disclaimer in the    documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products    derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/package org.antlr.runtime;/** A lexer is recognizer that draws input symbols from a character stream. *  lexer grammars result in a subclass of this object. A Lexer object *  uses simplified match() and error recovery mechanisms in the interest *  of speed. */public abstract class Lexer extends BaseRecognizer implements TokenSource {	/** Where is the lexer drawing characters from? */    protected CharStream input;	/** The goal of all lexer rules/methods is to create a token object.	 *  This is an instance variable as multiple rules may collaborate to	 *  create a single token.  nextToken will return this object after	 *  matching lexer rule(s).  If you subclass to allow multiple token	 *  emissions, then set this to the last token to be matched or	 *  something nonnull so that the auto token emit mechanism will not	 *  emit another token.	 */    protected Token token;	/** What character index in the stream did the current token start at?	 *  Needed, for example, to get the text for current token.  Set at	 *  the start of nextToken. 	 */	protected int tokenStartCharIndex = -1;	/** You can set the text for the current token to override what is in	 *  the input char buffer.  Use setText() or can set this instance var. 	 */	protected String text;	/** We must track the token rule nesting level as we only want to	 *  emit a token automatically at the outermost level so we don't get	 *  two if FLOAT calls INT.  To save code space and time, do not	 *  inc/dec this in fragment rules.	 */	protected int ruleNestingLevel;	public Lexer() {	}	public Lexer(CharStream input) {		this.input = input;	}	/** Return a token from this source; i.e., match a token on the char	 *  stream.	 */    public Token nextToken() {		while (true) {			token = null;			tokenStartCharIndex = getCharIndex();			text = null;			if ( input.LA(1)==CharStream.EOF ) {                return Token.EOF_TOKEN;            }            try {                mTokens();				if ( token!=Token.SKIP_TOKEN ) {					return token;				}			}            catch (RecognitionException re) {                reportError(re);                recover(re);            }        }    }	/** Instruct the lexer to skip creating a token for current lexer rule	 *  and look for another token.  nextToken() knows to keep looking when	 *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that	 *  if token==null at end of any token rule, it creates one for you	 *  and emits it.	 */	public void skip() {		token = Token.SKIP_TOKEN;	}	/** This is the lexer entry point that sets instance var 'token' */	public abstract void mTokens() throws RecognitionException;	/** Set the char stream and reset the lexer */	public void setCharStream(CharStream input) {		this.input = input;		token = null;		tokenStartCharIndex = -1;		ruleNestingLevel = 0;	}	public void emit(Token token) {		this.token = token;	}	/** The standard method called to automatically emit a token at the	 *  outermost lexical rule.  The token object should point into the	 *  char buffer start..stop.  If there is a text override in 'text',	 *  use that to set the token's text.	 */	public Token emit(int tokenType,					  int line, int charPosition,					  int channel,					  int start, int stop)	{		Token t = new CommonToken(input, tokenType, channel, start, stop);		t.setLine(line);		t.setText(text);		t.setCharPositionInLine(charPosition);		emit(t);		return t;	}	public void match(String s) throws MismatchedTokenException {        int i = 0;        while ( i<s.length() ) {            if ( input.LA(1)!=s.charAt(i) ) {				if ( backtracking>0 ) {					failed = true;					return;				}				MismatchedTokenException mte =					new MismatchedTokenException(s.charAt(i), input);				recover(mte);				throw mte;            }            i++;            input.consume();			failed = false;        }    }    public void matchAny() {        input.consume();    }    public void match(int c) throws MismatchedTokenException {        if ( input.LA(1)!=c ) {			if ( backtracking>0 ) {				failed = true;				return;			}			MismatchedTokenException mte =				new MismatchedTokenException(c, input);			recover(mte);			throw mte;        }        input.consume();		failed = false;    }    public void matchRange(int a, int b)		throws MismatchedRangeException	{        if ( input.LA(1)<a || input.LA(1)>b ) {			if ( backtracking>0 ) {				failed = true;				return;			}            MismatchedRangeException mre =				new MismatchedRangeException(a,b,input);			recover(mre);			throw mre;        }        input.consume();		failed = false;    }    public int getLine() {        return input.getLine();    }    public int getCharPositionInLine() {        return input.getCharPositionInLine();    }	/** What is the index of the current character of lookahead? */	public int getCharIndex() {		return input.index();	}	/** Return the text matched so far for the current token or any	 *  text override.	 */	public String getText() {		if ( text!=null ) {			return text;		}		return input.substring(tokenStartCharIndex,getCharIndex()-1);	}	/** Set the complete text of this token; it wipes any previous	 *  changes to the text.	 */	public void setText(String text) {		this.text = text;	}	/** Report a recognition problem.  Java is not polymorphic on the	 *  argument types so you have to check the type of exception yourself.	 *  That's not very clean but it's better than generating a bunch of	 *  catch clauses in each rule and makes it easy to extend with	 *  more exceptions w/o breaking old code.	 */	public void reportError(RecognitionException e) {		/** TODO: not thought about recovery in lexer yet.		 *		// if we've already reported an error and have not matched a token		// yet successfully, don't report any errors.		if ( errorRecovery ) {			//System.err.print("[SPURIOUS] ");			return;		}		errorRecovery = true;		 */		displayRecognitionError(this.getClass().getName(),e);	}	public static void displayRecognitionError(String name,											   RecognitionException e)	{		System.err.print(getRuleInvocationStack(e, name)+						 ": line "+e.line+":"+e.charPositionInLine+" ");		if ( e instanceof MismatchedTokenException ) {			MismatchedTokenException mte = (MismatchedTokenException)e;			System.err.println("mismatched char: '"+							   ((char)e.c)+							   "' on line "+e.line+							   "; expecting char '"+(char)mte.expecting+"'");		}		else if ( e instanceof NoViableAltException ) {			NoViableAltException nvae = (NoViableAltException)e;			System.err.println(nvae.grammarDecisionDescription+							   " state "+nvae.stateNumber+							   " (decision="+nvae.decisionNumber+							   ") no viable alt line "+e.line+":"+e.charPositionInLine+"; char='"+							   ((char)e.c)+"'");		}		else if ( e instanceof EarlyExitException ) {			EarlyExitException eee = (EarlyExitException)e;			System.err.println("required (...)+ loop (decision="+							   eee.decisionNumber+							   ") did not match anything; on line "+							   e.line+":"+e.charPositionInLine+" char="+							   ((char)e.c)+"'");		}		else if ( e instanceof MismatchedSetException ) {			MismatchedSetException mse = (MismatchedSetException)e;			System.err.println("mismatched char: '"+							   ((char)e.c)+							   "' on line "+e.line+							   ":"+e.charPositionInLine+							   "; expecting set "+mse.expecting);		}		else if ( e instanceof MismatchedNotSetException ) {			MismatchedSetException mse = (MismatchedSetException)e;			System.err.println("mismatched char: '"+							   ((char)e.c)+							   "' on line "+e.line+							   ":"+e.charPositionInLine+							   "; expecting set "+mse.expecting);		}		else if ( e instanceof MismatchedRangeException ) {			MismatchedRangeException mre = (MismatchedRangeException)e;			System.err.println("mismatched char: '"+							   ((char)e.c)+							   "' on line "+e.line+							   ":"+e.charPositionInLine+							   "; expecting set '"+(char)mre.a+"'..'"+							   (char)mre.b+"'");		}		else if ( e instanceof FailedPredicateException ) {			FailedPredicateException fpe = (FailedPredicateException)e;			System.err.println("rule "+fpe.ruleName+" failed predicate: {"+							   fpe.predicateText+"}?");					}	}	/** Lexers can normally match any char in it's vocabulary after matching	 *  a token, so do the easy thing and just kill a character and hope	 *  it all works out.  You can instead use the rule invocation stack	 *  to do sophisticated error recovery if you are in a fragment rule.	 */	public void recover(RecognitionException re) {		//System.out.println("consuming char "+(char)input.LA(1)+" during recovery");		//re.printStackTrace();		input.consume();	}}
lexer.java - 源码说明

本页面展示了「ANTLR(ANother Tool for Language Recognition)它是这样的一种工具」中的 lexer.java 源码文件，采用 Java 编程语言编写，共 320 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Recognition相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?