recognizers.py

来自「antlr最新版本V3源代码」· Python 代码 · 共 1,189 行 · 第 1/3 页
1,189 行
class TokenSource(object):
    """
    @brief Abstract baseclass for token producers.

    A source of tokens must provide a sequence of tokens via nextToken()
    and must also reveal its source of characters; CommonToken's text is
    computed from a CharStream; it only stores indices into the char stream.

    Errors from the lexer are never passed to the parser.  Either you want
    to keep going or you do not upon token recognition error.  If you do not
    want to continue lexing then you do not want to continue parsing.  Just
    raise an exception that is not a RecognitionException and it will
    propagate all the way out of the recognizers.  If you want to continue
    lexing then you should not raise an exception to the parser--it has
    already requested a token.  Keep lexing until you get a valid one.  Just
    report errors and keep going, looking for a valid token.
    """

    def nextToken(self):
        """Return a Token object from your input stream (usually a CharStream).

        Do not fail/return upon lexing error; keep chewing on the characters
        until you get a good one; errors are not passed through to the parser.

        This base implementation is abstract and always raises
        NotImplementedError.
        """

        raise NotImplementedError


class Lexer(BaseRecognizer, TokenSource):
    """
    @brief Baseclass for generated lexer classes.

    A lexer is a recognizer that draws input symbols from a character stream.
    Lexer grammars result in a subclass of this object. A Lexer object
    uses simplified match() and error recovery mechanisms in the interest
    of speed.
    """

    def __init__(self, input):
        """Create a lexer that reads characters from the stream `input`."""

        BaseRecognizer.__init__(self)
        TokenSource.__init__(self)

        # Where is the lexer drawing characters from?
        self.input = input

        # The goal of all lexer rules/methods is to create a token object.
        # This is an instance variable as multiple rules may collaborate to
        # create a single token.  nextToken will return this object after
        # matching lexer rule(s).  If you subclass to allow multiple token
        # emissions, then set this to the last token to be matched or
        # something non-None so that the auto token emit mechanism will not
        # emit another token.
        self.token = None

        # What character index in the stream did the current token start at?
        # Needed, for example, to get the text for current token.  Set at
        # the start of nextToken.
        self.tokenStartCharIndex = -1

        # The line on which the first character of the token resides
        self.tokenStartLine = -1

        # The character position of first character within the line
        self.tokenStartCharPositionInLine = -1

        # The channel number for the current token
        self.channel = DEFAULT_CHANNEL

        # The token type for the current token
        self.type = INVALID_TOKEN_TYPE

        # You can set the text for the current token to override what is in
        # the input char buffer.  Use setText() or set this instance var.
        self._text = None

    def reset(self):
        """Reset recognizer and lexer state and rewind the input, if any."""

        BaseRecognizer.reset(self)  # reset all recognizer state variables

        # wack Lexer state variables
        self.token = None
        self.type = INVALID_TOKEN_TYPE
        self.channel = DEFAULT_CHANNEL
        self.tokenStartCharIndex = -1
        self.tokenStartLine = -1
        self.tokenStartCharPositionInLine = -1
        self._text = None

        if self.input is not None:
            self.input.seek(0)  # rewind the input

    def nextToken(self):
        """
        Return a token from this source; i.e., match a token on the char
        stream.

        Returns EOF_TOKEN when the next input symbol is EOF.  Rules that
        leave self.token set to SKIP_TOKEN produce no token; the loop simply
        tries again.  A RecognitionException raised while matching is
        reported and recovered from here, and lexing continues.
        """

        while True:
            # Per-token state is re-initialised before every attempt.
            self.token = None
            self.channel = DEFAULT_CHANNEL
            self.tokenStartCharIndex = self.input.index()
            self.tokenStartCharPositionInLine = self.input.charPositionInLine
            self.tokenStartLine = self.input.line
            self._text = None

            if self.input.LA(1) == EOF:
                return EOF_TOKEN

            try:
                self.mTokens()

                if self.token is None:
                    # No rule emitted explicitly: emit the default token.
                    self.emit()

                elif self.token == SKIP_TOKEN:
                    continue

                return self.token

            except RecognitionException as exc:
                self.reportError(exc)
                self.recover(exc)

    def skip(self):
        """
        Instruct the lexer to skip creating a token for current lexer rule
        and look for another token.  nextToken() knows to keep looking when
        a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
        if token is None at end of any token rule, it creates one for you
        and emits it.
        """

        self.token = SKIP_TOKEN

    def mTokens(self):
        """This is the lexer entry point that sets instance var 'token'"""

        # abstract method
        raise NotImplementedError

    def setCharStream(self, input):
        """Set the char stream and reset the lexer"""

        # Clear the stream first so reset() does not try to rewind it.
        self.input = None
        self.reset()
        self.input = input

    def emit(self, token=None):
        """
        The standard method called to automatically emit a token at the
        outermost lexical rule.  The token object should point into the
        char buffer start..stop.  If there is a text override in 'text',
        use that to set the token's text.

        If `token` is given it is stored as the current token unchanged;
        otherwise a CommonToken is built from the current lexer state.
        Returns the token that was stored in self.token.
        """

        if token is None:
            token = CommonToken(
                input=self.input,
                type=self.type,
                channel=self.channel,
                start=self.tokenStartCharIndex,
                stop=self.getCharIndex() - 1
                )
            token.line = self.tokenStartLine
            token.text = self.text
            token.charPositionInLine = self.tokenStartCharPositionInLine

        self.token = token

        return token

    def match(self, s):
        """
        Match `s` against the input and consume it.

        `s` is either a string, matched character by character, or a single
        input symbol.  On a mismatch while backtracking, self.failed is set
        to True and the method returns; otherwise recover() is called and a
        MismatchedTokenException is raised.  self.failed is set to False
        after each symbol that is consumed.
        """

        # NOTE(review): recover() is called here and nextToken() calls it
        # again when it catches a RecognitionException.  If
        # MismatchedTokenException is a RecognitionException, a mismatch
        # reached through nextToken() skips input twice -- confirm intended.
        if isinstance(s, basestring):
            for expected in s:
                if self.input.LA(1) != expected:
                    if self.backtracking > 0:
                        self.failed = True
                        return

                    mte = MismatchedTokenException(expected, self.input)
                    self.recover(mte)
                    raise mte

                self.input.consume()
                self.failed = False

        else:
            if self.input.LA(1) != s:
                if self.backtracking > 0:
                    self.failed = True
                    return

                mte = MismatchedTokenException(s, self.input)
                self.recover(mte)
                raise mte

            self.input.consume()
            self.failed = False

    def matchAny(self):
        """Consume the next input symbol, whatever it is."""

        self.input.consume()

    def matchRange(self, a, b):
        """
        Match and consume one input symbol in the inclusive range a..b.

        On a mismatch while backtracking, self.failed is set to True and the
        method returns; otherwise recover() is called and a
        MismatchedRangeException is raised.
        """

        if self.input.LA(1) < a or self.input.LA(1) > b:
            if self.backtracking > 0:
                self.failed = True
                return

            mre = MismatchedRangeException(a, b, self.input)
            self.recover(mre)
            raise mre

        self.input.consume()
        self.failed = False

    def getLine(self):
        """Return the current line as reported by the input stream."""

        return self.input.line

    def getCharPositionInLine(self):
        """Return the current position within the line, from the input."""

        return self.input.charPositionInLine

    def getCharIndex(self):
        """What is the index of the current character of lookahead?"""

        return self.input.index()

    def getText(self):
        """
        Return the text matched so far for the current token or any
        text override.
        """

        if self._text is not None:
            return self._text

        return self.input.substring(
            self.tokenStartCharIndex,
            self.getCharIndex() - 1
            )

    def setText(self, text):
        """
        Set the complete text of this token; it wipes any previous
        changes to the text.
        """

        self._text = text

    text = property(getText, setText)

    def reportError(self, e):
        """Report the recognition error `e` via displayRecognitionError()."""

        ## TODO: not thought about recovery in lexer yet.

        ## # if we've already reported an error and have not matched a token
        ## # yet successfully, don't report any errors.
        ## if self.errorRecovery:
        ##     #System.err.print("[SPURIOUS] ");
        ##     return;
        ##
        ## self.errorRecovery = True

        self.displayRecognitionError(self.tokenNames, e)

    def getErrorMessage(self, e, tokenNames):
        """
        Build the error message for the recognition exception `e`.

        Character-level messages are produced for the exception types
        handled below; any other exception is delegated to
        BaseRecognizer.getErrorMessage().
        """

        if isinstance(e, MismatchedTokenException):
            return "mismatched character " \
                   + self.getCharErrorDisplay(e.c) \
                   + " expecting " \
                   + self.getCharErrorDisplay(e.expecting)

        if isinstance(e, NoViableAltException):
            return "no viable alternative at character " \
                   + self.getCharErrorDisplay(e.c)

        if isinstance(e, EarlyExitException):
            return "required (...)+ loop did not match anything at character " \
                   + self.getCharErrorDisplay(e.c)

        # Set and not-set mismatches are reported with the same wording.
        if isinstance(e, (MismatchedSetException, MismatchedNotSetException)):
            return "mismatched character " \
                   + self.getCharErrorDisplay(e.c) \
                   + " expecting set " \
                   + repr(e.expecting)

        if isinstance(e, MismatchedRangeException):
            return "mismatched character " \
                   + self.getCharErrorDisplay(e.c) \
                   + " expecting set " \
                   + self.getCharErrorDisplay(e.a) \
                   + ".." \
                   + self.getCharErrorDisplay(e.b)

        return BaseRecognizer.getErrorMessage(self, e, tokenNames)

    def getCharErrorDisplay(self, c):
        """Return repr() of `c`, with EOF shown as '<EOF>'."""

        if c == EOF:
            c = '<EOF>'
        return repr(c)

    def recover(self, re):
        """
        Lexers can normally match any char in its vocabulary after matching
        a token, so do the easy thing and just kill a character and hope
        it all works out.  You can instead use the rule invocation stack
        to do sophisticated error recovery if you are in a fragment rule.

        The exception argument is not used by this implementation.
        """

        self.input.consume()

    def traceIn(self, ruleName, ruleIndex):
        """Trace rule entry, showing the next symbol and its position."""

        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
                                         self.getLine(),
                                         self.getCharPositionInLine()
                                         )

        BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol)

    def traceOut(self, ruleName, ruleIndex):
        """Trace rule exit, showing the next symbol and its position."""

        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
                                         self.getLine(),
                                         self.getCharPositionInLine()
                                         )

        BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol)


class Parser(BaseRecognizer):
    """
    @brief Baseclass for generated parser classes.
    """

    def __init__(self, lexer):
        """
        Create a parser reading from `lexer`.

        Despite its name, the argument is installed as the parser's token
        stream (self.input) via setTokenStream().
        """

        BaseRecognizer.__init__(self)

        self.setTokenStream(lexer)

    def reset(self):
        """Reset recognizer state and rewind the token stream, if any."""

        BaseRecognizer.reset(self)  # reset all recognizer state variables
        if self.input is not None:
            self.input.seek(0)  # rewind the input

    def setTokenStream(self, input):
        """Set the token stream and reset the parser"""

        # Clear the stream first so reset() does not try to rewind it.
        self.input = None
        self.reset()
        self.input = input

    def getTokenStream(self):
        """Return the token stream the parser reads from."""

        return self.input

    def traceIn(self, ruleName, ruleIndex):
        """Trace rule entry, showing the next lookahead token."""

        BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))

    def traceOut(self, ruleName, ruleIndex):
        """Trace rule exit, showing the next lookahead token."""

        BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))
recognizers.py - 源码说明

本页面展示了「antlr最新版本V3源代码」中的 recognizers.py 源码文件，采用 Python 编程语言编写，共 1,189 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫开发者社区收录了大量与ANTLR相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?