recognizers.py

来自「antlr最新版本V3源代码」· Python 代码 · 共 1,189 行 · 第 1/3 页

PY
1,189
字号
class TokenSource(object):
    """
    @brief Abstract baseclass for token producers.

    A source of tokens must provide a sequence of tokens via nextToken()
    and also must reveal its source of characters; CommonToken's text is
    computed from a CharStream; it only stores indices into the char stream.

    Errors from the lexer are never passed to the parser.  Either you want
    to keep going or you do not upon token recognition error.  If you do not
    want to continue lexing then you do not want to continue parsing.  Just
    raise an exception that is not a RecognitionException and it will
    propagate out of nextToken().  If you want to continue lexing then you
    should not raise an exception to the parser--it has already requested a
    token.  Keep lexing until you get a valid one.  Just report errors and
    keep going, looking for a valid token.
    """

    def nextToken(self):
        """Return a Token object from your input stream (usually a CharStream).

        Do not fail/return upon lexing error; keep chewing on the characters
        until you get a good one; errors are not passed through to the parser.

        Abstract: this base implementation always raises NotImplementedError.
        """

        raise NotImplementedError


class Lexer(BaseRecognizer, TokenSource):
    """
    @brief Baseclass for generated lexer classes.

    A lexer is a recognizer that draws input symbols from a character stream.
    Lexer grammars result in a subclass of this object. A Lexer object
    uses simplified match() and error recovery mechanisms in the interest
    of speed.
    """

    def __init__(self, input):
        """Create a lexer that reads characters from the stream `input`."""

        BaseRecognizer.__init__(self)
        TokenSource.__init__(self)

        # Where is the lexer drawing characters from?
        self.input = input

        # The goal of all lexer rules/methods is to create a token object.
        # This is an instance variable as multiple rules may collaborate to
        # create a single token.  nextToken will return this object after
        # matching lexer rule(s).  If you subclass to allow multiple token
        # emissions, then set this to the last token to be matched or
        # something non-None so that the auto token emit mechanism will not
        # emit another token.
        self.token = None

        # What character index in the stream did the current token start at?
        # Needed, for example, to get the text for current token.  Set at
        # the start of nextToken.
        self.tokenStartCharIndex = -1

        # The line on which the first character of the token resides
        self.tokenStartLine = -1

        # The character position of first character within the line
        self.tokenStartCharPositionInLine = -1

        # The channel number for the current token
        self.channel = DEFAULT_CHANNEL

        # The token type for the current token
        self.type = INVALID_TOKEN_TYPE

        # You can set the text for the current token to override what is in
        # the input char buffer.  Use setText() or can set this instance var.
        self._text = None

    def reset(self):
        """Reset the recognizer and lexer state and rewind the input, if any."""

        BaseRecognizer.reset(self)  # reset all recognizer state variables

        # whack Lexer state variables back to their initial values
        self.token = None
        self.type = INVALID_TOKEN_TYPE
        self.channel = DEFAULT_CHANNEL
        self.tokenStartCharIndex = -1
        self.tokenStartLine = -1
        self.tokenStartCharPositionInLine = -1
        self._text = None

        if self.input is not None:
            self.input.seek(0)  # rewind the input

    def nextToken(self):
        """
        Return a token from this source; i.e., match a token on the char
        stream.

        Returns EOF_TOKEN when the next input symbol is EOF.  Otherwise runs
        mTokens() and returns the resulting token, emitting one automatically
        if the rule did not set self.token.  Rules that finish with
        self.token set to SKIP_TOKEN produce no token; the loop simply tries
        again.  A RecognitionException is reported and recovered from, and
        lexing continues; any other exception propagates to the caller.
        """

        while True:
            # Start every attempt with fresh per-token state.
            self.token = None
            self.channel = DEFAULT_CHANNEL
            self.tokenStartCharIndex = self.input.index()
            self.tokenStartCharPositionInLine = self.input.charPositionInLine
            self.tokenStartLine = self.input.line
            self._text = None

            if self.input.LA(1) == EOF:
                return EOF_TOKEN

            try:
                self.mTokens()

                if self.token is None:
                    # The rule did not create a token itself; build one from
                    # the recorded start position and current lexer state.
                    self.emit()

                elif self.token == SKIP_TOKEN:
                    continue

                return self.token

            except RecognitionException as exc:
                # Never hand lexer errors to the caller: report, recover and
                # go around the loop looking for the next valid token.
                self.reportError(exc)
                self.recover(exc)

    def skip(self):
        """
        Instruct the lexer to skip creating a token for current lexer rule
        and look for another token.  nextToken() knows to keep looking when
        a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
        if token is None at end of any token rule, it creates one for you
        and emits it.
        """

        self.token = SKIP_TOKEN

    def mTokens(self):
        """This is the lexer entry point that sets instance var 'token'"""

        # abstract method
        raise NotImplementedError

    def setCharStream(self, input):
        """Set the char stream and reset the lexer"""

        # Clear the old stream first so that reset() does not rewind it.
        self.input = None
        self.reset()
        self.input = input

    def emit(self, token=None):
        """
        The standard method called to automatically emit a token at the
        outermost lexical rule.  The token object should point into the
        char buffer start..stop.  If there is a text override in 'text',
        use that to set the token's text.

        If `token` is None a CommonToken is built from the current lexer
        state (type, channel, start index, line, column, text); otherwise the
        given token is used unchanged.  The token is stored in self.token
        and returned.
        """

        if token is None:
            token = CommonToken(
                input=self.input,
                type=self.type,
                channel=self.channel,
                start=self.tokenStartCharIndex,
                stop=self.getCharIndex() - 1
                )
            token.line = self.tokenStartLine
            token.text = self.text
            token.charPositionInLine = self.tokenStartCharPositionInLine

        self.token = token

        return token

    def match(self, s):
        """
        Match `s` against the upcoming input and consume it.

        If `s` is a string, each of its characters is compared with LA(1)
        and consumed in turn; otherwise `s` is compared with LA(1) as a
        single symbol.  On success self.failed is set to False.

        On a mismatch while self.backtracking > 0, self.failed is set to
        True and the method returns without raising.  Otherwise a
        MismatchedTokenException is created, recover() is called with it,
        and it is raised.
        """

        if isinstance(s, basestring):
            for expected in s:
                if self.input.LA(1) != expected:
                    if self.backtracking > 0:
                        self.failed = True
                        return

                    mte = MismatchedTokenException(expected, self.input)
                    self.recover(mte)
                    raise mte

                self.input.consume()
                self.failed = False

        else:
            if self.input.LA(1) != s:
                if self.backtracking > 0:
                    self.failed = True
                    return

                mte = MismatchedTokenException(s, self.input)
                self.recover(mte)
                raise mte

            self.input.consume()
            self.failed = False

    def matchAny(self):
        """Consume the next input symbol, whatever it is."""

        self.input.consume()

    def matchRange(self, a, b):
        """
        Match one input symbol in the inclusive range a..b and consume it.

        On success self.failed is set to False.  If LA(1) is outside the
        range while self.backtracking > 0, self.failed is set to True and the
        method returns without raising.  Otherwise a MismatchedRangeException
        is created, recover() is called with it, and it is raised.
        """

        # Look ahead once; the value is needed for both bounds checks.
        la = self.input.LA(1)
        if la < a or la > b:
            if self.backtracking > 0:
                self.failed = True
                return

            mre = MismatchedRangeException(a, b, self.input)
            self.recover(mre)
            raise mre

        self.input.consume()
        self.failed = False

    def getLine(self):
        """Return the `line` attribute of the input stream."""

        return self.input.line

    def getCharPositionInLine(self):
        """Return the `charPositionInLine` attribute of the input stream."""

        return self.input.charPositionInLine

    def getCharIndex(self):
        """What is the index of the current character of lookahead?"""

        return self.input.index()

    def getText(self):
        """
        Return the text matched so far for the current token or any
        text override.
        """

        if self._text is not None:
            return self._text

        return self.input.substring(
            self.tokenStartCharIndex,
            self.getCharIndex() - 1
            )

    def setText(self, text):
        """
        Set the complete text of this token; it wipes any previous
        changes to the text.
        """

        self._text = text

    text = property(getText, setText)

    def reportError(self, e):
        """Report the recognition error `e` via displayRecognitionError()."""

        ## TODO: not thought about recovery in lexer yet.

        ## # if we've already reported an error and have not matched a token
        ## # yet successfully, don't report any errors.
        ## if self.errorRecovery:
        ##     #System.err.print("[SPURIOUS] ");
        ##     return;
        ##
        ## self.errorRecovery = True

        self.displayRecognitionError(self.tokenNames, e)

    def getErrorMessage(self, e, tokenNames):
        """
        Build a human readable, character oriented message for exception `e`.

        Exception types not handled here are delegated to
        BaseRecognizer.getErrorMessage().
        """

        if isinstance(e, MismatchedTokenException):
            msg = ("mismatched character "
                   + self.getCharErrorDisplay(e.c)
                   + " expecting "
                   + self.getCharErrorDisplay(e.expecting))

        elif isinstance(e, NoViableAltException):
            msg = ("no viable alternative at character "
                   + self.getCharErrorDisplay(e.c))

        elif isinstance(e, EarlyExitException):
            msg = ("required (...)+ loop did not match anything at character "
                   + self.getCharErrorDisplay(e.c))

        elif isinstance(e, MismatchedSetException):
            msg = ("mismatched character "
                   + self.getCharErrorDisplay(e.c)
                   + " expecting set "
                   + repr(e.expecting))

        elif isinstance(e, MismatchedNotSetException):
            msg = ("mismatched character "
                   + self.getCharErrorDisplay(e.c)
                   + " expecting set "
                   + repr(e.expecting))

        elif isinstance(e, MismatchedRangeException):
            msg = ("mismatched character "
                   + self.getCharErrorDisplay(e.c)
                   + " expecting set "
                   + self.getCharErrorDisplay(e.a)
                   + ".."
                   + self.getCharErrorDisplay(e.b))

        else:
            msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)

        return msg

    def getCharErrorDisplay(self, c):
        """Return repr() of symbol `c`, showing EOF as '<EOF>'."""

        if c == EOF:
            c = '<EOF>'
        return repr(c)

    def recover(self, re):
        """
        Lexers can normally match any char in its vocabulary after matching
        a token, so do the easy thing and just kill a character and hope
        it all works out.  You can instead use the rule invocation stack
        to do sophisticated error recovery if you are in a fragment rule.
        """

        self.input.consume()

    def traceIn(self, ruleName, ruleIndex):
        """Trace rule entry, tagging it with the lookahead symbol and position."""

        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
                                         self.getLine(),
                                         self.getCharPositionInLine()
                                         )

        BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol)

    def traceOut(self, ruleName, ruleIndex):
        """Trace rule exit, tagging it with the lookahead symbol and position."""

        inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
                                         self.getLine(),
                                         self.getCharPositionInLine()
                                         )

        BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol)


class Parser(BaseRecognizer):
    """
    @brief Baseclass for generated parser classes.
    """

    def __init__(self, lexer):
        """
        Create a parser reading from `lexer`.

        Despite its name, `lexer` is installed as the parser's token stream
        (see setTokenStream()): it is stored in self.input and must provide
        seek() and LT().
        """

        BaseRecognizer.__init__(self)

        self.setTokenStream(lexer)

    def reset(self):
        """Reset the recognizer state and rewind the token stream, if any."""

        BaseRecognizer.reset(self)  # reset all recognizer state variables
        if self.input is not None:
            self.input.seek(0)  # rewind the input

    def setTokenStream(self, input):
        """Set the token stream and reset the parser"""

        # Clear the old stream first so that reset() does not rewind it (and
        # so that self.input exists when this is called from __init__).
        self.input = None
        self.reset()
        self.input = input

    def getTokenStream(self):
        """Return the token stream this parser reads from."""

        return self.input

    def traceIn(self, ruleName, ruleIndex):
        """Trace rule entry, tagging it with the current lookahead token."""

        BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))

    def traceOut(self, ruleName, ruleIndex):
        """Trace rule exit, tagging it with the current lookahead token."""

        BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?