yacc.py
#-----------------------------------------------------------------------------
# ply: yacc.py
#
# Author(s): David M. Beazley (dave@dabeaz.com)
#
# Copyright (C) 2001-2007, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# See the file COPYING for a complete copy of the LGPL.
#
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions.  The grammar is specified by supplying the BNF inside
# Python documentation strings.  The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented.  The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

__version__ = "2.3"

#-----------------------------------------------------------------------------
#                  === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method
error_count = 3                # Number of symbols that must be shifted to leave recovery mode

import re, types, sys, cStringIO, md5, os.path

# Exception raised for yacc-related errors
class YaccError(Exception):   pass

# Available instance types.  This is used when parsers are defined by a class.
# It's a little funky because I want to preserve backwards compatibility
# with Python 2.0 where types.ObjectType is undefined.

try:
    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
    _INSTANCETYPE = types.InstanceType
    class object: pass         # Note: needed if no new-style classes present

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol(object):
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.pbstack = []
        self.stack = stack

    def __getitem__(self, n):
        if n >= 0: return self.slice[n].value
        else: return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], "lineno", 0)

    def linespan(self, n):
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos

    def pushback(self, n):
        if n <= 0:
            raise ValueError, "Expected a positive value"
        if n > (len(self.slice) - 1):
            raise ValueError, "Can't push %d tokens. Only %d are available." % (n, len(self.slice) - 1)
        for i in range(0, n):
            self.pbstack.append(self.slice[-i - 1])
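# ---------------------------------------------------------------------------
# Illustrative sketch (not part of yacc.py): how a grammar rule function sees
# the YaccProduction object described above.  The rule and symbol names used
# here (p_expression_plus, expression, term, PLUS) are hypothetical examples
# in the style of the PLY documentation; a real grammar supplies its own.
# ---------------------------------------------------------------------------

def p_expression_plus(p):
    'expression : expression PLUS term'
    # p[1] and p[3] read the .value attribute of the underlying YaccSymbol
    # objects on the parsing stack; assigning p[0] sets the value that the
    # left-hand 'expression' will carry after the reduction.
    p[0] = p[1] + p[3]
    # The positional helpers defined above are available on the same object
    # (full line/position data requires parsing with tracking enabled):
    startline, endline = p.linespan(1)    # line range spanned by the first 'expression'
    lexstart, lexend   = p.lexspan(3)     # lex positions spanned by 'term'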
# The LR Parsing engine.  This is defined as a class so that multiple parsers
# can exist in the same process.  A user never instantiates this directly.
# Instead, the global yacc() function should be used to create a suitable Parser
# object.

class Parser:
    def __init__(self, magic=None):

        # This is a hack to keep users from trying to instantiate a Parser
        # object directly.

        if magic != "xyzzy":
            raise YaccError, "Can't instantiate Parser. Use yacc() instead."

        # Reset internal state
        self.productions = None          # List of productions
        self.errorfunc   = None          # Error handling function
        self.action      = { }           # LR Action table
        self.goto        = { }           # LR goto table
        self.require     = { }           # Attribute require table
        self.method      = "Unknown LR"  # Table construction method used

    def errok(self):
        self.errorok = 1

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self, input=None, lexer=None, debug=0, tracking=0):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table
        goto    = self.goto              # Local reference to goto table
        prod    = self.productions       # Local reference to production list
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            import lex
            lexer = lex.lexer

        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input:
            lexer.input(input)

        # Tokenize function
        get_token = lexer.token

        statestack = [ ]                 # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                 # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack          # Put in the production
        errtoken   = None                # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that.  Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
            if debug > 1:
                print 'state', state
            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'
            if debug:
                errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if debug > 1:
                print 'action', t
            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    if ltype == '$end':
                        # Error, end of input
                        sys.stderr.write("yacc: Parse error. EOF\n")
                        return
                    statestack.append(t)
                    state = t
                    if debug > 1:
                        sys.stderr.write("%-60s shift state %s\n" % (errorlead, t))
                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None
                    if debug > 1:
                        sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t))

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, "endlineno", t1.lineno)
                            sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos)
                        del symstack[-plen:]
                        del statestack[-plen:]
                    else:
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        targ = [ sym ]
                    pslice.slice = targ

                    # Call the grammar rule with our special slice object
                    p.func(pslice)

                    # If there was a pushback, put that on the stack
                    if pslice.pbstack:
                        lookaheadstack.append(lookahead)
                        for _t in pslice.pbstack:
                            lookaheadstack.append(_t)
                        lookahead = None
                        pslice.pbstack = []

                    symstack.append(sym)
                    state = goto[statestack[-1]][pname]
                    statestack.append(state)
                    continue

                if t == 0:
                    n = symstack[-1]
                    return getattr(n, "value", None)

            if t == None:
                if debug:
                    sys.stderr.write(errorlead + "\n")

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok, token, restart
                        errok = self.errok            # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart     # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
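# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of yacc.py): a minimal grammar module in
# the style of the PLY documentation, assuming lex.py and yacc.py are importable
# directly as in PLY 2.x.  All token and rule names below (NUMBER, PLUS,
# p_expr_*, t_*) are hypothetical examples, not definitions from this module.
# ---------------------------------------------------------------------------

import lex, yacc

tokens = ('NUMBER', 'PLUS')

t_PLUS   = r'\+'
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_error(t):
    t.lexer.skip(1)

def p_expr_plus(p):
    'expr : expr PLUS NUMBER'
    p[0] = p[1] + p[3]

def p_expr_number(p):
    'expr : NUMBER'
    p[0] = p[1]

def p_error(p):
    print "Syntax error at", p

lex.lex()                # build the lexer from the t_* rules above
parser = yacc.yacc()     # build the LR tables; writes 'parsetab.py' and, with
                         # yaccdebug enabled, the 'parser.out' debugging file
print parser.parse("1 + 2 + 3")    # prints 6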