📄 yacc.py
字号:
# returned token is the next lookahead lookahead = tok errtoken = None continue else: if errtoken: if hasattr(errtoken,"lineno"): lineno = lookahead.lineno else: lineno = 0 if lineno: sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) else: sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) else: sys.stderr.write("yacc: Parse error in input. EOF\n") return else: errorcount = error_count # case 1: the statestack only has 1 entry on it. If we're in this state, the # entire parse has been rolled back and we're completely hosed. The token is # discarded and we just keep going. if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None # Nuke the pushback stack del lookaheadstack[:] continue # case 2: the statestack has a couple of entries on it, but we're # at the end of the file. nuke the top entry and generate an error token # Start nuking entries on the stack if lookahead.type == '$end': # Whoa. We're really hosed here. Bail out return if lookahead.type != 'error': sym = symstack[-1] if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue lookahead = None continue t = YaccSymbol() t.type = 'error' if hasattr(lookahead,"lineno"): t.lineno = lookahead.lineno t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: symstack.pop() statestack.pop() continue # Call an error function here raise RuntimeError, "yacc: internal parser error!!!\n"# -----------------------------------------------------------------------------# === Parser Construction ===## The following functions and variables are used to implement the yacc() function# itself. This is pretty hairy stuff involving lots of error checking,# construction of LR items, kernels, and so forth. Although a lot of# this work is done using global variables, the resulting Parser object# is completely self contained--meaning that it is safe to repeatedly# call yacc() with different grammars in the same application.# -----------------------------------------------------------------------------# -----------------------------------------------------------------------------# validate_file()## This function checks to see if there are duplicated p_rulename() functions# in the parser module file. Without this function, it is really easy for# users to make mistakes by cutting and pasting code fragments (and it's a real# bugger to try and figure out why the resulting parser doesn't work). Therefore,# we just do a little regular expression pattern matching of def statements# to try and detect duplicates.# -----------------------------------------------------------------------------def validate_file(filename): base,ext = os.path.splitext(filename) if ext != '.py': return 1 # No idea. Assume it's okay. try: f = open(filename) lines = f.readlines() f.close() except IOError: return 1 # Oh well # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') counthash = { } linen = 1 noerror = 1 for l in lines: m = fre.match(l) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: sys.stderr.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename,linen,name,prev)) noerror = 0 linen += 1 return noerror# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.def validate_dict(d): for n,v in d.items(): if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue if n[0:2] == 't_': continue if n[0:2] == 'p_': sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n) if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: try: doc = v.__doc__.split(" ") if doc[1] == ':': sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n)) except StandardError: pass# -----------------------------------------------------------------------------# === GRAMMAR FUNCTIONS ===## The following global variables and functions are used to store, manipulate,# and verify the grammar rules specified by the user.# -----------------------------------------------------------------------------# Initialize all of the global variables used during grammar constructiondef initialize_vars(): global Productions, Prodnames, Prodmap, Terminals global Nonterminals, First, Follow, Precedence, LRitems global Errorfunc, Signature, Requires Productions = [None] # A list of all of the productions. The first # entry is always reserved for the purpose of # building an augmented grammar Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all # productions of that nonterminal. Prodmap = { } # A dictionary that is only used to detect duplicate # productions. Terminals = { } # A dictionary mapping the names of terminal symbols to a # list of the rules where they are used. Nonterminals = { } # A dictionary mapping names of nonterminals to a list # of rule numbers where they are used. First = { } # A dictionary of precomputed FIRST(x) symbols Follow = { } # A dictionary of precomputed FOLLOW(x) symbols Precedence = { } # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) LRitems = [ ] # A list of all LR items for the grammar. These are the # productions with the "dot" like E -> E . PLUS E Errorfunc = None # User defined error handler Signature = md5.new() # Digital signature of the grammar rules, precedence # and other information. Used to determined when a # parsing table needs to be regenerated. Requires = { } # Requires list # File objects used when creating the parser.out debugging file global _vf, _vfc _vf = cStringIO.StringIO() _vfc = cStringIO.StringIO()# -----------------------------------------------------------------------------# class Production:## This class stores the raw information about a single production or grammar rule.# It has a few required attributes:## name - Name of the production (nonterminal)# prod - A list of symbols making up its production# number - Production number.## In addition, a few additional attributes are used to help with debugging or# optimization of table generation.## file - File where production action is defined.# lineno - Line number where action is defined# func - Action function# prec - Precedence level# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'# then lr_next refers to 'E -> E PLUS . E'# lr_index - LR item index (location of the ".") in the prod list.# lookaheads - LALR lookahead symbols for this item# len - Length of the production (number of symbols on right hand side)# -----------------------------------------------------------------------------class Production: def __init__(self,**kw): for k,v in kw.items(): setattr(self,k,v) self.lr_index = -1 self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure self.lr1_added = 0 # Flag indicating whether or not added to LR1 self.usyms = [ ] self.lookaheads = { } self.lk_added = { } self.setnumbers = [ ] def __str__(self): if self.prod: s = "%s -> %s" % (self.name," ".join(self.prod)) else: s = "%s -> <empty>" % self.name return s def __repr__(self): return str(self) # Compute lr_items from the production def lr_item(self,n): if n > len(self.prod): return None p = Production() p.name = self.name p.prod = list(self.prod) p.number = self.number p.lr_index = n p.lookaheads = { } p.setnumbers = self.setnumbers p.prod.insert(n,".") p.prod = tuple(p.prod) p.len = len(p.prod) p.usyms = self.usyms # Precompute list of productions immediately following try: p.lrafter = Prodnames[p.prod[n+1]] except (IndexError,KeyError),e: p.lrafter = [] try: p.lrbefore = p.prod[n-1] except IndexError: p.lrbefore = None return pclass MiniProduction: pass# regex matching identifiers_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')# -----------------------------------------------------------------------------# add_production()## Given an action function, this function assembles a production rule.# The production rule is assumed to be found in the function's docstring.# This rule has the general syntax:## name1 ::= production1# | production2# | production3# ...# | productionn# name2 ::= production1# | production2# ...# -----------------------------------------------------------------------------def add_production(f,file,line,prodname,syms): if Terminals.has_key(prodname): sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname)) return -1 if prodname == 'error': sys.stderr.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line,prodname)) return -1 if not _is_identifier.match(prodname): sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname)) return -1 for x in range(len(syms)): s = syms[x] if s[0] in "'\"": try: c = eval(s) if (len(c) > 1): sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) return -1 if not Terminals.has_key(c): Terminals[c] = [] syms[x] = c continue except SyntaxError: pass if not _is_identifier.match(s) and s != '%prec': sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname)) return -1 # See if the rule is already in the rulemap map = "%s -> %s" % (prodname,syms) if Prodmap.has_key(map): m = Prodmap[map] sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m)) sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line)) return -1 p = Production() p.name = prodname p.prod = syms p.file = file p.line = line p.func = f p.number = len(Productions) Productions.append(p) Prodmap[map] = p if not Nonterminals.has_key(prodname): Nonterminals[prodname] = [ ] # Add all terminals to Terminals i = 0 while i < len(p.prod): t = p.prod[i] if t == '%prec': try: precname = p.prod[i+1] except IndexError: sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line)) return -1 prec = Precedence.get(precname,None) if not prec: sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname)) return -1 else: p.prec = prec del p.prod[i] del p.prod[i] continue if Terminals.has_key(t):
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -