gardensnake.py
来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作」· Python 代码 · 共 710 行 · 第 1/2 页
PY
710 行
# GardenSnake - a parser generator demonstration program
#
# This implements a modified version of a subset of Python:
#  - only 'def', 'return' and 'if' statements
#  - 'if' only has 'then' clause (no elif nor else)
#  - single-quoted strings only, content in raw format
#  - numbers are decimal.Decimal instances (not integers or floats)
#  - no print statement; use the built-in 'print' function
#  - only < > == + - / * implemented (and unary + -)
#  - assignment and tuple assignment work
#  - no generators of any sort
#  - no ... well, no quite a lot

# Why?  I'm thinking about a new indentation-based configuration
# language for a project and wanted to figure out how to do it.  Once
# I got that working I needed a way to test it out.  My original AST
# was dumb so I decided to target Python's AST and compile it into
# Python code.  Plus, it's pretty cool that it only took a day or so
# from sitting down with Ply to having working code.

# This uses David Beazley's Ply from http://www.dabeaz.com/ply/

# This work is hereby released into the Public Domain. To view a copy of
# the public domain dedication, visit
# http://creativecommons.org/licenses/publicdomain/ or send a letter to
# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
# California, 94105, USA.
#
# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
#          Andrew Dalke / Dalke Scientific Software, LLC
#          30 August 2006 / Cape Town, South Africa

# Changelog:
#  30 August - added link to CC license; removed the "swapcase" encoding

# Modifications for inclusion in PLY distribution
import sys
sys.path.insert(0, "../..")
from ply import *

##### Lexer ######
#import lex
import decimal

# Token names for the grammar.  INDENT / DEDENT / ENDMARKER never come
# out of the raw lex rules below; they are synthesized by the
# post-processing filters further down the file.
tokens = (
    'DEF',
    'IF',
    'NAME',
    'NUMBER',  # Python decimals
    'STRING',  # single quoted strings only; syntax of raw strings
    'LPAR',
    'RPAR',
    'COLON',
    'EQ',
    'ASSIGN',
    'LT',
    'GT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'RETURN',
    'WS',
    'NEWLINE',
    'COMMA',
    'SEMICOLON',
    'INDENT',
    'DEDENT',
    'ENDMARKER',
)

#t_NUMBER = r'\d+'
# Number pattern taken from decimal.py, but without the leading sign.
# The token value is converted to a decimal.Decimal instance.
def t_NUMBER(t):
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
    t.value = decimal.Decimal(t.value)
    return t

# Single-quoted string; the quotes are stripped and escapes decoded.
# NOTE(review): str.decode("string-escape") is Python 2 only — this
# file targets Python 2 throughout.
def t_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
    t.value = t.value[1:-1].decode("string-escape")  # .swapcase() # for fun
    return t

# Simple one- and two-character operator tokens.
t_COLON = r':'
t_EQ = r'=='
t_ASSIGN = r'='
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
t_COMMA = r','
t_SEMICOLON = r';'

# Ply nicely documented how to do this: keywords are matched as NAME
# and then reclassified via a lookup table.
RESERVED = {
    "def": "DEF",
    "if": "IF",
    "return": "RETURN",
}

def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    t.type = RESERVED.get(t.value, "NAME")
    return t
# Putting this before t_WS let it consume lines with only comments in
# them so the latter code never sees the WS part.  Not consuming the
# newline.  Needed for "if 1: #comment"
def t_comment(t):
    r"[ ]*\043[^\n]*"  # \043 is '#'
    pass  # comments produce no token


# Whitespace: only significant (returned as a WS token) at the start of
# a line and outside parentheses, where it encodes indentation depth.
def t_WS(t):
    r' [ ]+ '
    if t.lexer.at_line_start and t.lexer.paren_count == 0:
        return t


# Don't generate newline tokens when inside of parenthesis, eg
#    a = (1,
#         2, 3)
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    if t.lexer.paren_count == 0:
        return t


def t_LPAR(t):
    r'\('
    t.lexer.paren_count += 1
    return t


def t_RPAR(t):
    r'\)'
    # check for underflow?  should be the job of the parser
    t.lexer.paren_count -= 1
    return t


def t_error(t):
    # Abort on any character no rule matched.
    # FIX: the original had a Python-2 'print "Skipping", ...' statement
    # and a t.lexer.skip(1) call *after* this raise — unreachable dead
    # code (and a syntax error under Python 3); removed.
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
## INDENT / DEDENT generation is implemented as a post-processing filter

# The original lex token stream contains WS and NEWLINE characters.
# WS will only occur before any other tokens on a line.

# I have three filters.  One tags tokens by adding two attributes.
# "must_indent" is True if the token must be indented from the
# previous code.  The other is "at_line_start" which is True for WS
# and the first non-WS/non-NEWLINE on a line.  It flags the check to
# see if the new line has changed indentation level.

# Python's syntax has three INDENT states
#  0) no colon hence no need to indent
#  1) "if 1: go()" - simple statements have a COLON but no need for an indent
#  2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2


# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    """Yield every token from *tokens*, tagging each with two attributes:

    - at_line_start: True for WS tokens and for the first
      non-WS/non-NEWLINE token on a line.
    - must_indent: True only for the first real token after a
      COLON NEWLINE pair (case 2 above).

    The current at_line_start flag is mirrored onto *lexer* so the
    t_WS rule can consult it.
    """
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT
    # FIX: removed an unused 'saw_colon' local that was assigned here
    # but never read.
    for token in tokens:
        token.at_line_start = at_line_start

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT
            token.must_indent = False

        elif token.type == "NEWLINE":
            at_line_start = True
            if indent == MAY_INDENT:
                indent = MUST_INDENT
            token.must_indent = False

        elif token.type == "WS":
            assert token.at_line_start
            at_line_start = True
            token.must_indent = False

        else:
            # A real token; only indent after COLON NEWLINE
            if indent == MUST_INDENT:
                token.must_indent = True
            else:
                token.must_indent = False
            at_line_start = False
            indent = NO_INDENT

        yield token
        lexer.at_line_start = at_line_start


def _new_token(token_type, lineno):
    """Create a synthetic LexToken (value None) of the given type.

    FIX: parameter renamed from 'type' so it no longer shadows the
    builtin; all call sites pass it positionally.
    """
    tok = lex.LexToken()
    tok.type = token_type
    tok.value = None
    tok.lineno = lineno
    return tok


# Synthesize a DEDENT tag
def DEDENT(lineno):
    return _new_token("DEDENT", lineno)


# Synthesize an INDENT tag
def INDENT(lineno):
    return _new_token("INDENT", lineno)


# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
    """Consume WS bookkeeping tokens and blank-line NEWLINEs, yielding
    synthetic INDENT/DEDENT tokens at block boundaries, Python-style.

    Raises IndentationError for a missing indent, an indent outside a
    new block, or a dedent that matches no enclosing level.
    """
    # A stack of indentation levels; will never pop item 0
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False
    for token in tokens:
        # WS only occurs at the start of the line.  There may be WS
        # followed by NEWLINE so only track the depth here.  Don't
        # indent/dedent until there's something real.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # WS tokens are never passed to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # ignore blank lines
                continue
            # pass the other cases on through
            yield token
            continue

        # then it must be a real token (not WS, not NEWLINE)
        # which can affect the indentation level
        prev_was_ws = False
        if token.must_indent:
            # The current depth must be larger than the previous level
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")
            levels.append(depth)
            yield INDENT(token.lineno)

        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # At the same level
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Back up; but only if it matches a previous level
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                for _ in range(i + 1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Must dedent any remaining levels
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)


# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker=True):
    # NOTE(review): keeps the historical name even though it shadows the
    # builtin filter(); renaming would break external callers.
    token = None
    tokens = iter(lexer.token, None)
    tokens = track_tokens_filter(lexer, tokens)
    for token in indentation_filter(tokens):
        yield token

    if add_endmarker:
        lineno = 1
        if token is not None:
            lineno = token.lineno
        yield _new_token("ENDMARKER", lineno)


# Combine Ply and my filters into a new lexer
class IndentLexer(object):
    """Lex-compatible lexer wrapper whose token stream includes the
    synthetic INDENT / DEDENT / ENDMARKER tokens."""

    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize,
                             lextab=lextab, reflags=reflags)
        self.token_stream = None

    def input(self, s, add_endmarker=True):
        # paren_count is lexer-level state consulted by t_WS/t_newline.
        self.lexer.paren_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)

    def token(self):
        # NOTE(review): .next() is the Python 2 iterator protocol; this
        # file targets Python 2 throughout (see 'from compiler import ast').
        try:
            return self.token_stream.next()
        except StopIteration:
            return None


##########   Parser (tokens -> AST) ######

# also part of Ply
#import yacc

# I use the Python AST
from compiler import ast


# Helper function
def Assign(left, right):
    """Build an ast.Assign node for 'left = right'.

    Supports a single ast.Name or an ast.Tuple of Names on the left;
    raises SyntaxError for anything else.

    FIX: removed a redundant 'names = []' initialization at the top of
    the function (the Name branch never used it; the Tuple branch
    re-initialized it).
    """
    if isinstance(left, ast.Name):
        # Single assignment on left
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
    elif isinstance(left, ast.Tuple):
        # List of things - make sure they are Name nodes
        names = []
        for child in left.getChildren():
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            names.append(child.name)
        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
        return ast.Assign([ast.AssTuple(ass_list)], right)
    else:
        raise SyntaxError("Can't do that yet")
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?