gardensnake.py

来自「M5」（一个功能强大的多处理器系统模拟器，很多针对处理器架构、性能的研究都使用它作为工具） · Python 代码 · 共 710 行 · 第 1/2 页

PY
710
字号
# GardenSnake - a parser generator demonstration program
#
# This implements a modified version of a subset of Python:
#  - only 'def', 'return' and 'if' statements
#  - 'if' only has 'then' clause (no elif nor else)
#  - single-quoted strings only, content in raw format
#  - numbers are decimal.Decimal instances (not integers or floats)
#  - no print statement; use the built-in 'print' function
#  - only < > == + - / * implemented (and unary + -)
#  - assignment and tuple assignment work
#  - no generators of any sort
#  - no ... well, no quite a lot
#
# Why?  I'm thinking about a new indentation-based configuration
# language for a project and wanted to figure out how to do it.  Once
# I got that working I needed a way to test it out.  My original AST
# was dumb so I decided to target Python's AST and compile it into
# Python code.  Plus, it's pretty cool that it only took a day or so
# from sitting down with Ply to having working code.
#
# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
#
# This work is hereby released into the Public Domain.  To view a copy of
# the public domain dedication, visit
# http://creativecommons.org/licenses/publicdomain/ or send a letter to
# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
# California, 94105, USA.
#
# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
#          Andrew Dalke / Dalke Scientific Software, LLC
#             30 August 2006 / Cape Town, South Africa
#
# Changelog:
#  30 August - added link to CC license; removed the "swapcase" encoding
#
# Modifications for inclusion in PLY distribution

import sys
sys.path.insert(0, "../..")
from ply import *

##### Lexer ######

import lex
import decimal

tokens = (
    'DEF',
    'IF',
    'NAME',
    'NUMBER',  # Python decimals
    'STRING',  # single quoted strings only; syntax of raw strings
    'LPAR',
    'RPAR',
    'COLON',
    'EQ',
    'ASSIGN',
    'LT',
    'GT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'RETURN',
    'WS',
    'NEWLINE',
    'COMMA',
    'SEMICOLON',
    'INDENT',
    'DEDENT',
    'ENDMARKER',
)


# t_NUMBER = r'\d+'
# taken from decimal.py but without the leading sign.
# NOTE: PLY compiles rule regexes with re.VERBOSE, so the embedded
# spaces in this pattern are ignored.
def t_NUMBER(t):
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
    t.value = decimal.Decimal(t.value)
    return t


def t_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
    # Strip the surrounding quotes and interpret backslash escapes.
    # NOTE(review): str.decode("string-escape") is Python 2 only, as is
    # the 'compiler' module this file targets.
    t.value = t.value[1:-1].decode("string-escape")  # .swapcase() # for fun
    return t


t_COLON = r':'
t_EQ = r'=='
t_ASSIGN = r'='
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
t_COMMA = r','
t_SEMICOLON = r';'

# Ply nicely documented how to do this: keywords are matched as NAME
# and then reclassified, rather than having one rule per keyword.
RESERVED = {
    "def": "DEF",
    "if": "IF",
    "return": "RETURN",
}


def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    t.type = RESERVED.get(t.value, "NAME")
    return t


# Putting this before t_WS lets it consume lines with only comments in
# them so the latter code never sees the WS part.  Not consuming the
# newline.  Needed for "if 1: #comment"
def t_comment(t):
    r"[ ]*\043[^\n]*"  # \043 is '#'
    pass


# Whitespace: only significant at the start of a line, and only when
# we are not inside parentheses.
def t_WS(t):
    r' [ ]+ '
    if t.lexer.at_line_start and t.lexer.paren_count == 0:
        return t


# Don't generate newline tokens when inside of parenthesis, eg
#   a = (1,
#        2, 3)
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    if t.lexer.paren_count == 0:
        return t


def t_LPAR(t):
    r'\('
    t.lexer.paren_count += 1
    return t


def t_RPAR(t):
    r'\)'
    # check for underflow?  should be the job of the parser
    t.lexer.paren_count -= 1
    return t


def t_error(t):
    # Any unmatched character is a hard error.  (Dead code that tried
    # to print-and-skip after this raise has been removed; it could
    # never execute.)
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))


# I implemented INDENT / DEDENT generation as a post-processing filter.
#
# The original lex token stream contains WS and NEWLINE characters.
# WS will only occur before any other tokens on a line.
#
# I have three filters.  One tags tokens by adding two attributes.
# "must_indent" is True if the token must be indented from the
# previous code.  The other is "at_line_start" which is True for WS
# and the first non-WS/non-NEWLINE on a line.
# The "at_line_start" flag lets the filter check whether the new line
# has changed indentation level.
#
# Python's syntax has three INDENT states:
#  0) no colon hence no need to indent
#  1) "if 1: go()" - simple statements have a COLON but no need for an indent
#  2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2


# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    """Tag each token with 'at_line_start' and 'must_indent' attributes.

    Also mirrors the at-line-start state onto the lexer itself so the
    t_WS rule can consult it.  (An unused 'saw_colon' local from the
    original has been removed.)
    """
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT
    for token in tokens:
        token.at_line_start = at_line_start

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT
            token.must_indent = False
        elif token.type == "NEWLINE":
            at_line_start = True
            if indent == MAY_INDENT:
                indent = MUST_INDENT
            token.must_indent = False
        elif token.type == "WS":
            assert token.at_line_start == True
            at_line_start = True
            token.must_indent = False
        else:
            # A real token; only indent after COLON NEWLINE
            if indent == MUST_INDENT:
                token.must_indent = True
            else:
                token.must_indent = False
            at_line_start = False
            indent = NO_INDENT

        yield token
        lexer.at_line_start = at_line_start


def _new_token(type, lineno):
    """Create a bare LexToken with the given type and line number."""
    tok = lex.LexToken()
    tok.type = type
    tok.value = None
    tok.lineno = lineno
    return tok


# Synthesize a DEDENT tag
def DEDENT(lineno):
    return _new_token("DEDENT", lineno)


# Synthesize an INDENT tag
def INDENT(lineno):
    return _new_token("INDENT", lineno)


# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
    """Consume WS/NEWLINE bookkeeping tokens and yield INDENT/DEDENT.

    Raises IndentationError when the depth does not match any level on
    the indentation stack.  (Commented-out debug prints from the
    original have been removed.)
    """
    # A stack of indentation levels; will never pop item 0
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False
    for token in tokens:
        # WS only occurs at the start of the line
        # There may be WS followed by NEWLINE so
        # only track the depth here.  Don't indent/dedent
        # until there's something real.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # WS tokens are never passed to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # ignore blank lines
                continue
            # pass the other cases on through
            yield token
            continue

        # then it must be a real token (not WS, not NEWLINE)
        # which can affect the indentation level
        prev_was_ws = False
        if token.must_indent:
            # The current depth must be larger than the previous level
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")
            levels.append(depth)
            yield INDENT(token.lineno)
        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # At the same level
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Back up; but only if it matches a previous level
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                for _ in range(i+1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Must dedent any remaining levels
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)


# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker=True):
    token = None
    tokens = iter(lexer.token, None)
    tokens = track_tokens_filter(lexer, tokens)
    for token in indentation_filter(tokens):
        yield token

    if add_endmarker:
        lineno = 1
        if token is not None:
            lineno = token.lineno
        yield _new_token("ENDMARKER", lineno)


# Combine Ply and my filters into a new lexer
class IndentLexer(object):
    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize,
                             lextab=lextab, reflags=reflags)
        self.token_stream = None

    def input(self, s, add_endmarker=True):
        self.lexer.paren_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)

    def token(self):
        try:
            # next() builtin instead of the Python-2-only .next() method;
            # identical behavior on Python >= 2.6.
            return next(self.token_stream)
        except StopIteration:
            return None


##########   Parser (tokens -> AST) ######

# also part of Ply
#import yacc

# I use the Python AST
from compiler import ast


# Helper function: build an ast.Assign node for single-name or
# tuple-of-names targets.  (A dead 'names = []' that was shadowed
# before use has been removed.)
def Assign(left, right):
    if isinstance(left, ast.Name):
        # Single assignment on left
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
    elif isinstance(left, ast.Tuple):
        # List of things - make sure they are Name nodes
        names = []
        for child in left.getChildren():
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            names.append(child.name)
        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
        return ast.Assign([ast.AssTuple(ass_list)], right)
    else:
        raise SyntaxError("Can't do that yet")

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?