isa_parser.py

来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作」· Python 代码 · 共 1,867 行 · 第 1/5 页

PY
1,867
字号
# Copyright (c) 2003, 2004, 2005
# The Regents of The University of Michigan
# All Rights Reserved
#
# This code is part of the M5 simulator.
#
# Permission is granted to use, copy, create derivative works and
# redistribute this software and such derivative works for any
# purpose, so long as the copyright notice above, this grant of
# permission, and the disclaimer below appear in all copies made; and
# so long as the name of The University of Michigan is not used in any
# advertising or publicity pertaining to the use or distribution of
# this software without specific, written prior authorization.
#
# THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE
# UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND
# WITHOUT WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER
# EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE. THE REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE
# LIABLE FOR ANY DAMAGES, INCLUDING DIRECT, SPECIAL, INDIRECT,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM
# ARISING OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
# IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGES.
#
# Authors: Steven K. Reinhardt
#          Korey Sewell

import os
import sys
import re
import string
import traceback

# get type names
from types import *

# Prepend the directory where the PLY lex & yacc modules are found
# to the search path.  Assumes we're compiling in a subdirectory
# of 'build' in the current tree.
sys.path[0:0] = [os.environ['M5_PLY']]

from ply import lex
from ply import yacc

#####################################################################
#
#                                Lexer
#
# The PLY lexer module takes two things as input:
# - A list of token names (the string list 'tokens')
# - A regular expression describing a match for each token.  The
#   regexp for token FOO can be provided in two ways:
#   - as a string variable named t_FOO
#   - as the doc string for a function named t_FOO.  In this case,
#     the function is also executed, allowing an action to be
#     associated with each token match.
#
#####################################################################

# Reserved words.  These are listed separately as they are matched
# using the same regexp as generic IDs, but distinguished in the
# t_ID() function.  The PLY documentation suggests this approach.
reserved = (
    'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
    'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
    'OUTPUT', 'SIGNED', 'TEMPLATE'
    )

# List of tokens.  The lex module requires this.
tokens = reserved + (
    # identifier
    'ID',

    # integer literal
    'INTLIT',

    # string literal
    'STRLIT',

    # code literal
    'CODELIT',

    # ( ) [ ] { } < > , ; . : :: *
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'LESS', 'GREATER', 'EQUALS',
    'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
    'ASTERISK',

    # C preprocessor directives
    'CPPDIRECTIVE'

# The following are matched but never returned. commented out to
# suppress PLY warning
    # newfile directive
#    'NEWFILE',

    # endfile directive
#    'ENDFILE'
)

# Regular expressions for token matching.
# Simple punctuation tokens need no action, so they are given as
# plain string variables (t_FOO) rather than functions.
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_LESS             = r'\<'
t_GREATER          = r'\>'
t_EQUALS           = r'='
t_COMMA            = r','
t_SEMI             = r';'
t_DOT              = r'\.'
t_COLON            = r':'
t_DBLCOLON         = r'::'
t_ASTERISK	   = r'\*'

# Identifiers and reserved words.
# Build a lookup table mapping the lowercase spelling of each reserved
# word to its token name, so t_ID can reclassify matches cheaply.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

# Identifier token: any match that is a reserved word gets retyped
# to that reserved word's token; everything else stays an 'ID'.
def t_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map.get(t.value,'ID')
    return t

# Integer literal: decimal or 0x-prefixed hex.  The int(...,0) call
# lets Python pick the base from the prefix; overflow is reported via
# error() and the token value falls back to 0 so parsing can continue.
def t_INTLIT(t):
    r'(0x[\da-fA-F]+)|\d+'
    try:
        t.value = int(t.value,0)
    except ValueError:
        error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
        t.value = 0
    return t

# String literal.  Note that these use only single quotes, and
# can span multiple lines.
def t_STRLIT(t):
    r"(?m)'([^'])+'"
    # strip off quotes
    t.value = t.value[1:-1]
    # keep the lexer's line counter in sync with embedded newlines
    t.lexer.lineno += t.value.count('\n')
    return t
like a string literal, but delimiters are# '{{' and '}}' so they get formatted nicely under emacs c-modedef t_CODELIT(t):    r"(?m)\{\{([^\}]|}(?!\}))+\}\}"    # strip off {{ & }}    t.value = t.value[2:-2]    t.lexer.lineno += t.value.count('\n')    return tdef t_CPPDIRECTIVE(t):    r'^\#[^\#].*\n'    t.lexer.lineno += t.value.count('\n')    return tdef t_NEWFILE(t):    r'^\#\#newfile\s+"[\w/.-]*"'    fileNameStack.push((t.value[11:-1], t.lexer.lineno))    t.lexer.lineno = 0def t_ENDFILE(t):    r'^\#\#endfile'    (old_filename, t.lexer.lineno) = fileNameStack.pop()## The functions t_NEWLINE, t_ignore, and t_error are# special for the lex module.## Newlinesdef t_NEWLINE(t):    r'\n+'    t.lexer.lineno += t.value.count('\n')# Commentsdef t_comment(t):    r'//.*'# Completely ignored characterst_ignore           = ' \t\x0c'# Error handlerdef t_error(t):    error(t.lexer.lineno, "illegal character '%s'" % t.value[0])    t.skip(1)# Build the lexerlexer = lex.lex()#######################################################################                                Parser## Every function whose name starts with 'p_' defines a grammar rule.# The rule is encoded in the function's doc string, while the# function body provides the action taken when the rule is matched.# The argument to each function is a list of the values of the# rule's symbols: t[0] for the LHS, and t[1..n] for the symbols# on the RHS.  For tokens, the value is copied from the t.value# attribute provided by the lexer.  For non-terminals, the value# is assigned by the producing rule; i.e., the job of the grammar# rule function is to set the value for the non-terminal on the LHS# (by assigning to t[0]).###################################################################### The LHS of the first grammar rule is used as the start symbol# (in this case, 'specification').  Note that this rule enforces# that there will be exactly one namespace declaration, with 0 or more# global defs/decls before and after it.  
The defs & decls before# the namespace decl will be outside the namespace; those after# will be inside.  The decoder function is always inside the namespace.def p_specification(t):    'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'    global_code = t[1]    isa_name = t[2]    namespace = isa_name + "Inst"    # wrap the decode block as a function definition    t[4].wrap_decode_block('''StaticInstPtr%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst){    using namespace %(namespace)s;''' % vars(), '}')    # both the latter output blocks and the decode block are in the namespace    namespace_code = t[3] + t[4]    # pass it all back to the caller of yacc.parse()    t[0] = (isa_name, namespace, global_code, namespace_code)# ISA name declaration looks like "namespace <foo>;"def p_name_decl(t):    'name_decl : NAMESPACE ID SEMI'    t[0] = t[2]# 'opt_defs_and_outputs' is a possibly empty sequence of# def and/or output statements.def p_opt_defs_and_outputs_0(t):    'opt_defs_and_outputs : empty'    t[0] = GenCode()def p_opt_defs_and_outputs_1(t):    'opt_defs_and_outputs : defs_and_outputs'    t[0] = t[1]def p_defs_and_outputs_0(t):    'defs_and_outputs : def_or_output'    t[0] = t[1]def p_defs_and_outputs_1(t):    'defs_and_outputs : defs_and_outputs def_or_output'    t[0] = t[1] + t[2]# The list of possible definition/output statements.def p_def_or_output(t):    '''def_or_output : def_format                     | def_bitfield                     | def_bitfield_struct                     | def_template                     | def_operand_types                     | def_operands                     | output_header                     | output_decoder                     | output_exec                     | global_let'''    t[0] = t[1]# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied# directly to the appropriate output section.# Protect any non-dict-substitution '%'s in a format string# (i.e. 
# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
# directly to the appropriate output section.

# Protect any non-dict-substitution '%'s in a format string
# (i.e. those not followed by '(')
def protect_non_subst_percents(s):
    return re.sub(r'%(?!\()', '%%', s)

# Massage output block by substituting in template definitions and bit
# operators.  We handle '%'s embedded in the string that don't
# indicate template substitutions (or CPU-specific symbols, which get
# handled in GenCode) by doubling them first so that the format
# operation will reduce them back to single '%'s.
def process_output(s):
    s = protect_non_subst_percents(s)
    # protects cpu-specific symbols too
    s = protect_cpu_symbols(s)
    # '%' formatting against templateMap performs the template
    # substitution; substBitOps then expands bit-operator syntax
    return substBitOps(s % templateMap)

# 'output header {{...}};' -- code copied to the generated header file
def p_output_header(t):
    'output_header : OUTPUT HEADER CODELIT SEMI'
    t[0] = GenCode(header_output = process_output(t[3]))

# 'output decoder {{...}};' -- code copied to the decoder output file
def p_output_decoder(t):
    'output_decoder : OUTPUT DECODER CODELIT SEMI'
    t[0] = GenCode(decoder_output = process_output(t[3]))

# 'output exec {{...}};' -- code copied to the exec output file
def p_output_exec(t):
    'output_exec : OUTPUT EXEC CODELIT SEMI'
    t[0] = GenCode(exec_output = process_output(t[3]))

# global let blocks 'let {{...}}' (Python code blocks) are executed
# directly when seen.  Note that these execute in a special variable
# context 'exportContext' to prevent the code from polluting this
# script's namespace.
def p_global_let(t):
    'global_let : LET CODELIT SEMI'
    updateExportContext()
    # pre-seed the output slots the let block may append to
    exportContext["header_output"] = ''
    exportContext["decoder_output"] = ''
    exportContext["exec_output"] = ''
    exportContext["decode_block"] = ''
    try:
        exec fixPythonIndentation(t[2]) in exportContext
    except Exception, exc:
        error(t.lexer.lineno,
              'error: %s in global let block "%s".' % (exc, t[2]))
    # collect whatever the let block produced into a GenCode object
    t[0] = GenCode(header_output = exportContext["header_output"],
                   decoder_output = exportContext["decoder_output"],
                   exec_output = exportContext["exec_output"],
                   decode_block = exportContext["decode_block"])

# Define the mapping from operand type extensions to C++ types and bit
# widths (stored in operandTypeMap).
def p_def_operand_types(t):
    'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
    try:
        # the code literal is the body of a Python dict literal
        userDict = eval('{' + t[3] + '}')
    except Exception, exc:
        error(t.lexer.lineno,
              'error: %s in def operand_types block "%s".' % (exc, t[3]))
    buildOperandTypeMap(userDict, t.lexer.lineno)
    t[0] = GenCode() # contributes nothing to the output C++ file

# Define the mapping from operand names to operand classes and other
# traits.  Stored in operandNameMap.
def p_def_operands(t):
    'def_operands : DEF OPERANDS CODELIT SEMI'
    # operandTypeMap is only created by a prior 'def operand_types'
    if not globals().has_key('operandTypeMap'):
        error(t.lexer.lineno,
              'error: operand types must be defined before operands')
    try:
        # the code literal is the body of a Python dict literal
        userDict = eval('{' + t[3] + '}')
    except Exception, exc:
        error(t.lexer.lineno,
              'error: %s in def operands block "%s".' % (exc, t[3]))
    buildOperandNameMap(userDict, t.lexer.lineno)
    t[0] = GenCode() # contributes nothing to the output C++ file

# A bitfield definition looks like:
# 'def [signed] bitfield <ID> [<first>:<last>]'
# This generates a preprocessor macro in the output file.
def p_def_bitfield_0(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
    # extract bits <first>:<last> of the instruction word
    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
    if (t[2] == 'signed'):
        # sign-extend from the field's width
        expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
    # emit as an #undef/#define pair so redefinitions are silent
    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
    t[0] = GenCode(header_output = hash_define)

# alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
def p_def_bitfield_1(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
    # single-bit field: first == last == <bit>
    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
    if (t[2] == 'signed'):
        # a one-bit field sign-extends from width 1
        expr = 'sext<%d>(%s)' % (1, expr)
    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
    t[0] = GenCode(header_output = hash_define)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?