isa_parser.py
来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作」· Python 代码 · 共 1,867 行 · 第 1/5 页
PY
1,867 行
# Copyright (c) 2003, 2004, 2005# The Regents of The University of Michigan# All Rights Reserved## This code is part of the M5 simulator.## Permission is granted to use, copy, create derivative works and# redistribute this software and such derivative works for any# purpose, so long as the copyright notice above, this grant of# permission, and the disclaimer below appear in all copies made; and# so long as the name of The University of Michigan is not used in any# advertising or publicity pertaining to the use or distribution of# this software without specific, written prior authorization.## THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE# UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND# WITHOUT WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER# EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR# PURPOSE. THE REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE# LIABLE FOR ANY DAMAGES, INCLUDING DIRECT, SPECIAL, INDIRECT,# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM# ARISING OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN# IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF SUCH# DAMAGES.## Authors: Steven K. Reinhardt# Korey Sewellimport osimport sysimport reimport stringimport traceback# get type namesfrom types import *# Prepend the directory where the PLY lex & yacc modules are found# to the search path. Assumes we're compiling in a subdirectory# of 'build' in the current tree.sys.path[0:0] = [os.environ['M5_PLY']]from ply import lexfrom ply import yacc####################################################################### Lexer## The PLY lexer module takes two things as input:# - A list of token names (the string list 'tokens')# - A regular expression describing a match for each token. 
The# regexp for token FOO can be provided in two ways:# - as a string variable named t_FOO# - as the doc string for a function named t_FOO. In this case,# the function is also executed, allowing an action to be# associated with each token match.####################################################################### Reserved words. These are listed separately as they are matched# using the same regexp as generic IDs, but distinguished in the# t_ID() function. The PLY documentation suggests this approach.reserved = ( 'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT', 'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS', 'OUTPUT', 'SIGNED', 'TEMPLATE' )# List of tokens. The lex module requires this.tokens = reserved + ( # identifier 'ID', # integer literal 'INTLIT', # string literal 'STRLIT', # code literal 'CODELIT', # ( ) [ ] { } < > , ; . : :: * 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE', 'LESS', 'GREATER', 'EQUALS', 'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON', 'ASTERISK', # C preprocessor directives 'CPPDIRECTIVE'# The following are matched but never returned. commented out to# suppress PLY warning # newfile directive# 'NEWFILE', # endfile directive# 'ENDFILE')# Regular expressions for token matchingt_LPAREN = r'\('t_RPAREN = r'\)'t_LBRACKET = r'\['t_RBRACKET = r'\]'t_LBRACE = r'\{'t_RBRACE = r'\}'t_LESS = r'\<'t_GREATER = r'\>'t_EQUALS = r'='t_COMMA = r','t_SEMI = r';'t_DOT = r'\.'t_COLON = r':'t_DBLCOLON = r'::'t_ASTERISK = r'\*'# Identifiers and reserved wordsreserved_map = { }for r in reserved: reserved_map[r.lower()] = rdef t_ID(t): r'[A-Za-z_]\w*' t.type = reserved_map.get(t.value,'ID') return t# Integer literaldef t_INTLIT(t): r'(0x[\da-fA-F]+)|\d+' try: t.value = int(t.value,0) except ValueError: error(t.lexer.lineno, 'Integer value "%s" too large' % t.value) t.value = 0 return t# String literal. 
# String literal.  Note that these use only single quotes, and
# can span multiple lines.  (No escape sequences: the regexp simply
# forbids embedded single quotes.)
def t_STRLIT(t):
    r"(?m)'([^'])+'"
    # strip off quotes
    t.value = t.value[1:-1]
    # keep the line counter in sync across embedded newlines
    t.lexer.lineno += t.value.count('\n')
    return t

# "Code literal"... like a string literal, but delimiters are
# '{{' and '}}' so they get formatted nicely under emacs c-mode
def t_CODELIT(t):
    r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
    # strip off {{ & }}
    t.value = t.value[2:-2]
    t.lexer.lineno += t.value.count('\n')
    return t

# C preprocessor directive: a line starting with a single '#'
# (double '##' lines are the newfile/endfile directives below).
def t_CPPDIRECTIVE(t):
    r'^\#[^\#].*\n'
    t.lexer.lineno += t.value.count('\n')
    return t

# '##newfile "name"' directive: push the current position onto
# fileNameStack and restart line numbering.  Returns no token.
def t_NEWFILE(t):
    r'^\#\#newfile\s+"[\w/.-]*"'
    fileNameStack.push((t.value[11:-1], t.lexer.lineno))
    t.lexer.lineno = 0

# '##endfile' directive: pop back to the enclosing file's position.
# Returns no token.
def t_ENDFILE(t):
    r'^\#\#endfile'
    (old_filename, t.lexer.lineno) = fileNameStack.pop()

#
# The functions t_NEWLINE, t_ignore, and t_error are
# special for the lex module.
#

# Newlines
def t_NEWLINE(t):
    r'\n+'
    t.lexer.lineno += t.value.count('\n')

# Comments (C++ style '//' to end of line); no token returned.
def t_comment(t):
    r'//.*'

# Completely ignored characters
t_ignore = ' \t\x0c'

# Error handler: report the offending character and skip past it.
def t_error(t):
    error(t.lexer.lineno, "illegal character '%s'" % t.value[0])
    # NOTE(review): modern PLY spells this t.lexer.skip(1); the
    # t.skip() form presumably matches the PLY version bundled at
    # $M5_PLY -- confirm before upgrading PLY.
    t.skip(1)

# Build the lexer
lexer = lex.lex()

#####################################################################
#
#                                Parser
#
# Every function whose name starts with 'p_' defines a grammar rule.
# The rule is encoded in the function's doc string, while the
# function body provides the action taken when the rule is matched.
# The argument to each function is a list of the values of the
# rule's symbols: t[0] for the LHS, and t[1..n] for the symbols
# on the RHS.  For tokens, the value is copied from the t.value
# attribute provided by the lexer.  For non-terminals, the value
# is assigned by the producing rule; i.e., the job of the grammar
# rule function is to set the value for the non-terminal on the LHS
# (by assigning to t[0]).
#####################################################################

# The LHS of the first grammar rule is used as the start symbol
# (in this case, 'specification').
# Note that this rule enforces
# that there will be exactly one namespace declaration, with 0 or more
# global defs/decls before and after it.  The defs & decls before
# the namespace decl will be outside the namespace; those after
# will be inside.  The decoder function is always inside the namespace.
def p_specification(t):
    'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
    global_code = t[1]
    isa_name = t[2]
    namespace = isa_name + "Inst"
    # wrap the decode block as a function definition
    t[4].wrap_decode_block('''
StaticInstPtr
%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
{
    using namespace %(namespace)s;
''' % vars(), '}')
    # both the latter output blocks and the decode block are in the namespace
    namespace_code = t[3] + t[4]
    # pass it all back to the caller of yacc.parse()
    t[0] = (isa_name, namespace, global_code, namespace_code)

# ISA name declaration looks like "namespace <foo>;"
def p_name_decl(t):
    'name_decl : NAMESPACE ID SEMI'
    t[0] = t[2]

# 'opt_defs_and_outputs' is a possibly empty sequence of
# def and/or output statements.
def p_opt_defs_and_outputs_0(t):
    'opt_defs_and_outputs : empty'
    # empty sequence contributes an empty GenCode object
    t[0] = GenCode()

def p_opt_defs_and_outputs_1(t):
    'opt_defs_and_outputs : defs_and_outputs'
    t[0] = t[1]

def p_defs_and_outputs_0(t):
    'defs_and_outputs : def_or_output'
    t[0] = t[1]

def p_defs_and_outputs_1(t):
    'defs_and_outputs : defs_and_outputs def_or_output'
    # GenCode objects are concatenated with '+'
    t[0] = t[1] + t[2]

# The list of possible definition/output statements.
def p_def_or_output(t):
    '''def_or_output : def_format
                     | def_bitfield
                     | def_bitfield_struct
                     | def_template
                     | def_operand_types
                     | def_operands
                     | output_header
                     | output_decoder
                     | output_exec
                     | global_let'''
    t[0] = t[1]

# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
# directly to the appropriate output section.
# Protect any non-dict-substitution '%'s in a format string
# (i.e. those not followed by '(').
def protect_non_subst_percents(s):
    return re.sub(r'%(?!\()', '%%', s)

# Massage output block by substituting in template definitions and bit
# operators.  We handle '%'s embedded in the string that don't
# indicate template substitutions (or CPU-specific symbols, which get
# handled in GenCode) by doubling them first so that the format
# operation will reduce them back to single '%'s.
def process_output(s):
    s = protect_non_subst_percents(s)
    # protects cpu-specific symbols too
    s = protect_cpu_symbols(s)
    return substBitOps(s % templateMap)

# 'output header {{...}};' -- C++ copied verbatim to the header section.
def p_output_header(t):
    'output_header : OUTPUT HEADER CODELIT SEMI'
    t[0] = GenCode(header_output = process_output(t[3]))

# 'output decoder {{...}};' -- C++ copied to the decoder section.
def p_output_decoder(t):
    'output_decoder : OUTPUT DECODER CODELIT SEMI'
    t[0] = GenCode(decoder_output = process_output(t[3]))

# 'output exec {{...}};' -- C++ copied to the exec section.
def p_output_exec(t):
    'output_exec : OUTPUT EXEC CODELIT SEMI'
    t[0] = GenCode(exec_output = process_output(t[3]))

# global let blocks 'let {{...}}' (Python code blocks) are executed
# directly when seen.  Note that these execute in a special variable
# context 'exportContext' to prevent the code from polluting this
# script's namespace.
def p_global_let(t):
    'global_let : LET CODELIT SEMI'
    updateExportContext()
    # the let block communicates results back through these four keys
    exportContext["header_output"] = ''
    exportContext["decoder_output"] = ''
    exportContext["exec_output"] = ''
    exportContext["decode_block"] = ''
    try:
        # SECURITY NOTE: executes arbitrary code from the ISA
        # description; acceptable only because .isa files are trusted
        # build inputs.  The exec(code, ctx) call form is valid in
        # both Python 2 and 3 (the old 'exec code in ctx' statement
        # was Python-2-only).
        exec(fixPythonIndentation(t[2]), exportContext)
    except Exception as exc:
        error(t.lexer.lineno,
              'error: %s in global let block "%s".' % (exc, t[2]))
    t[0] = GenCode(header_output = exportContext["header_output"],
                   decoder_output = exportContext["decoder_output"],
                   exec_output = exportContext["exec_output"],
                   decode_block = exportContext["decode_block"])

# Define the mapping from operand type extensions to C++ types and bit
# widths (stored in operandTypeMap).
def p_def_operand_types(t):
    'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
    try:
        # SECURITY NOTE: eval of build-input text (trusted .isa file).
        userDict = eval('{' + t[3] + '}')
    except Exception as exc:
        error(t.lexer.lineno,
              'error: %s in def operand_types block "%s".' % (exc, t[3]))
    buildOperandTypeMap(userDict, t.lexer.lineno)
    t[0] = GenCode() # contributes nothing to the output C++ file

# Define the mapping from operand names to operand classes and other
# traits.  Stored in operandNameMap.
def p_def_operands(t):
    'def_operands : DEF OPERANDS CODELIT SEMI'
    # 'in' membership test replaces dict.has_key(), removed in Python 3.
    if 'operandTypeMap' not in globals():
        error(t.lexer.lineno,
              'error: operand types must be defined before operands')
    try:
        # SECURITY NOTE: eval of build-input text (trusted .isa file).
        userDict = eval('{' + t[3] + '}')
    except Exception as exc:
        error(t.lexer.lineno,
              'error: %s in def operands block "%s".' % (exc, t[3]))
    buildOperandNameMap(userDict, t.lexer.lineno)
    t[0] = GenCode() # contributes nothing to the output C++ file

# A bitfield definition looks like:
# 'def [signed] bitfield <ID> [<first>:<last>]'
# This generates a preprocessor macro in the output file.
def p_def_bitfield_0(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
    if (t[2] == 'signed'):
        # sign-extend from the field's width: first - last + 1 bits
        expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
    t[0] = GenCode(header_output = hash_define)

# alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
def p_def_bitfield_1(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
    if (t[2] == 'signed'):
        # single-bit field: width is 1
        expr = 'sext<%d>(%s)' % (1, expr)
    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
    t[0] = GenCode(header_output = hash_define)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?