📄 lex.py
# (The listing picks up here, inside Lexer.token(), at the point where a
# matched rule's action function is about to be invoked.)

                # If token is processed by a function, call it
                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos        # This is here in case user has updated lexpos.
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if not self.lextokens.has_key(newtok.type):
                        raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func.func_code.co_filename,
                            func.func_code.co_firstlineno,
                            func.__name__, newtok.type), lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.lexer = self
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok: continue
                    return newtok

                self.lexpos = lexpos
                raise LexError, ("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError, "No input string given with input()"
        return None

# -----------------------------------------------------------------------------
# _validate_file()
#
# This checks to see if there are duplicated t_rulename() functions or strings
# in the parser input file.  This is done using a simple regular expression
# match on each line in the filename.
# -----------------------------------------------------------------------------

def _validate_file(filename):
    import os.path
    base, ext = os.path.splitext(filename)
    if ext != '.py': return 1           # No idea what the file is. Return OK

    try:
        f = open(filename)
        lines = f.readlines()
        f.close()
    except IOError:
        return 1                        # Oh well

    fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
    sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
    counthash = { }
    linen = 1
    noerror = 1
    for l in lines:
        m = fre.match(l)
        if not m:
            m = sre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                print >>sys.stderr, "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename, linen, name, prev)
                noerror = 0
        linen += 1
    return noerror
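# -----------------------------------------------------------------------------
# For example (an illustrative sketch, not part of the original file; the
# file name and rule name are hypothetical), a specification that binds the
# same rule name twice, once as a string and once as a function:
#
#     t_NUMBER = r'\d+'
#
#     def t_NUMBER(t):
#         r'\d+'
#         return t
#
# would make _validate_file() report:
#
#     spec.py:3: Rule t_NUMBER redefined. Previously defined on line 1
# -----------------------------------------------------------------------------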
# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------

def _funcs_to_names(funclist):
    result = []
    for f in funclist:
        if f and f[0]:
            result.append((f[0].__name__, f[1]))
        else:
            result.append(f)
    return result

# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------

def _names_to_funcs(namelist, fdict):
    result = []
    for n in namelist:
        if n and n[0]:
            result.append((fdict[n[0]], n[1]))
        else:
            result.append(n)
    return result

# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression.  Given limitations in the Python re
# module, it may be necessary to break the master regex into separate
# expressions.
# -----------------------------------------------------------------------------

def _form_master_re(relist, reflags, ldict, toknames):
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex, re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values()) + 1)
        for f, i in lexre.groupindex.items():
            handle = ldict.get(f, None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle, toknames[handle.__name__])
            elif handle is not None:
                # If rule was specified as a string, we build an anonymous
                # callback function to carry out the action
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None, None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre, lexindexfunc)], [regex]
    except Exception, e:
        m = int(len(relist)/2)
        if m == 0: m = 1
        llist, lre = _form_master_re(relist[:m], reflags, ldict, toknames)
        rlist, rre = _form_master_re(relist[m:], reflags, ldict, toknames)
        return llist + rlist, lre + rre
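# -----------------------------------------------------------------------------
# For example (illustrative; the rule names are hypothetical), each entry in
# relist is a named-group alternative such as '(?P<t_NUMBER>\d+)', so joining
# two rules gives the master regex:
#
#     (?P<t_NUMBER>\d+)|(?P<t_PLUS>\+)
#
# lexre.groupindex maps each rule name back to its group number, which is how
# lexindexfunc lets the matching engine in token() translate the group that
# matched into the corresponding rule function and token type.  If
# re.compile() fails (older re modules cap the number of named groups), the
# except clause splits relist in half and retries each half recursively.
# -----------------------------------------------------------------------------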
# -----------------------------------------------------------------------------
# _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token.  For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------

def _statetoken(s, names):
    nonstate = 1
    parts = s.split("_")
    for i in range(1, len(parts)):
        if not names.has_key(parts[i]) and parts[i] != 'ANY': break
    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names.keys())

    tokenname = "_".join(parts[i:])
    return (states, tokenname)

# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied
# module
# -----------------------------------------------------------------------------

def lex(module=None, object=None, debug=0, optimize=0, lextab="lextab", reflags=0, nowarn=0):
    global lexer
    ldict = None
    stateinfo = { 'INITIAL' : 'inclusive' }
    error = 0
    files = { }
    lexobj = Lexer()
    lexobj.lexdebug = debug
    lexobj.lexoptimize = optimize
    global token, input

    if nowarn: warn = 0
    else: warn = 1

    if object: module = object

    if module:
        # User supplied a module object.
        if isinstance(module, types.ModuleType):
            ldict = module.__dict__
        elif isinstance(module, _INSTANCETYPE):
            _items = [(k, getattr(module, k)) for k in dir(module)]
            ldict = { }
            for (i, v) in _items:
                ldict[i] = v
        else:
            raise ValueError, "Expected a module or instance"
        lexobj.lexmodule = module

    else:
        # No module given.  We might be able to get information from the caller.
        try:
            raise RuntimeError
        except RuntimeError:
            e, b, t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back                # Walk out to our calling function
            ldict = f.f_globals         # Grab its globals dictionary

    if optimize and lextab:
        try:
            lexobj.readtab(lextab, ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj
        except ImportError:
            pass

    # Get the tokens, states, and literals variables (if any)
    if (module and isinstance(module, _INSTANCETYPE)):
        tokens   = getattr(module, "tokens", None)
        states   = getattr(module, "states", None)
        literals = getattr(module, "literals", "")
    else:
        tokens   = ldict.get("tokens", None)
        states   = ldict.get("states", None)
        literals = ldict.get("literals", "")

    if not tokens:
        raise SyntaxError, "lex: module does not define 'tokens'"

    if not (isinstance(tokens, types.ListType) or isinstance(tokens, types.TupleType)):
        raise SyntaxError, "lex: tokens must be a list or tuple."

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    if not optimize:
        for n in tokens:
            if not _is_identifier.match(n):
                print >>sys.stderr, "lex: Bad token name '%s'" % n
                error = 1
            if warn and lexobj.lextokens.has_key(n):
                print >>sys.stderr, "lex: Warning. Token '%s' multiply defined." % n
            lexobj.lextokens[n] = None
    else:
        for n in tokens:
            lexobj.lextokens[n] = None

    if debug:
        print "lex: tokens = '%s'" % lexobj.lextokens.keys()

    try:
        for c in literals:
            if not (isinstance(c, types.StringType) or isinstance(c, types.UnicodeType)) or len(c) > 1:
                print >>sys.stderr, "lex: Invalid literal %s. Must be a single character" % repr(c)
                error = 1
                continue
    except TypeError:
        print >>sys.stderr, "lex: Invalid literals specification. literals must be a sequence of characters."
        error = 1

    lexobj.lexliterals = literals

    # Build statemap
    if states:
        if not (isinstance(states, types.TupleType) or isinstance(states, types.ListType)):
            print >>sys.stderr, "lex: states must be defined as a tuple or list."
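# -----------------------------------------------------------------------------
# Usage sketch (illustrative, not part of lex.py itself; the listing above
# breaks off partway through lex()).  A lexer specification defines 'tokens'
# and its t_ rules, then calls lex() to build the master regex from the
# caller's globals.  The rule and token names here (t_NUMBER, t_PLUS) are
# hypothetical:
#
#     import sys
#     import lex
#
#     tokens = ('NUMBER', 'PLUS')
#
#     t_PLUS = r'\+'                    # rule given as a string
#
#     def t_NUMBER(t):                  # rule given as a function; the
#         r'\d+'                        # docstring is the regex
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):                   # must advance lexpos (see token() above)
#         print >>sys.stderr, "Illegal character '%s'" % t.value[0]
#         t.lexer.lexpos += 1
#
#     lexer = lex.lex()
#     lexer.input("1+22")
#     while 1:
#         tok = lexer.token()           # returns None at end of input
#         if not tok: break
#         print tok.type, tok.value     # NUMBER 1, PLUS +, NUMBER 22
# -----------------------------------------------------------------------------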