lex.py
# -----------------------------------------------------------------------------
# ply: lex.py
#
# Author: David M. Beazley (dave@dabeaz.com)
#
# Copyright (C) 2001-2007, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# See the file COPYING for a complete copy of the LGPL.
# -----------------------------------------------------------------------------

__version__ = "2.3"

import re, sys, types

# Regular expression used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')

# Available instance types. This is used when lexers are defined by a class.
# It's a little funky because I want to preserve backwards compatibility
# with Python 2.0 where types.ObjectType is undefined.

try:
    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
    _INSTANCETYPE = types.InstanceType
    class object: pass      # Note: needed if no new-style classes present

# Exception thrown when invalid token encountered and no default error
# handler is defined.
class LexError(Exception):
    def __init__(self, message, s):
        self.args = (message,)
        self.text = s

# Token class
class LexToken(object):
    def __str__(self):
        return "LexToken(%s,%r,%d,%d)" % (self.type, self.value, self.lineno, self.lexpos)
    def __repr__(self):
        return str(self)
    def skip(self, n):
        self.lexer.skip(n)

# -----------------------------------------------------------------------------
# Lexer class
#
# This class encapsulates all of the methods and data associated with a lexer.
#
#    input()  -  Store a new string in the lexer
#    token()  -  Get the next token
# -----------------------------------------------------------------------------

class Lexer:
    def __init__(self):
        self.lexre = None           # Master regular expression. This is a list of
                                    # tuples (re, findex) where re is a compiled
                                    # regular expression and findex is a list
                                    # mapping regex group numbers to rules
        self.lexretext = None       # Current regular expression strings
        self.lexstatere = {}        # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}    # Dictionary mapping lexer states to regex strings
        self.lexstate = "INITIAL"   # Current lexer state
        self.lexstatestack = []     # Stack of lexer states
        self.lexstateinfo = None    # State information
        self.lexstateignore = {}    # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}    # Dictionary of error functions for each state
        self.lexreflags = 0         # Optional re compile flags
        self.lexdata = None         # Actual input data (as a string)
        self.lexpos = 0             # Current position in input text
        self.lexlen = 0             # Length of the input text
        self.lexerrorf = None       # Error rule (if any)
        self.lextokens = None       # List of valid tokens
        self.lexignore = ""         # Ignored characters
        self.lexliterals = ""       # Literal characters that can be passed through
        self.lexmodule = None       # Module
        self.lineno = 1             # Current line number
        self.lexdebug = 0           # Debugging mode
        self.lexoptimize = 0        # Optimized mode

    def clone(self, object=None):
        c = Lexer()
        c.lexstatere = self.lexstatere
        c.lexstateinfo = self.lexstateinfo
        c.lexstateretext = self.lexstateretext
        c.lexstate = self.lexstate
        c.lexstatestack = self.lexstatestack
        c.lexstateignore = self.lexstateignore
        c.lexstateerrorf = self.lexstateerrorf
        c.lexreflags = self.lexreflags
        c.lexdata = self.lexdata
        c.lexpos = self.lexpos
        c.lexlen = self.lexlen
        c.lextokens = self.lextokens
        c.lexdebug = self.lexdebug
        c.lineno = self.lineno
        c.lexoptimize = self.lexoptimize
        c.lexliterals = self.lexliterals
        c.lexmodule = self.lexmodule

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object. In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.
        if object:
            newtab = {}
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object, f[0].__name__), f[1]))
                    newre.append((cre, newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = {}
            for key, ef in self.lexstateerrorf.items():
                c.lexstateerrorf[key] = getattr(object, ef.__name__)
            c.lexmodule = object

        # Set up other attributes
        c.begin(c.lexstate)
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self, tabfile):
        tf = open(tabfile + ".py", "w")
        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile, __version__))
        tf.write("_lextokens = %s\n" % repr(self.lextokens))
        tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
        tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))

        tabre = {}
        for key, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[key][i], _funcs_to_names(lre[i][1])))
            tabre[key] = titem
        tf.write("_lexstatere = %s\n" % repr(tabre))
        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))

        taberr = {}
        for key, ef in self.lexstateerrorf.items():
            if ef:
                taberr[key] = ef.__name__
            else:
                taberr[key] = None
        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
        tf.close()

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self, tabfile, fdict):
        exec "import %s as lextab" % tabfile
        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = {}
        self.lexstateretext = {}
        for key, lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for i in range(len(lre)):
                titem.append((re.compile(lre[i][0], lextab._lexreflags), _names_to_funcs(lre[i][1], fdict)))
                txtitem.append(lre[i][0])
            self.lexstatere[key] = titem
            self.lexstateretext[key] = txtitem
        self.lexstateerrorf = {}
        for key, ef in lextab._lexstateerrorf.items():
            self.lexstateerrorf[key] = fdict[ef]
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self, s):
        if not (isinstance(s, types.StringType) or isinstance(s, types.UnicodeType)):
            raise ValueError, "Expected a string"
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self, state):
        if not self.lexstatere.has_key(state):
            raise ValueError, "Undefined state"
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state, "")
        self.lexerrorf = self.lexstateerrorf.get(state, None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self, state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self, n):
        self.lexpos += n

    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible. Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs,
            # and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre, lexindexfunc in self.lexre:
                m = lexre.match(lexdata, lexpos)
                if not m: continue

                # Set last match in lexer so that rules can access it if they want
                self.lexmatch = m

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos
                tok.lexer = self

                lexpos = m.end()
                i = m.lastindex
                func, tok.type = lexindexfunc[i]
                self.lexpos = lexpos

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type: return tok
                    break

                # if func not callable, it means it's an ignored token
                if not callable(func): break
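(The excerpt is truncated above, partway through token().) For context, here is a minimal sketch of how this Lexer class is normally driven. It assumes a full PLY installation: the lex() factory that builds a Lexer from a module's t_* rules is defined later in lex.py, beyond this excerpt, and the token names and rules below are illustrative, not taken from the source. Python 2 syntax, matching the listing.

# Minimal usage sketch (illustrative rules; assumes the full ply.lex module)
import ply.lex as lex

tokens = ('NUMBER', 'PLUS')           # illustrative token names

t_PLUS = r'\+'                        # string rule: the value is the regex

def t_NUMBER(t):                      # function rule: regex in the docstring
    r'\d+'
    t.value = int(t.value)
    return t

def t_newline(t):                     # lineno is not updated by token();
    r'\n+'                            # rules must maintain it themselves
    t.lexer.lineno += len(t.value)

t_ignore = ' \t'                      # becomes Lexer.lexignore, the fast
                                      # skip path at the top of token()

def t_error(t):                       # stored in Lexer.lexstateerrorf
    print "Illegal character %r" % t.value[0]
    t.lexer.skip(1)

lexer = lex.lex()                     # builds a Lexer from the rules above
lexer.input("3 + 4")                  # sets lexdata, lexpos and lexlen
while 1:
    tok = lexer.token()               # LexToken, or None at end of input
    if not tok: break
    print tok                         # e.g. LexToken(NUMBER,3,1,0)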
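The begin(), push_state(), and pop_state() methods above implement PLY's conditional lexing states. A sketch under the same assumptions; the 'comment' state and all of its rules are hypothetical examples:

# Conditional lexing states sketch (hypothetical 'comment' state)
import ply.lex as lex

tokens = ('CODE',)

states = (('comment', 'exclusive'),)  # recorded in Lexer.lexstateinfo

def t_comment(t):                     # seeing '/*' saves INITIAL on
    r'/\*'                            # Lexer.lexstatestack and switches
    t.lexer.push_state('comment')

def t_comment_body(t):                # matched but not returned, so the
    r'[^*]+'                          # comment text is simply dropped

def t_comment_end(t):                 # pop_state() restores the state
    r'\*/'                            # saved by push_state()
    t.lexer.pop_state()

def t_comment_error(t):               # per-state entry in lexstateerrorf
    t.lexer.skip(1)

t_comment_ignore = ''                 # per-state entry in lexstateignore

t_CODE = r'[^/ \t\n]+'
t_ignore = ' \t\n'

def t_error(t):
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input("ab /* skipped */ cd")
while 1:
    tok = lexer.token()
    if not tok: break
    print tok                         # CODE tokens only; comment is skipped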
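Finally, writetab() and readtab() back PLY's optimized mode. On the first run with optimize=1, lex() calls writetab() to dump the master regex strings and rule names into lextab.py; later runs call readtab(), which re-imports that file and rebuilds lexstatere without reprocessing the rules (the _funcs_to_names and _names_to_funcs helpers it uses are defined later in the full file). A sketch, with the same caveats as above:

# Optimized-mode sketch (illustrative rules; assumes the full ply.lex module)
import ply.lex as lex

tokens = ('NAME',)

t_NAME = r'[A-Za-z_][A-Za-z0-9_]*'
t_ignore = ' \t'

def t_error(t):
    t.lexer.skip(1)

# First run: writetab() creates lextab.py next to this module.
# Later runs: readtab() re-imports it and skips rule processing.
lexer = lex.lex(optimize=1, lextab="lextab")
lexer.input("alpha beta")
print lexer.token()                   # LexToken(NAME,'alpha',1,0)
print lexer.token()                   # LexToken(NAME,'beta',1,6)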