📄 magic.py
字号:
# Found on a russian zope mailing list, and modified to fix bugs in parsing
# the magic file and string making
# -- Daniel Berlin <dberlin@dberlin.org>
import sys, struct, time, re, pprint, stat, os, pwd, grp
try:
    # Python 2 only.  Kept so that any remaining ``exceptions.X`` reference
    # elsewhere in the file still resolves there; absent on Python 3.
    import exceptions
except ImportError:
    exceptions = None

# Non-zero makes mew() write progress marks to stderr.
_mew = 0

# _magic='/tmp/magic'
# _magic='/usr/share/magic.mime'
_magic = '/usr/share/magic.mime'
mime = 1

_ldate_adjust = lambda x: time.mktime(time.gmtime(x))

BUFFER_SIZE = 1024 * 128  # 128K should be enough...


class MagicError(Exception):
    """Base class of the exceptions defined in this module."""
    pass


def _handle(fmt='@x', adj=None):
    """Return the ``(struct format, size in bytes, adjust callable)`` triple.

    The size is computed with ``struct.calcsize(fmt)``; *adj* is stored
    unchanged and later applied to unpacked values by ``MagicTest.read``.
    """
    return fmt, struct.calcsize(fmt), adj


# Maps a magic type name to (struct format, byte size, optional cast).
# A size of 0 marks the variable-length 'string' type, which is read with
# read_asciiz() instead of struct.unpack().
KnownTypes = {
    # 'byte':_handle('@b'),
    'byte': _handle('@B'),
    'ubyte': _handle('@B'),

    'string': ('s', 0, None),
    'pstring': _handle('p'),

    # 'short':_handle('@h'),
    # 'beshort':_handle('>h'),
    # 'leshort':_handle('<h'),
    'short': _handle('@H'),
    'beshort': _handle('>H'),
    'leshort': _handle('<H'),
    'ushort': _handle('@H'),
    'ubeshort': _handle('>H'),
    'uleshort': _handle('<H'),

    'long': _handle('@l'),
    'belong': _handle('>l'),
    'lelong': _handle('<l'),
    'ulong': _handle('@L'),
    'ubelong': _handle('>L'),
    'ulelong': _handle('<L'),

    'date': _handle('=l'),
    'bedate': _handle('>l'),
    'ledate': _handle('<l'),
    'ldate': _handle('=l', _ldate_adjust),
    'beldate': _handle('>l', _ldate_adjust),
    'leldate': _handle('<l', _ldate_adjust),
}

_mew_cnt = 0


def mew(x):
    """Write the progress mark *x* to stderr when ``_mew`` is set.

    A '.' mark is counted and written as-is, with a newline inserted before
    every 64th dot.  Any other mark is written after a backspace character.
    """
    global _mew_cnt
    if not _mew:
        return
    if x == '.':
        _mew_cnt += 1
        if _mew_cnt % 64 == 0:
            sys.stderr.write('\n')
        sys.stderr.write('.')
    else:
        sys.stderr.write('\b' + x)


def has_format(s):
    """Return the number of unescaped '%' markers in *s* (0 if none).

    A doubled ``%%`` is a literal percent sign and is not counted.  The
    previous implementation compared each '%' only with the character before
    it, so longer runs such as ``'%%%%'`` produced a negative (truthy) count
    and made the caller apply ``%`` formatting to a string with no
    conversion in it.
    """
    count = 0
    i = 0
    length = len(s)
    while i < length:
        if s[i] == '%':
            if s[i + 1:i + 2] == '%':
                i += 2  # skip the escaped pair as a unit
                continue
            count += 1
        i += 1
    return count


def read_asciiz(file, size=None, pos=None):
    """Read a string from *file*.

    file -- object providing ``read`` (and ``seek`` when *pos* is given)
    size -- when not None, read exactly that many characters and keep the
            part before the first NUL; when None, read one character at a
            time until NUL, newline or end of file
    pos  -- when not None, absolute position to seek to first (position 0
            is honoured; it used to be skipped by a truthiness test)

    Returns the string read, without the terminator.
    """
    if pos is not None:
        mew('s')
        file.seek(pos, 0)
    mew('z')
    if size is not None:
        chars = [file.read(size).split('\0')[0]]
    else:
        chars = []
        while 1:
            c = file.read(1)
            if (not c) or (ord(c) == 0) or (c == '\n'):
                break
            chars.append(c)
    mew('Z')
    return ''.join(chars)


def a2i(v, base=0):
    """Convert the numeric text *v* to an int, dropping one trailing 'l'/'L'.

    With the default ``base=0`` the base is taken from the literal's prefix
    by ``int()``.
    """
    if v[-1:] in 'lL':
        v = v[:-1]
    return int(v, base)


# Letter -> result of evaluating the string literal "\<letter>".
_cmap = {
    '\\': '\\',
    '0': '\0',
}
for _code in range(ord('a'), ord('z') + 1):
    _letter = chr(_code)
    try:
        # Evaluates a constant built here, never external input.
        _cmap[_letter] = eval('"\\%c"' % _letter)
    except (ValueError, SyntaxError):
        # Incomplete escapes (e.g. "\x") are rejected; which exception is
        # raised depends on the interpreter version.
        pass
del _code
del _letter


def make_string(s):
    """Return *s* with backslash escapes interpreted as in a string literal.

    Double quotes in *s* are backslash-escaped and the result is evaluated
    as a double-quoted Python literal.
    """
    # SECURITY NOTE(review): this eval()s text that can originate from the
    # magic file and, via MagicTest.run(), from the inspected file's own
    # contents.  Input that already contains a backslash before a quote can
    # terminate the literal early.  Do not feed untrusted data through this
    # without replacing the eval with a real escape decoder.
    return eval('"' + s.replace('"', '\\"') + '"')


class MagicTestError(MagicError):
    """Raised for an invalid modifier, subtest type or subtest level."""
    pass


class MagicTest:
    """One test line of a magic file, plus its nested subtests."""

    def __init__(self, offset, mtype, test, message, line=None, level=None):
        """Parse the type modifiers of one magic entry.

        offset  -- called as ``offset(file)`` by read() to get the position
        mtype   -- type field; may carry a '/flags' suffix (string types) or
                   an '&', '+' or '-' numeric modifier (other types)
        test    -- test field (optional operator followed by a value)
        message -- text reported when the test matches
        line    -- stored on the instance; passed on to placeholder tests
        level   -- nesting depth used by add()

        Raises KeyError when the base type is not in KnownTypes and
        ValueError when a numeric modifier cannot be parsed.
        """
        self.line, self.level = line, level
        self.mtype = mtype
        self.mtest = test
        self.subtests = []
        self.mask = None
        self.smod = None
        self.nmod = None
        self.offset, self.type, self.test, self.message = \
            offset, mtype, test, message
        if self.mtype == 'true':
            return  # XXX hack to enable level skips
        if test[-1:] == '\\' and test[-2:] != '\\\\':
            self.test += 'n'  # looks like someone wanted EOL to match?
        if mtype[:6] == 'string':
            if '/' in mtype:  # for strings
                cut = mtype.find('/')
                self.type, self.smod = mtype[:cut], mtype[cut + 1:]
        else:
            for nm in '&+-':
                if nm in mtype:  # for integer-based
                    cut = mtype.find(nm)
                    self.nmod = nm
                    self.type = mtype[:cut]
                    # convert mask to int, autodetect base
                    self.mask = int(mtype[cut + 1:], 0)
                    break
        self.struct, self.size, self.cast = KnownTypes[self.type]

    def __str__(self):
        return '%s %s %s %s' % (
            self.offset, self.mtype, self.mtest, self.message
        )

    def __repr__(self):
        # level defaults to None; treat that as depth 0 instead of failing
        # on '\t' * None.
        return 'MagicTest(%s,%s,%s,%s,line=%s,level=%s,subtests=\n%s%s)' % (
            repr(self.offset), repr(self.mtype), repr(self.mtest),
            repr(self.message), repr(self.line), repr(self.level),
            '\t' * (self.level or 0), pprint.pformat(self.subtests)
        )

    def run(self, file):
        """Run this test, and on a match its subtests, against *file*.

        file -- a path (str), opened here and closed before returning, or an
                already open file object, which is left open

        Returns the message of this test followed by the results of the
        matching subtests, passed through make_string(); returns None when
        this test does not match.
        """
        result = ''
        do_close = 0
        try:
            if isinstance(file, str):
                # Binary mode: the tests unpack raw bytes with struct.
                # NOTE(review): on Python 3 binary reads yield bytes while
                # test values are str, so string tests would additionally
                # need a decode step -- not addressed here.
                file = open(file, 'rb', BUFFER_SIZE)
                do_close = 1
            if self.mtype != 'true':
                data = self.read(file)
            else:
                data = None
            if self.check(data):
                result = self.message + ' '
                if has_format(result):
                    result %= data
                for test in self.subtests:
                    m = test.run(file)
                    if m is not None:
                        result += m
                # See the security note in make_string(): *result* may
                # contain data read from the inspected file.
                return make_string(result)
            return None
        finally:
            if do_close:
                file.close()

    def get_mod_and_value(self):
        """Split ``self.test`` into ``(operator, value)``.

        String types accept the operators '=', '<' and '>' and the value is
        decoded with make_string().  Other types additionally accept '&' and
        '^', and the value is converted with a2i(); a test starting with 'x'
        yields ``('x', 0)``.  Without an explicit operator '=' is used.
        """
        if self.type[-6:] == 'string':
            # "something like\tthis\n"
            if self.test[0] in '=<>':
                mod, value = self.test[0], make_string(self.test[1:])
            else:
                mod, value = '=', make_string(self.test)
        else:
            if self.test[0] in '=<>&^':
                mod, value = self.test[0], a2i(self.test[1:])
            elif self.test[0] == 'x':
                mod = self.test[0]
                value = 0
            else:
                mod, value = '=', a2i(self.test)
        return mod, value

    def read(self, file):
        """Read and decode the value this test examines.

        Seeks *file* to ``self.offset(file)``.  For the 'string' type
        (size 0) a string is read with read_asciiz(); otherwise
        ``self.size`` bytes are read and unpacked with ``self.struct``.  The
        type's cast and the numeric modifier from the type field are then
        applied.

        Returns the decoded value, or None when fewer than ``self.size``
        bytes could be read.  Errors are reported on stderr and re-raised.
        """
        mew('s')
        file.seek(self.offset(file), 0)  # SEEK_SET
        mew('r')
        rdata = None
        try:
            # XXX self.size might be 0 here...
            if self.size == 0:
                # this is an ASCIIZ string...
                size = None
                if self.test != '>\\0':  # magic's hack for string read...
                    value = self.get_mod_and_value()[1]
                    # A NUL test value means "read up to the terminator".
                    # The old ``cond and None or len(value)`` form always
                    # produced len(value) because None is falsy.
                    if value == '\0':
                        size = None
                    else:
                        size = len(value)
                rdata = data = read_asciiz(file, size=size)
            else:
                rdata = file.read(self.size)
                if not rdata or (len(rdata) != self.size):
                    return None
                data = struct.unpack(self.struct, rdata)[0]  # XXX hack??
        except Exception:
            sys.stderr.write('%s\n' % (self,))
            sys.stderr.write('@%s struct=%s size=%d rdata=%s\n' % (
                self.offset, repr(self.struct), self.size, repr(rdata)))
            raise
        mew('R')

        if self.cast:
            data = self.cast(data)
        # ``is not None`` so that an explicit mask of 0 is still applied.
        if self.mask is not None:
            try:
                if self.nmod == '&':
                    data &= self.mask
                elif self.nmod == '+':
                    data += self.mask
                elif self.nmod == '-':
                    data -= self.mask
                else:
                    raise MagicTestError(self.nmod)
            except Exception:
                sys.stderr.write('data=%s nmod=%s mask=%s\n' % (
                    repr(data), repr(self.nmod), repr(self.mask)))
                raise
        return data

    def check(self, data):
        """Compare *data* (as returned by read()) with the test value.

        Returns a true value when the test matches.  For string types the
        flags from the type field are applied to both sides first: 'b'
        removes all whitespace, 'c' upper-cases.  Raises MagicTestError for
        an unknown operator; errors are reported on stderr and re-raised.
        """
        mew('.')
        if self.mtype == 'true':
            # NOTE(review): '' is falsy, so run() treats a 'true'
            # placeholder as "no match" and never descends into its
            # subtests -- confirm this is intended.
            return ''  # not None !
        if data is None:
            # read() hit a short read, so there is nothing to compare.
            # Python 2 orders None below every value, which made '<' tests
            # match spuriously; Python 3 raises on the comparison.
            return False
        mod, value = self.get_mod_and_value()
        xdata = data
        if self.type[-6:] == 'string' and self.smod:
            # "something like\tthis\n"
            if 'b' in self.smod:
                # all blanks are optional
                xdata = ''.join(xdata.split())
                value = ''.join(value.split())
            if 'c' in self.smod:
                # compare without regard to case
                xdata = xdata.upper()
                value = value.upper()
            # if 'B' in self.smod : # compact blanks
            #   ### XXX sorry, i don't understand this :-(
            #   data = ' '.join( data.split() )
            #   if ' ' not in data : return None
        try:
            # Compare the normalised xdata; the raw data used to be compared
            # here, which silently disabled the 'b' and 'c' flags.
            if mod == '=':
                result = xdata == value
            elif mod == '<':
                result = xdata < value
            elif mod == '>':
                result = xdata > value
            elif mod == '&':
                result = xdata & value
            elif mod == '^':
                result = (xdata & (~value)) == 0
            elif mod == 'x':
                result = 1
            else:
                raise MagicTestError(self.test)
            return result
        except Exception:
            sys.stderr.write('mtype=%s data=%s mod=%s value=%s\n' % (
                repr(self.mtype), repr(data), repr(mod), repr(value)))
            raise

    def add(self, mt):
        """Attach the subtest *mt* at the position given by its level.

        A test one level deeper than this one becomes a direct subtest.
        Otherwise it is handed to the most recent subtest.  When there is no
        subtest yet and *mt* is more than one level deeper, placeholder
        'true' tests are inserted for the missing levels first.

        Raises MagicTestError when *mt* is not a MagicTest or its level
        cannot be placed under this test.
        """
        if not isinstance(mt, MagicTest):
            raise MagicTestError(
                (mt, 'incorrect subtest type %s' % (type(mt),)))
        if mt.level == self.level + 1:
            self.subtests.append(mt)
        elif self.subtests:
            self.subtests[-1].add(mt)
        elif mt.level > self.level + 1:
            # it's possible to get level 3 just after level 1 !!! :-(
            level = self.level + 1
            while level < mt.level:
                xmt = MagicTest(None, 'true', 'x', '',
                                line=self.line, level=level)
                self.add(xmt)
                level += 1
            self.add(mt)  # retry...
        else:
            raise MagicTestError(
                (mt, 'incorrect subtest level %s' % (repr(mt.level),)))

    def last_test(self):
        """Return the most recently added direct subtest."""
        return self.subtests[-1]
# end class MagicTest


class OffsetError(MagicError):
    """Error type for offsets (its raise sites are outside this view)."""
    pass


# NOTE(review): the source of ``class Offset`` is cut off in the middle of
# its __init__ (it stops at a dangling ``except:``).  It has not been
# reformatted or completed; the visible text is preserved verbatim in the two
# comment lines below until the full definition is available.
# class Offset: pos_format = {'b':'<B','B':'>B','s':'<H','S':'>H','l':'<I','L':'>I',} pattern0 = re.compile(r''' # mere offset ^ &? # possible ampersand ( 0 # just zero | [1-9]{1,1}[0-9]* # decimal | 0[0-7]+ # octal | 0x[0-9a-f]+ # hex ) $ ''', re.X|re.I ) pattern1 = re.compile(r''' # indirect offset ^\( (?P<base>&?0 # just zero |&?[1-9]{1,1}[0-9]* # decimal |&?0[0-7]* # octal |&?0x[0-9A-F]+ # hex ) (?P<type> \. # this dot might be alone [BSL]? # one of this chars in either case )? (?P<sign> [-+]{0,1} )? (?P<off>0 # just zero |[1-9]{1,1}[0-9]* # decimal |0[0-7]* # octal |0x[0-9a-f]+ # hex )?
# \)$''', re.X|re.I ) def __init__(self,s): self.source = s self.value = None self.relative = 0 self.base = self.type = self.sign = self.offs = None m = Offset.pattern0.match( s ) if m : # just a number if s[0] == '&' : self.relative, self.value = 1, int( s[1:], 0 ) else: self.value = int( s, 0 ) return m = Offset.pattern1.match( s ) if m : # real indirect offset try: self.base = m.group('base') if self.base[0] == '&' : self.relative, self.base = 1, int( self.base[1:], 0 ) else: self.base = int( self.base, 0 ) if m.group('type') : self.type = m.group('type')[1:] self.sign = m.group('sign') if m.group('off') : self.offs = int( m.group('off'), 0 ) if self.sign == '-' : self.offs = 0 - self.offs except:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -