📄 np_filetypes.py
字号:
#! /usr/bin/env python
###############################################################################
#
#  Copyright 2005 University of Cambridge Computer Laboratory.
#
#  This file is part of Nprobe.
#
#  Nprobe is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  Nprobe is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Nprobe; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
###############################################################################

###############################################################################
#
# Run checks on filetypes (MIME)
#
###############################################################################

"""Compare HTTP-claimed object types with types identified by file magic.

The FileTypes class accumulates, per object file, whether the type string
derived from the HTTP server object type equals the MIME type reported by
the magic database, and can print/write a summary report.
"""

from os.path import isfile, join
import sys
from nprobe import http_server_objtype_string
import fmagic

###############################################################################

MAGIC_DEF = '/usr/share/magic'

###############################################################################


def _primary_mimetype(description):
    """Return the leading MIME type of a magic description string.

    Surrounding whitespace is stripped and everything from the first ';'
    or ',' onwards is discarded.
    """
    return description.strip().split(';')[0].split(',')[0]


def _percent(part, whole):
    """Return part as a percentage of whole, or 0.0 when whole is zero."""
    if not whole:
        return 0.0
    return float(part * 100) / whole


###############################################################################

class FileTypes:
    """Accumulate and report agreement between claimed and magic file types.

    Counters maintained:
      obj_checked - number of object files found and examined
      obj_ok      - number whose claimed type equalled the magic type
      objdiffs    - {type: [count, {other_type: count or [(c, t), ...]}]}
      nofiles     - number of object files that did not exist
      nofilelist  - names of missing files (only if report_diff_files)
      toks        - per-type match counts (an int per type in single-pass
                    mode, a list indexed by byte length in iterative mode)
      oks         - match counts indexed by byte length (iterative mode only)
    """

    def __init__(self, magic_file=MAGIC_DEF, objdir='', corr_by_magic=0,
                 mlengths=None, verbose=0, report_diff_files=0):
        """Set up counters and load the magic database.

        magic_file        - path of the magic database; the process exits
                            with status 1 if it is not an existing file
        objdir            - directory that object file names are joined to
        corr_by_magic     - if true, differences are keyed by the magic type
                            rather than by the claimed type
        mlengths          - optional (min, max, inc) byte lengths to try in
                            turn; inc == 0 means double from min up to max.
                            When empty/None a single whole-file check is used
        verbose           - if true, print each difference / missing file
        report_diff_files - if true, record the individual files involved

        Raises ValueError if doubling is requested (inc == 0) from a
        non-positive minimum below the maximum, which could never terminate.
        """
        self.objdir = objdir
        self.corr_by_magic = corr_by_magic
        self.report_diff_files = report_diff_files
        self.verbose = verbose

        self.obj_checked = 0
        self.obj_ok = 0
        self.objdiffs = {}
        self.nofiles = 0
        self.nofilelist = []

        if not isfile(magic_file):
            print('Error - magic file %s does not exist' % (magic_file,))
            sys.exit(1)

        self.magic = fmagic.fmagic(fmagic.MAGIC_MIME, magic_file)

        if not mlengths:
            self.check_obtype = self.check_ob
            self.mlens = []
        else:
            self.check_obtype = self.check_ob_iter
            minb = mlengths[0]
            maxb = mlengths[1]
            inc = mlengths[2]
            if inc == 0:
                # Doubling a value <= 0 never grows, so the loop below
                # would spin forever; reject that combination up front.
                if minb <= 0 and minb < maxb:
                    raise ValueError(
                        'mlengths minimum must be positive when inc is 0')
                self.mlens = [minb]
                while minb < maxb:
                    minb *= 2
                    self.mlens.append(minb)
            else:
                self.mlens = list(range(minb, maxb + inc, inc))
            # Indexed directly by byte length, hence sized to the largest.
            self.oks = [0] * (self.mlens[-1] + 1)

        self.toks = {}

###############################################################################

    def get_mimetype(self, objfnm):
        """Return the leading MIME type the magic database gives for objfnm."""
        return _primary_mimetype(self.magic.magic_file(objfnm))

###############################################################################

    def _note_missing(self, fnm):
        """Record that the object file fnm does not exist."""
        self.nofiles += 1
        if self.verbose:
            print('NO FILE %s' % (fnm,))
        if self.report_diff_files:
            self.nofilelist.append(fnm)

###############################################################################

    def check_ob_iter(self, obtype, fnm):
        """Check one object, trying each byte length in self.mlens in turn.

        The claimed type is http_server_objtype_string(obtype).  The first
        length whose magic type equals the claimed type counts as a match
        and is tallied in self.oks and self.toks; if none matches, the
        difference is recorded using the magic type from the last length.
        """
        objfnm = join(self.objdir, fnm)
        if not isfile(objfnm):
            self._note_missing(fnm)
            return

        self.obj_checked += 1
        claimed = http_server_objtype_string(obtype)
        matched = 0

        # Only the descriptor is used; binary mode avoids any text-layer
        # interpretation of arbitrary object data.
        f = open(objfnm, 'rb')
        try:
            # presumably buffers up to mlens[-1] bytes from the descriptor
            # for the magic_own_buffer() calls below -- confirm with fmagic
            self.magic.load_buffer(self.mlens[-1], f.fileno())
            for nbytes in self.mlens:
                mtype = _primary_mimetype(self.magic.magic_own_buffer(nbytes))
                if mtype == claimed:
                    matched = 1
                    break
        finally:
            f.close()

        if matched:
            self.obj_ok += 1
            self.oks[nbytes] += 1
            counts = self.toks.setdefault(claimed,
                                          [0] * (self.mlens[-1] + 1))
            counts[nbytes] += 1
        else:
            self.save_diff(claimed, mtype, fnm)

###############################################################################

    def check_ob(self, obtype, fnm):
        """Check one object with a single whole-file magic lookup.

        The claimed type is http_server_objtype_string(obtype).  A match
        increments self.toks[type]; otherwise the difference is recorded.
        """
        objfnm = join(self.objdir, fnm)
        if not isfile(objfnm):
            self._note_missing(fnm)
            return

        self.obj_checked += 1
        claimed = http_server_objtype_string(obtype)
        mtype = self.get_mimetype(objfnm)
        if mtype == claimed:
            self.obj_ok += 1
            self.toks[claimed] = self.toks.get(claimed, 0) + 1
        else:
            self.save_diff(claimed, mtype, fnm)

###############################################################################

    def save_diff(self, type, ft, fnm):
        """Record a mismatch between claimed type and magic type ft.

        With corr_by_magic set the two are swapped so that differences are
        keyed by the magic type.  When report_diff_files is set, fnm must be
        of the form '<int>.<int>' and is stored as a pair of ints; otherwise
        only a count per (type, ft) pair is kept.
        """
        if self.corr_by_magic:
            type, ft = ft, type
            s1 = 'by magic'
            s2 = 'claimed'
        else:
            s1 = 'claimed'
            s2 = 'by magic'

        if self.verbose:
            print('%10s %s %s - %s %s' % (fnm, s1, type, s2, ft))

        entry = self.objdiffs.setdefault(type, [0, {}])
        entry[0] += 1
        if self.report_diff_files:
            c, t = fnm.split('.')
            entry[1].setdefault(ft, []).append((int(c), int(t)))
        else:
            entry[1][ft] = entry[1].get(ft, 0) + 1

###############################################################################

    def report_diffs(self, file=None):
        """Print a summary of the accumulated results.

        file - optional writable object.  Summary lines are printed and
               also written to it; bulk detail lines (individual files,
               per-type byte tables) go to it instead of being printed,
               or are printed when no file is given.
        """

        def report(s):
            # Summary line: always printed, and copied to file if given.
            print(s)
            if file:
                file.write(s + '\n')

        def report_b(s):
            # Bulk line: written to file if given, otherwise printed.
            if file:
                file.write(s + '\n')
            else:
                print(s)

        if not self.corr_by_magic:
            corr_str = 'HTTP header claimed type'
            s1 = 'claimed'
            s2 = 'magic'
        else:
            corr_str = '\'file magic\''
            s1 = 'magic'
            s2 = 'claimed'

        # Build (total, type, [(subtotal, other_type, files), ...]) rows.
        difflist = []
        for typ, (tot, fts) in self.objdiffs.items():
            if self.report_diff_files:
                subs = [(len(fl), ft, fl) for ft, fl in fts.items()]
            else:
                subs = [(subtot, ft, []) for ft, subtot in fts.items()]
            difflist.append((tot, typ, subs))
        difflist.sort()

        print('')
        ndiffs = self.obj_checked - self.obj_ok
        report('HTTP claimed object types against \'magic\' identified types:-\n')
        report('%d Objects checked, %d (%.3f%%) differ, %d nofiles'
               % (self.obj_checked, ndiffs,
                  _percent(ndiffs, self.obj_checked), self.nofiles))
        report(' (Differences correlated by %s)\n' % (corr_str,))
        report('%35s%15s%11s%11s%16s\n'
               % ((s1 + ' type').center(35), 'Non-matches', 'Matches',
                  'Total', '%Non-matches'))

        for tot, typ, fts in difflist:
            # A type may have differences but no recorded matches at all,
            # so never index self.toks directly.
            if self.mlens:
                totok = sum(self.toks.get(typ, []))
            else:
                totok = self.toks.get(typ, 0)
            totall = tot + totok
            report('%s%15d%11d%11d%12.3f'
                   % (typ.center(35), tot, totok, totall,
                      _percent(tot, totall)))
            fts.sort()
            id_string = s2 + ' - '
            for subtot, ft, fl in fts:
                report(' %40s%10d' % (id_string + ft, subtot))
                id_string = ''
                if self.report_diff_files:
                    fl.sort()
                    for f in fl:
                        report_b(' %d.%d' % (f[0], f[1]))
            report('\n')

        report('\n')

        if self.report_diff_files:
            report_b('Nofiles:-')
            for f in self.nofilelist:
                report_b(' %s' % (f,))
            report('\n')

        if self.mlens:
            report('No. bytes examined for successful match (all types):-\n')
            report('%10s%10s%10s' % ('bytes', 'matches', '% whole'))
            accum = 0
            for i in self.mlens:
                this = self.oks[i]
                accum += this
                report('%10d %10d %7.3f'
                       % (i, this, _percent(accum, self.obj_checked)))
            report_b('\n')
            report_b('No. bytes examined for successful match (by type):-\n')
            report('%30s%10s%10s' % ('Type'.center(30), 'bytes', 'matches'))
            for typ, llist in sorted(self.toks.items()):
                typestr = typ
                for i in self.mlens:
                    if llist[i]:
                        report_b('%30s%10d%10d' % (typestr, i, llist[i]))
                        # Show the type name only on its first line.
                        typestr = ''
            report('\n\n')


###############################################################################
###############################################################################

def main():
    """Construct a FileTypes checker with default settings.

    Exits with status 1 if the default magic file does not exist.
    """
    FileTypes()


###############################################################################
###############################################################################

# Call main when run as script
if __name__ == '__main__':
    main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -