📄 cmp.py
字号:
#!/usr/bin/env python
"""cmp.py sbase1 sbase2

Combines output from sbase1.txt and sbase2.txt, which are created by
rates.py from timtest.py output, and displays comparison statistics to
stdout.
"""

# NOTE: print() is always called with exactly one string argument, so the
# output is the same under the Python 2 print statement and the Python 3
# print function.

import os
import sys


def _last_field(line):
    """Return the last whitespace-separated field of line.

    Raises ValueError when line is empty, which is what readline() yields
    once the summary file is exhausted.
    """
    fields = line.split()
    if not fields:
        raise ValueError("summary file ended before all 'total'/'average' "
                         "lines were read")
    return fields[-1]


def suck(f):
    """Parse the summary file object f.

    Lines starting with '-> <stat> tested' are echoed to stdout as they
    are read.

    Returns the tuple
        (list of all f-p rates,
         list of all f-n rates,
         total f-p,
         total f-n,
         average f-p rate,
         average f-n rate,
         list of all ham score deviations,
         list of all spam score deviations,
         ham score deviation for all runs,
         spam score deviations for all runs,
        )
    where every "deviation" is a (mean, sdev) pair.

    Raises ValueError if f ends before the trailing 'total'/'average'
    lines, or if a rate line does not hold exactly two numbers.
    """
    fns = []
    fps = []
    hamdev = []
    spamdev = []
    hamdevall = spamdevall = (0.0, 0.0)

    get = f.readline
    while True:
        line = get()
        if not line:
            # readline() returns '' only at EOF; without this check the
            # rate parsing below fails with an opaque unpacking error.
            raise ValueError("summary file ended before a 'total' line "
                             "was found")
        if line.startswith('-> <stat> tested'):
            sys.stdout.write(line)
        if ' items; mean ' in line:
            # -> <stat> Ham distribution for this pair: 1000 items; mean 0.05; sample sdev 0.68
            # and later "sample " went away
            vals = line.split(';')
            mean = float(vals[1].split()[-1])
            sdev = float(vals[2].split()[-1])
            val = (mean, sdev)
            typ = vals[0].split()[2]
            if 'for all runs' in line:
                if typ == 'Ham':
                    hamdevall = val
                else:
                    spamdevall = val
            elif 'all in this' in line:
                if typ == 'Ham':
                    hamdev.append(val)
                else:
                    spamdev.append(val)
            continue
        if line.startswith('-> '):
            continue
        if line.startswith('total'):
            break
        if not line.strip():
            # Blank lines carry no data; skip them instead of crashing.
            continue
        # A line with an f-p rate and an f-n rate.
        p, n = map(float, line.split())
        fps.append(p)
        fns.append(n)

    # The loop stopped on the first of these four trailer lines:
    # "total unique false pos 0"
    # "total unique false neg 0"
    # "average fp % 0.0"
    # "average fn % 0.0"
    fptot = int(_last_field(line))
    fntot = int(_last_field(get()))
    fpmean = float(_last_field(get()))
    fnmean = float(_last_field(get()))
    return (fps, fns,
            fptot, fntot,
            fpmean, fnmean,
            hamdev, spamdev,
            hamdevall, spamdevall)


def tag(p1, p2):
    """Return a label describing the change from rate p1 to rate p2.

    Equal values are "tied"; a larger p2 is "lost" and a smaller p2 is
    "won".  Non-ties are followed by the relative change in percent, or
    by "+(was 0)" when p1 is zero and no percentage can be computed.
    """
    if p1 == p2:
        # A tie has no relative change to report.
        return "tied "
    t = "lost " if p1 < p2 else "won "
    if p1:
        t += " %+7.2f%%" % ((p2 - p1) * 100.0 / p1)
    else:
        t += " +(was 0)"
    return t


def mtag(m1, m2):
    """Format two (mean, sdev) pairs side by side.

    The result shows both means and their relative change in percent,
    then both sdevs and their relative change.  "+(was 0)" replaces a
    percentage whose base value (from m1) is zero.
    """
    mean1, dev1 = m1
    mean2, dev2 = m2
    t = "%7.2f %7.2f " % (mean1, mean2)
    if mean1:
        t += "%+7.2f%%" % ((mean2 - mean1) * 100.0 / mean1)
    else:
        t += "+(was 0)"
    t += " %7.2f %7.2f " % (dev1, dev2)
    if dev1:
        t += "%+7.2f%%" % ((dev2 - dev1) * 100.0 / dev1)
    else:
        t += "+(was 0)"
    return t


def dump(p1s, p2s):
    """Print each (p1, p2) pair with its tag(), then won/tied/lost counts."""
    counts = {"won": 0, "tied": 0, "lost": 0}
    for p1, p2 in zip(p1s, p2s):
        t = tag(p1, p2)
        print(" %5.3f %5.3f %s" % (p1, p2, t))
        # The first word of every tag is the outcome name.
        counts[t.split()[0]] += 1
    print("")
    for outcome in ("won", "tied", "lost"):
        print("%-4s %2d times" % (outcome, counts[outcome]))
    print("")


def dumpdev(meandev1, meandev2):
    """Print one mtag() line per corresponding pair of (mean, sdev) items."""
    for m1, m2 in zip(meandev1, meandev2):
        print(mtag(m1, m2))


def windowsfy(fn):
    """Return fn + '.txt' if a file of that name exists, else fn unchanged."""
    candidate = fn + '.txt'
    if os.path.exists(candidate):
        return candidate
    return fn


def main(argv=None):
    """Compare the two summary files named in argv and print the report.

    argv defaults to sys.argv; argv[1] and argv[2] are the two base names.
    Returns 0 on success, or 2 after writing a usage line to stderr when
    fewer than two file names are supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write("usage: cmp.py sbase1 sbase2\n")
        return 2
    f1n, f2n = argv[1:3]

    # The header shows the names as given, before any '.txt' is appended.
    print("%s -> %s" % (f1n, f2n))

    f1n = windowsfy(f1n)
    f2n = windowsfy(f2n)

    with open(f1n) as f:
        (fp1, fn1, fptot1, fntot1, fpmean1, fnmean1,
         hamdev1, spamdev1, hamdevall1, spamdevall1) = suck(f)
    with open(f2n) as f:
        (fp2, fn2, fptot2, fntot2, fpmean2, fnmean2,
         hamdev2, spamdev2, hamdevall2, spamdevall2) = suck(f)

    print("")
    print("false positive percentages")
    dump(fp1, fp2)
    print("total unique fp went from %s to %s %s"
          % (fptot1, fptot2, tag(fptot1, fptot2)))
    print("mean fp %% went from %s to %s %s"
          % (fpmean1, fpmean2, tag(fpmean1, fpmean2)))

    print("")
    print("false negative percentages")
    dump(fn1, fn2)
    print("total unique fn went from %s to %s %s"
          % (fntot1, fntot2, tag(fntot1, fntot2)))
    print("mean fn %% went from %s to %s %s"
          % (fnmean1, fnmean2, tag(fnmean1, fnmean2)))

    print("")
    if len(hamdev1) == len(hamdev2) and len(spamdev1) == len(spamdev2):
        print("ham mean ham sdev")
        dumpdev(hamdev1, hamdev2)
        print("")
        print("ham mean and sdev for all runs")
        dumpdev([hamdevall1], [hamdevall2])

        print("")
        print("spam mean spam sdev")
        dumpdev(spamdev1, spamdev2)
        print("")
        print("spam mean and sdev for all runs")
        dumpdev([spamdevall1], [spamdevall2])

        print("")
        diff1 = spamdevall1[0] - hamdevall1[0]
        diff2 = spamdevall2[0] - hamdevall2[0]
        print("ham/spam mean difference: %2.2f %2.2f %+2.2f"
              % (diff1, diff2, diff2 - diff1))
    else:
        print("[info about ham & spam means & sdevs not available in both files]")
    return 0


if __name__ == "__main__":
    sys.exit(main())
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -