⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cmp.py

📁 用python实现的邮件过滤器
💻 PY
字号:
#!/usr/bin/env python
"""
cmp.py sbase1 sbase2

Combines output from sbase1.txt and sbase2.txt, which are created by
rates.py from timtest.py output, and displays comparison statistics to
stdout.
"""

# NOTE: every print() call below is given exactly one string argument, so
# the output is the same whether print is a statement (Python 2) or a
# function (Python 3).

import os
import sys

_USAGE = "usage: cmp.py sbase1 sbase2\n"


def suck(f):
    """Parse one summary file and return the statistics found in it.

    f is an open text file; only its readline() method is used.  Lines
    starting with '-> <stat> tested' are echoed to stdout as a side effect.

    Returns
      (list of all f-p rates,
       list of all f-n rates,
       total f-p,
       total f-n,
       average f-p rate,
       average f-n rate,
       list of all ham score deviations,
       list of all spam score deviations,
       ham score deviation for all runs,
       spam score deviations for all runs,
      )
    where each "deviation" is a (mean, sdev) pair.  The two "for all runs"
    pairs default to (0.0, 0.0) when the file has no such line.

    Raises ValueError if the file ends before a line starting with 'total'
    is seen, or if a rate line does not hold exactly two numbers.
    """
    fps = []
    fns = []
    hamdev = []
    spamdev = []
    hamdevall = spamdevall = (0.0, 0.0)

    get = f.readline
    while True:
        line = get()
        if not line:
            # readline() returns '' only at end of file; without this check
            # a truncated file fails later with an opaque unpacking error.
            raise ValueError("unexpected end of file: "
                             "no 'total' summary line found")
        if line.startswith('-> <stat> tested'):
            # The line keeps its own newline, so write it verbatim.
            sys.stdout.write(line)
        if ' items; mean ' in line:
            # -> <stat> Ham distribution for this pair: 1000 items; mean 0.05; sample sdev 0.68
            # and later "sample " went away
            vals = line.split(';')
            mean = float(vals[1].split()[-1])
            sdev = float(vals[2].split()[-1])
            val = (mean, sdev)
            # Third word of the first field; anything but 'Ham' is
            # treated as spam.
            typ = vals[0].split()[2]
            if 'for all runs' in line:
                if typ == 'Ham':
                    hamdevall = val
                else:
                    spamdevall = val
            elif 'all in this' in line:
                if typ == 'Ham':
                    hamdev.append(val)
                else:
                    spamdev.append(val)
            continue
        if line.startswith('-> '):
            continue
        if line.startswith('total'):
            break

        # A line with an f-p rate and an f-n rate.
        fields = line.split()
        if len(fields) != 2:
            raise ValueError("expected an 'fp-rate fn-rate' line, got %r"
                             % line)
        p, n = map(float, fields)
        fps.append(p)
        fns.append(n)

    # The loop stopped on the first of these four summary lines:
    # "total unique false pos 0"
    # "total unique false neg 0"
    # "average fp % 0.0"
    # "average fn % 0.0"
    fptot = int(line.split()[-1])
    fntot = int(get().split()[-1])
    fpmean = float(get().split()[-1])
    fnmean = float(get().split()[-1])

    return (fps, fns, fptot, fntot, fpmean, fnmean,
            hamdev, spamdev, hamdevall, spamdevall)


def _pct_change(old, new):
    """Format the change from old to new as a signed percentage of old.

    Returns the fixed marker "+(was 0)" when old is zero, since a relative
    change is undefined there.
    """
    if old:
        return "%+7.2f%%" % ((new - old) * 100.0 / old)
    return "+(was 0)"


def tag(p1, p2):
    """Return a 14-character verdict comparing rate p1 (before) to p2 (after).

    Equal values are "tied"; an increase is "lost" and a decrease is "won",
    each followed by the relative change.
    """
    if p1 == p2:
        return "tied          "
    verdict = "lost " if p1 < p2 else "won  "
    return verdict + " " + _pct_change(p1, p2)


def mtag(m1, m2):
    """Format two (mean, sdev) pairs side by side with their relative changes.

    m1 is the "before" pair and m2 the "after" pair.
    """
    mean1, dev1 = m1
    mean2, dev2 = m2
    return ("%7.2f %7.2f " % (mean1, mean2)
            + _pct_change(mean1, mean2)
            + "     %7.2f %7.2f " % (dev1, dev2)
            + _pct_change(dev1, dev2))


def dump(p1s, p2s):
    """Print one comparison row per rate pair, then won/tied/lost totals.

    p1s and p2s are paired with zip(), so extra items in the longer
    sequence are ignored.
    """
    tags = []
    for p1, p2 in zip(p1s, p2s):
        t = tag(p1, p2)
        print("    %5.3f  %5.3f  %s" % (p1, p2, t))
        tags.append(t)

    print("")
    for label in ("won", "tied", "lost"):
        # Every tag begins with exactly one of the three labels.
        count = sum(1 for t in tags if t.startswith(label))
        print("%-4s %2d times" % (label, count))
    print("")


def dumpdev(meandev1, meandev2):
    """Print one mtag() line for each pair of (mean, sdev) tuples."""
    for m1, m2 in zip(meandev1, meandev2):
        print(mtag(m1, m2))


def windowsfy(fn):
    """Return fn + '.txt' if that path exists, otherwise fn unchanged."""
    candidate = fn + '.txt'
    if os.path.exists(candidate):
        return candidate
    return fn


def main(argv=None):
    """Compare the two summary files named on the command line.

    argv defaults to sys.argv; argv[1] and argv[2] are the two base names.
    Returns 2 after printing a usage message when fewer than two names are
    given, otherwise None.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write(_USAGE)
        return 2
    f1n, f2n = argv[1:3]

    print("%s -> %s" % (f1n, f2n))

    f1n = windowsfy(f1n)
    f2n = windowsfy(f2n)

    # Parse the files one after the other so their echoed "tested" lines
    # appear in order, and close each as soon as it has been read.
    with open(f1n) as f1:
        (fp1, fn1, fptot1, fntot1, fpmean1, fnmean1,
         hamdev1, spamdev1, hamdevall1, spamdevall1) = suck(f1)
    with open(f2n) as f2:
        (fp2, fn2, fptot2, fntot2, fpmean2, fnmean2,
         hamdev2, spamdev2, hamdevall2, spamdevall2) = suck(f2)

    print("")
    print("false positive percentages")
    dump(fp1, fp2)
    print("total unique fp went from %s to %s %s"
          % (fptot1, fptot2, tag(fptot1, fptot2)))
    print("mean fp %% went from %s to %s %s"
          % (fpmean1, fpmean2, tag(fpmean1, fpmean2)))

    print("")
    print("false negative percentages")
    dump(fn1, fn2)
    print("total unique fn went from %s to %s %s"
          % (fntot1, fntot2, tag(fntot1, fntot2)))
    print("mean fn %% went from %s to %s %s"
          % (fnmean1, fnmean2, tag(fnmean1, fnmean2)))

    print("")
    if len(hamdev1) == len(hamdev2) and len(spamdev1) == len(spamdev2):
        print("ham mean                     ham sdev")
        dumpdev(hamdev1, hamdev2)
        print("")
        print("ham mean and sdev for all runs")
        dumpdev([hamdevall1], [hamdevall2])

        print("")
        print("spam mean                    spam sdev")
        dumpdev(spamdev1, spamdev2)
        print("")
        print("spam mean and sdev for all runs")
        dumpdev([spamdevall1], [spamdevall2])

        print("")
        diff1 = spamdevall1[0] - hamdevall1[0]
        diff2 = spamdevall2[0] - hamdevall2[0]
        print("ham/spam mean difference: %2.2f %2.2f %+2.2f"
              % (diff1, diff2, diff2 - diff1))
    else:
        print("[info about ham & spam means & sdevs not available in both files]")


if __name__ == "__main__":
    sys.exit(main())

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -