📄 pop3graph.py
字号:
#!/usr/bin/env python"""Analyse the pop3proxy's caches and produce a graph of how accurateclassifier has been over time. Only really meaningful if you startedwith an empty database."""from __future__ import divisionimport sysimport getoptfrom spambayes import mboxutilsfrom spambayes.FileCorpus import FileCorpus, FileMessageFactory, GzipFileMessageFactoryfrom spambayes.Options import optionsdef usage(): print __doc__def main(argv): opts, args = getopt.getopt(argv, "h", ["help"]) for opt, arg in opts: if opt in ("-h", "--help"): usage() return # Create the corpuses and the factory that reads the messages. if options["pop3proxy", "cache_use_gzip"]: messageFactory = GzipFileMessageFactory() else: messageFactory = FileMessageFactory() sc = get_pathname_option("Storage", "spam_cache") hc = get_pathname_option("Storage", "ham_cache") spamCorpus = FileCorpus(messageFactory, sc) hamCorpus = FileCorpus(messageFactory, hc) # Read in all the trained messages. allTrained = {} for corpus, disposition in [(spamCorpus, 'Yes'), (hamCorpus, 'No')]: for m in corpus: message = mboxutils.get_message(m.getSubstance()) message._pop3CacheDisposition = disposition allTrained[m.key()] = message # Sort the messages into the order they arrived, then work out a scaling # factor for the graph - 'limit' is the widest it can be in characters. keys = allTrained.keys() keys.sort() limit = 70 if len(keys) < limit: scale = 1 else: scale = len(keys) // (limit//2) # Build the data - an array of cumulative success indexed by count. count = successful = 0 successByCount = [] for key in keys: message = allTrained[key] disposition = message[options["Headers", "classification_header_name"]] if (message._pop3CacheDisposition == disposition): successful += 1 count += 1 if count % scale == (scale-1): successByCount.append(successful // scale) # Build the graph, as a list of rows of characters. size = count // scale graph = [[" " for i in range(size+3)] for j in range(size)] for c in range(size): graph[c][1] = "|" graph[c][c+3] = "." graph[successByCount[c]][c+3] = "*" graph.reverse() # Print the graph. print "\n Success of the classifier over time:\n" print " . - Number of messages over time" print " * - Number of correctly classified messages over time\n\n" for row in range(size): line = ''.join(graph[row]) if row == 0: print line + " %d" % count elif row == (count - successful) // scale: print line + " %d" % successful else: print line print " " + "_" * (size+2)if __name__ == '__main__': main(sys.argv[1:])
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -