📄 mkreversemap.py
字号:
#!/usr/bin/env python"""Create mapping from features to message idsusage %(prog)s [ options ] mailbox ...-d mapfile - identify file which will hold mapping information (required)-t ham|spam - identify the type of messages in the input mailbox(es)-h - print this documentation and exitOne of '-t ham' or '-t spam' must be given, as must one or more messagesources."""import sysimport getoptimport anydbmimport cPickle as picklefrom spambayes.mboxutils import getmboxfrom spambayes.tokenizer import tokenizefrom spambayes.Options import optionsfrom spambayes.classifier import Classifierprog = sys.argv[0]def usage(msg=None): if msg is not None: print >> sys.stderr, msg print >> sys.stderr, __doc__.strip() % globals()def mapmessages(f, mboxtype, mapdb): i = 0 for msg in getmbox(f): i += 1 sys.stdout.write('\r%s: %d' % (f, i)) sys.stdout.flush() msgid = msg.get("message-id") if msgid is None: continue for t in tokenize(msg): ham, spam = mapdb.get(t, ({}, {})) if mboxtype == "ham": msgids = ham.get(f, set()) msgids.add(msgid) ham[f] = msgids else: msgids = spam.get(f, set()) msgids.add(msgid) spam[f] = msgids mapdb[t] = (ham, spam) if options["Classifier", "x-use_bigrams"]: for t in Classifier()._enhance_wordstream(tokenize(msg)): ham, spam = mapdb.get(t, ({}, {})) if mboxtype == "ham": msgids = ham.get(f, set()) msgids.add(msgid) ham[f] = msgids else: msgids = spam.get(f, set()) msgids.add(msgid) spam[f] = msgids mapdb[t] = (ham, spam) sys.stdout.write("\n")def main(args): try: opts, args = getopt.getopt(args, "hd:t:", ["type=", "help", "database="]) except getopt.GetoptError, msg: usage(msg) return 1 mapfile = None mboxtype = None for opt, arg in opts: if opt in ("-h", "--help"): usage() return 0 elif opt in ("-d", "--database"): mapfile = arg elif opt in ("-t", "--type"): mboxtype = arg if mapfile is None: usage("'-d mapfile' is required") return 1 if mboxtype is None: usage("'-t ham|spam' is required") return 1 if mboxtype not in ("ham", "spam"): usage("mboxtype must be 'ham' or 'spam'") return 1 try: mapd = pickle.load(file(mapfile)) except IOError: mapd = {} for f in args: mapmessages(f, mboxtype, mapd) pickle.dump(mapd, file(mapfile, "w"))if __name__ == "__main__": sys.exit(main(sys.argv[1:]))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -