⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mkreversemap.py

📁 用python实现的邮件过滤器
💻 PY
字号:
#!/usr/bin/env python
"""
Create mapping from features to message ids

usage %(prog)s [ options ] mailbox ...

-d mapfile - identify file which will hold mapping information (required)
-t ham|spam - identify the type of messages in the input mailbox(es)
-h - print this documentation and exit

One of '-t ham' or '-t spam' must be given, as must one or more message
sources.
"""

import sys
import getopt
import anydbm
import cPickle as pickle

from spambayes.mboxutils import getmbox
from spambayes.tokenizer import tokenize
from spambayes.Options import options
from spambayes.classifier import Classifier

prog = sys.argv[0]


def usage(msg=None):
    """Write the usage text to stderr, preceded by ``msg`` when one is given.

    The usage text is the module docstring with ``%(prog)s`` filled in from
    the module globals.
    """
    if msg is not None:
        print >> sys.stderr, msg
    print >> sys.stderr, __doc__.strip() % globals()


def _record(mapdb, token, mboxtype, f, msgid):
    """Add ``msgid`` to the id set stored for ``token`` under mailbox ``f``.

    Each ``mapdb`` value is a ``(ham, spam)`` pair of dicts that map a
    mailbox name to the set of message ids in which the token was seen.
    Any ``mboxtype`` other than "ham" is filed under the spam dict.
    """
    ham, spam = mapdb.get(token, ({}, {}))
    if mboxtype == "ham":
        bucket = ham
    else:
        bucket = spam
    bucket.setdefault(f, set()).add(msgid)
    # Store the pair back so tokens seen for the first time are kept.
    mapdb[token] = (ham, spam)


def mapmessages(f, mboxtype, mapdb):
    """Record, for every token of every message in ``f``, the message's id.

    f        -- mailbox source handed to getmbox(); also used as the key
                under which the message ids are grouped
    mboxtype -- "ham" or "spam"; selects which half of each entry is updated
    mapdb    -- mapping of token -> (ham, spam), updated in place

    Messages without a Message-Id header are counted in the progress
    display but otherwise skipped.  A running "<mailbox>: <count>" progress
    line is written to stdout.
    """
    for i, msg in enumerate(getmbox(f), 1):
        # '\r' rewrites the same terminal line on every message.
        sys.stdout.write('\r%s: %d' % (f, i))
        sys.stdout.flush()
        msgid = msg.get("message-id")
        if msgid is None:
            continue
        for t in tokenize(msg):
            _record(mapdb, t, mboxtype, f, msgid)
        if options["Classifier", "x-use_bigrams"]:
            # Second pass over a fresh token stream, run through the
            # classifier's word-stream enhancer.
            for t in Classifier()._enhance_wordstream(tokenize(msg)):
                _record(mapdb, t, mboxtype, f, msgid)
    sys.stdout.write("\n")


def main(args):
    """Parse the command line, update the pickled map file, and return a status.

    Returns 0 after printing help, 1 on any usage error, and None (treated
    as success by sys.exit) after the map file has been rewritten.
    """
    try:
        opts, args = getopt.getopt(args, "hd:t:",
                                   ["type=", "help", "database="])
    except getopt.GetoptError as msg:
        usage(msg)
        return 1

    mapfile = None
    mboxtype = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-d", "--database"):
            mapfile = arg
        elif opt in ("-t", "--type"):
            mboxtype = arg

    if mapfile is None:
        usage("'-d mapfile' is required")
        return 1

    if mboxtype is None:
        usage("'-t ham|spam' is required")
        return 1

    if mboxtype not in ("ham", "spam"):
        usage("mboxtype must be 'ham' or 'spam'")
        return 1

    # NOTE(security): unpickling can execute arbitrary code; only point
    # -d at map files you created yourself.
    # The file modes are left as they were (text mode, default pickle
    # protocol) so that existing map files remain readable.
    try:
        with open(mapfile) as fp:
            mapd = pickle.load(fp)
    except IOError:
        # A missing or unreadable map file means we start from scratch.
        mapd = {}

    for f in args:
        mapmessages(f, mboxtype, mapd)

    # 'with' guarantees the map is flushed and closed before we return.
    with open(mapfile, "w") as fp:
        pickle.dump(mapd, fp)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -