showclues.py

来自「用python实现的邮件过滤器」· Python 代码 · 共 172 行

172 行

#!/usr/bin/env python"""Usage: showclues.py [options] [filenames]Options can one or more of:    -h        show usage and exit    -d DBFILE        use database in DBFILE    -p PICKLEFILE        use pickle (instead of database) in PICKLEFILE    -m        markup output with HTML    -o section:option:value        set [section, option] in the options database to valueIf no filenames are given on the command line, standard input will beprocessed as a single message.  If one or more filenames are given on thecommand line, each will be processed according to the following rules:    * If the filename is '-', standard input will be processed as a single      message (may only be usefully given once).    * If the filename starts with '+' it will be processed as an MH folder.    * If the filename is a directory and it contains a subdirectory named      'cur', it will be processed as a Maildir.    * If the filename is a directory and it contains a subdirectory named      'Mail', it will be processed as an MH Mailbox.    * If the filename is a directory and not a Maildir nor an MH Mailbox, it      will be processed as a Mailbox directory consisting of just .txt and      .lorien files.    * Otherwise, the filename is treated as a Unix-style mailbox (messages      begin on a line starting with 'From ')."""# This module is part of the spambayes project, which is Copyright 2002-5# The Python Software Foundation and is covered by the Python Software# Foundation license.__author__ = "Tony Meyer <ta-meyer@ihug.co.nz>"__credits__ = "All the Spambayes folk."try:    True, Falseexcept NameError:    # Maintain compatibility with Python 2.2    True, False = 1, 0import cgiimport sysimport getoptfrom spambayes import storagefrom spambayes import mboxutilsfrom spambayes.classifier import Setfrom spambayes.Options import optionsfrom spambayes.tokenizer import tokenizedef ShowClues(bayes, msg, as_html=False):    if as_html:        heading = "<h2>", "</h2>"        tt = "<tt>", "</tt>"        br = "<br />"        pre = "<pre>", "</pre>"        strong = "<strong>", "</strong>"        escape = cgi.escape        code = "<code>", "</code>"        wrapper = "<html>\n<head>\n<style>\n\n    h2 {color: green}\n" \                  "</stytle>\n</head>\n<body>", "</body></html>"    else:        heading = '*' * 74 + "\n", "\n" + '*' * 74        tt = "", ""        br = ""        pre = "", ""        strong = "", ""        escape = lambda a:a        code = "", ""        wrapper = "", ""    tokens = list(tokenize(msg))    toks = list(Set(tokens))    toks.sort()    score, clues = bayes.spamprob(iter(tokens), evidence=True)    body = ["%sCombined Score: %d%% (%g)%s\n" %            (heading[0], round(score*100), score, heading[1])]    push = body.append    # Format internal scores.    word, score = clues.pop(0)    push("Internal ham score (%s%s%s): %g%s\n" %         (tt[0], word, tt[1], score, br))    word, score = clues.pop(0)    push("Internal spam score (%s%s%s): %g%s\n" %         (tt[0], word, tt[1], score, br))    # Format the # ham and spam trained on.    push(br)    push("\n")    push("# ham trained on: %d%s\n" % (bayes.nham, br))    push("# spam trained on: %d%s\n" % (bayes.nspam, br))    push(br)    push("\n")    # Format the clues.    push("%s%s Significant Tokens%s\n%s" %         (heading[0], len(clues), heading[1], pre[0]))    push(strong[0])    push("token                               spamprob         #ham  #spam\n")    push(strong[1])    push("\n")    format = " %-12g %8s %6s\n"    fetchword = bayes.wordinfo.get    for word, prob in clues:        record = fetchword(word)        if record:            nham = record.hamcount            nspam = record.spamcount        else:            nham = nspam = "-"        word = repr(word)        push(escape(word) + " " * (35-len(word)))        push(format % (prob, nham, nspam))    push(pre[1])    push("\n")    # Now the raw text of the message    push("%sMessage Stream%s\n%s\n" % (heading[0], heading[1], pre[0]))    push(escape(msg.as_string()))    push(pre[1])    push("\n")    # Show all the tokens in the message    push("%sAll Message Tokens%s\n" % (heading[0], heading[1]))    push("%d unique tokens%s%s" % (len(toks), br, br))    # Use <code> instead of <pre>, as <pre> is not word-wrapped by IE    # However, <code> does not require escaping.    # could use pprint, but not worth it.    for token in toks:        push("%s%s%s%s\n" % (code[0], repr(token), code[1], br))    # Put the body together with the rest of the message.    body = "%s%s%s" % (wrapper[0], ''.join(body), wrapper[1])    return bodyif __name__ == "__main__":    opts, args = getopt.getopt(sys.argv[1:], 'hmd:p:o:',                               ['help', 'option=', 'markup'])    markup = False    for opt, arg in opts:        if opt in ('-m', '--markup'):            markup = True        elif opt in ('-h', '--help'):            print __doc__            sys.exit()        elif opt in ('-o', '--option'):            options.set_from_cmdline(arg, sys.stderr)    dbname, usedb = storage.database_type(opts)    bayes = storage.open_storage(dbname, usedb)    bayes.load()    if not args:        args = ["-"]    for fname in args:        mbox = mboxutils.getmbox(fname)        for msg in mbox:            print ShowClues(bayes, msg, markup)

showclues.py - 源码说明

本页面展示了「用python实现的邮件过滤器」中的 showclues.py 源码文件，采用 Python 编程语言编写，共 172 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与python相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?