⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 profile.py

📁 用python实现的邮件过滤器
💻 PY
字号:
"""Spam/ham profile for a single VM user."""

import os
import time

# NOTE(review): ZODB is never referenced by name below.  get_transaction()
# is called without being defined or imported in this file, so it is
# presumably installed as a builtin when ZODB is imported -- confirm before
# treating this import as unused.
import ZODB
from ZODB.PersistentList import PersistentList
from Persistence import Persistent
from BTrees.OOBTree import OOBTree

from spambayes import classifier
from spambayes.tokenizer import tokenize
from pspam.folder import Folder
from spambayes.Options import options

try:
    True, False
except NameError:
    # Maintain compatibility with Python 2.2.  The names are bound through
    # globals() rather than by plain assignment so that this file still
    # parses on interpreters where True and False are reserved words.
    globals().update({"True": 1, "False": 0})


def open_folders(dir, names, klass):
    """Return a list with one ``klass`` instance per entry of *names*.

    Each name is joined onto *dir* with os.path.join() and the resulting
    path is passed to *klass* as its only argument.  The order of the
    result follows the order of *names*.
    """
    return [klass(os.path.join(dir, name)) for name in names]


# Time of the first log() call; None until log() has been called once.
_start = None


def log(s):
    """Print *s* prefixed by the seconds elapsed since the first log() call.

    The first call stores the reference time in the module-level ``_start``.
    The elapsed time is rounded to two decimal places.
    """
    global _start
    if _start is None:
        _start = time.time()
    # A single pre-formatted argument produces the same "<elapsed> <s>" line
    # as a two-item print statement and is valid whether print is a
    # statement or a function.
    print("%s %s" % (round(time.time() - _start, 2), s))


class IterOOBTree(OOBTree):
    """OOBTree that additionally offers an ``iteritems()`` method."""

    def iteritems(self):
        """Return the result of ``self.items()``."""
        return self.items()


class WordInfo(Persistent):
    """Persistent record holding a spam count and a ham count."""

    def __init__(self):
        self.spamcount = self.hamcount = 0

    def __repr__(self):
        return "WordInfo(%r, %r)" % (self.spamcount, self.hamcount)


##class PMetaInfo(classifier.MetaInfo, Persistent):
##    pass
class PMetaInfo(Persistent):
    """Empty persistent object used as the ``meta`` attribute of PBayes."""
    pass


class PBayes(classifier.Bayes, Persistent):
    """classifier.Bayes subclass whose word table and meta object persist.

    After the base initializer runs, ``wordinfo`` is replaced by an
    IterOOBTree and ``meta`` by a PMetaInfo instance.
    """

    # Presumably consulted by classifier.Bayes when it creates per-word
    # records -- verify against the spambayes version in use.
    WordInfoClass = WordInfo

    def __init__(self):
        classifier.Bayes.__init__(self)
        self.wordinfo = IterOOBTree()
        self.meta = PMetaInfo()

    # XXX what about the getstate and setstate defined in base class


class Profile(Persistent):
    """A classifier plus the ham and spam folders used to train it."""

    # Class used to wrap a folder path; called with the path as its only
    # argument.
    FolderClass = Folder

    def __init__(self, folder_dir):
        """Create an empty profile whose folders live under *folder_dir*."""
        self._dir = folder_dir
        self.classifier = PBayes()
        self.hams = PersistentList()
        self.spams = PersistentList()

    def add_ham(self, folder):
        """Append a FolderClass for *folder* (relative to the profile
        directory) to the list of ham folders."""
        p = os.path.join(self._dir, folder)
        f = self.FolderClass(p)
        self.hams.append(f)

    def add_spam(self, folder):
        """Append a FolderClass for *folder* (relative to the profile
        directory) to the list of spam folders."""
        p = os.path.join(self._dir, folder)
        f = self.FolderClass(p)
        self.spams.append(f)

    def update(self):
        """Update classifier from current folder contents."""
        # The boolean results of _update() are not used: the
        # classifier.update_probabilities() call they once guarded is
        # disabled in the original source.
        self._update(self.hams, False)
        self._update(self.spams, True)
        get_transaction().commit()
        log("updated probabilities")

    def _update(self, folders, is_spam):
        """Train the classifier from the changes reported by *folders*.

        For every folder, ``read()`` is called and must return a pair
        ``(added, removed)``.  The keys of ``added`` are tokenized and
        learned, the keys of ``removed`` are tokenized and unlearned, both
        with the given *is_spam* flag.

        Returns a true value if at least one folder reported added or
        removed entries, and a false value otherwise.
        """
        changed = False
        for f in folders:
            log("update from %s" % f.path)
            added, removed = f.read()
            if added:
                log("added %d" % len(added))
            if removed:
                log("removed %d" % len(removed))
            get_transaction().commit()
            if not (added or removed):
                continue
            changed = True

            # It's important not to commit a transaction until
            # after update_probabilities is called in update().
            # Otherwise some new entries will cause scoring to fail.
            # NOTE(review): update() no longer calls update_probabilities()
            # and a commit is already issued right after f.read() above, so
            # the warning above looks stale -- confirm.
            for msg in added.keys():
                self.classifier.learn(tokenize(msg), is_spam)
            del added
            # NOTE(review): commit(1) is presumably a ZODB subtransaction
            # commit -- confirm against the ZODB version in use.
            get_transaction().commit(1)
            log("learned")
            for msg in removed.keys():
                self.classifier.unlearn(tokenize(msg), is_spam)
            if removed:
                log("unlearned")
            del removed
            get_transaction().commit(1)
        return changed

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -