📄 profile.py
字号:
"""Spam/ham profile for a single VM user."""import ZODBfrom ZODB.PersistentList import PersistentListfrom Persistence import Persistentfrom BTrees.OOBTree import OOBTreefrom spambayes import classifierfrom spambayes.tokenizer import tokenizefrom pspam.folder import Folderfrom spambayes.Options import optionsimport ostry: True, Falseexcept NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0def open_folders(dir, names, klass): L = [] for name in names: path = os.path.join(dir, name) L.append(klass(path)) return Limport time_start = Nonedef log(s): global _start if _start is None: _start = time.time() print round(time.time() - _start, 2), sclass IterOOBTree(OOBTree): def iteritems(self): return self.items()class WordInfo(Persistent): def __init__(self): self.spamcount = self.hamcount = 0 def __repr__(self): return "WordInfo(%r, %r)" % (self.spamcount, self.hamcount)##class PMetaInfo(classifier.MetaInfo, Persistent):## passclass PMetaInfo(Persistent): passclass PBayes(classifier.Bayes, Persistent): WordInfoClass = WordInfo def __init__(self): classifier.Bayes.__init__(self) self.wordinfo = IterOOBTree() self.meta = PMetaInfo() # XXX what about the getstate and setstate defined in base classclass Profile(Persistent): FolderClass = Folder def __init__(self, folder_dir): self._dir = folder_dir self.classifier = PBayes() self.hams = PersistentList() self.spams = PersistentList() def add_ham(self, folder): p = os.path.join(self._dir, folder) f = self.FolderClass(p) self.hams.append(f) def add_spam(self, folder): p = os.path.join(self._dir, folder) f = self.FolderClass(p) self.spams.append(f) def update(self): """Update classifier from current folder contents.""" changed1 = self._update(self.hams, False) changed2 = self._update(self.spams, True)## if changed1 or changed2:## self.classifier.update_probabilities() get_transaction().commit() log("updated probabilities") def _update(self, folders, is_spam): changed = False for f in folders: log("update from %s" % f.path) added, removed = f.read() if added: log("added %d" % len(added)) if removed: log("removed %d" % len(removed)) get_transaction().commit() if not (added or removed): continue changed = True # It's important not to commit a transaction until # after update_probabilities is called in update(). # Otherwise some new entries will cause scoring to fail. for msg in added.keys(): self.classifier.learn(tokenize(msg), is_spam) del added get_transaction().commit(1) log("learned") for msg in removed.keys(): self.classifier.unlearn(tokenize(msg), is_spam) if removed: log("unlearned") del removed get_transaction().commit(1) return changed
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -