📄 addin.py
字号:
print "The timer is NOT enabled..." print "*" * 50 use_timer = False if use_timer: # The user wants to use a timer - see if we should only enable # the timer for known 'inbox' folders, or for all watched folders. is_inbox = self.target.IsReceiveFolder() if not is_inbox and self.manager.config.filter.timer_only_receive_folders: use_timer = False # Don't allow insane values for the timer. if use_timer: too = None if not isinstance(start_delay, types.FloatType) or \ not isinstance(interval, types.FloatType): print "*" * 50 print "Timer values are garbage!", repr(start_delay), repr(interval) use_timer = False elif start_delay < 0.4 or interval < 0.4: too = "too often" elif start_delay > 60 or interval > 60: too = "too infrequently" if too: print "*" * 50 print "The timer is configured to fire way " + too + \ " (delay=%s seconds, interval=%s seconds)" \ % (start_delay, interval) print "Please adjust your configuration. The timer is NOT enabled..." print "*" * 50 use_timer = False self.use_timer = use_timer self.timer_id = None def ReInit(self): # We may have swapped between timer and non-timer. if self.use_timer: self._KillTimer() self.Init(self, self.target, self.application, self.manager) def Close(self, *args): self._KillTimer() _BaseItemsEvent.Close(self, *args) def _DoStartTimer(self, delay): assert thread.get_ident() == self.owner_thread_ident assert self.timer_id is None, "Shouldn't start a timer when already have one" assert isinstance(delay, types.FloatType), "Timer values are float seconds" # And start a new timer. assert delay, "No delay means no timer!" delay = int(delay*1000) # convert to ms. self.timer_id = timer.set_timer(delay, self._TimerFunc) self.manager.LogDebug(1, "New message timer started - id=%d, delay=%d" % (self.timer_id, delay)) def _StartTimer(self): # First kill any existing timer self._KillTimer() # And start a new timer. delay = self.manager.config.filter.timer_start_delay field_name = self.manager.config.general.field_score_name self.timer_generator = self.target.GetNewUnscoredMessageGenerator(field_name) self._DoStartTimer(delay) def _KillTimer(self): assert thread.get_ident() == self.owner_thread_ident if self.timer_id is not None: timer.kill_timer(self.timer_id) self.manager.LogDebug(2, "The timer with id=%d was stopped" % self.timer_id) self.timer_id = None def _TimerFunc(self, event, time): # Kill the timer first assert thread.get_ident() == self.owner_thread_ident self.manager.LogDebug(1, "The timer with id=%s fired" % self.timer_id) self._KillTimer() assert self.timer_generator, "Can't have a timer with no generator" # Callback from Outlook - locale may have changed. locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above # Find a single to item process # If we did manage to process one, start a new timer. # If we didn't, we are done and can wait until some external # event triggers a new timer. try: # Zoom over items I have already seen. This is so when the spam # score it not saved, we do not continually look at the same old # unread messages (assuming they have been trained) before getting # to the new ones. # If the Spam score *is* saved, the generator should only return # ones that HaveSeen() returns False for, so therefore isn't a hit. while 1: item = self.timer_generator.next() try: if not HaveSeenMessage(item, self.manager): break except self.manager.message_store.NotFoundException: # ignore messages move underneath us self.manager.LogDebug(1, "The new message is skipping a message that moved underneath us") except StopIteration: # No items left in our generator self.timer_generator = None self.manager.LogDebug(1, "The new message timer found no new items, so is stopping") else: # We have an item to process - do it. try: ProcessMessage(item, self.manager) finally: # And setup the timer for the next check. delay = self.manager.config.filter.timer_interval self._DoStartTimer(delay) def OnItemAdd(self, item): # Callback from Outlook - locale may have changed. locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above self.manager.LogDebug(2, "OnItemAdd event for folder", self, "with item", item.Subject.encode("mbcs", "ignore")) # Due to the way our "missed message" indicator works, we do # a quick check here for "UnRead". If UnRead, we assume it is very # new and use our timer. If not unread, we know our missed message # generator would miss it, so we process it synchronously. if not self.use_timer or not item.UnRead: ms = self.manager.message_store msgstore_message = ms.GetMessage(item) ProcessMessage(msgstore_message, self.manager) else: self._StartTimer()# Event fired when item moved into the Spam folder.class SpamFolderItemsEvent(_BaseItemsEvent): def OnItemAdd(self, item): # Not sure what the best heuristics are here - for # now, we assume that if the calculated spam prob # was *not* certain-spam, or it is in the ham corpa, # then it should be trained as such. self.manager.LogDebug(2, "OnItemAdd event for SPAM folder", self, "with item", item.Subject.encode("mbcs", "ignore")) assert(not self.manager.config.training.train_manual_spam, "The folder shouldn't be hooked if this is False") # XXX - Theoretically we could get "not found" exception here, # but we have never guarded for it, and never seen it. If it does # happen life will go on, so for now we continue to ignore it. msgstore_message = self.manager.message_store.GetMessage(item) if not msgstore_message.IsFilterCandidate(): self.manager.LogDebug(1, "Not training message '%s' - we don't filter ones like that!") return if HaveSeenMessage(msgstore_message, self.manager): # If the message has ever been previously trained as ham, then # we *must* train as spam (well, we must untrain, but re-training # makes sense. # If we haven't been trained, but the spam score on the message # if not inside our spam threshold, then we also train as spam # (hopefully moving closer towards the spam threshold.) # Assuming that rescoring is more expensive than checking if # previously trained, try and optimize. import train self.manager.classifier_data.message_db.load_msg(msgstore_message) if train.been_trained_as_ham(msgstore_message): need_train = True else: prop = msgstore_message.GetField(self.manager.config.general.field_score_name) # We may not have been able to save the score - re-score now if prop is None: prop = self.manager.score(msgstore_message) need_train = self.manager.config.filter.spam_threshold > prop * 100 if need_train: TrainAsSpam(msgstore_message, self.manager)# Event function fired from the "Show Clues" UI items.def ShowClues(mgr, explorer): from cgi import escape app = explorer.Application msgstore_message = explorer.GetSelectedMessages(False) if msgstore_message is None: return mgr.classifier_data.message_db.load_msg(msgstore_message) item = msgstore_message.GetOutlookItem() score, clues = mgr.score(msgstore_message, evidence=True) new_msg = app.CreateItem(0) # NOTE: Silly Outlook always switches the message editor back to RTF # once the Body property has been set. Thus, there is no reasonable # way to get this as text only. Next best then is to use HTML, 'cos at # least we know how to exploit it! body = ["<h2>Combined Score: %d%% (%g)</h2>\n" % (round(score*100), score)] push = body.append # Format internal scores. push("Internal ham score (<tt>%s</tt>): %g<br>\n" % clues.pop(0)) push("Internal spam score (<tt>%s</tt>): %g<br>\n" % clues.pop(0)) # Format the # ham and spam trained on. c = mgr.GetClassifier() push("<br>\n") push("# ham trained on: %d<br>\n" % c.nham) push("# spam trained on: %d<br>\n" % c.nspam) push("<br>\n") # Report last modified date. modified_date = msgstore_message.date_modified if modified_date: from time import localtime, strftime modified_date = localtime(modified_date) date_string = strftime("%a, %d %b %Y %I:%M:%S %p", modified_date) push("As at %s:<br>\n" % (date_string,)) else: push("The last time this message was classified or trained:<br>\n") # Score when the message was classified - this will hopefully help # people realise that it may not necessarily be the same, and will # help diagnosing any 'wrong' scoring reported. original_score = msgstore_message.GetField(\ mgr.config.general.field_score_name) if original_score is not None: original_score *= 100.0 if original_score >= mgr.config.filter.spam_threshold: original_class = "spam" elif original_score >= mgr.config.filter.unsure_threshold: original_class = "unsure" else: original_class = "good" if original_score is None: push("This message had not been filtered.") else: original_score = round(original_score) push("This message was classified as %s (it scored %d%%)." % \ (original_class, original_score)) # Report whether this message has been trained or not. push("<br>\n") push("This message had %sbeen trained%s." % \ {False : ("", " as ham"), True : ("", " as spam"), None : ("not ", "")}[msgstore_message.t]) # Format the clues. push("<h2>%s Significant Tokens</h2>\n<PRE>" % len(clues)) push("<strong>") push("token spamprob #ham #spam\n") push("</strong>") format = " %-12g %8s %6s\n" fetchword = c.wordinfo.get for word, prob in clues: record = fetchword(word) if record: nham = record.hamcount nspam = record.spamcount else: nham = nspam = "-" if isinstance(word, UnicodeType): word = word.encode('mbcs', 'replace') else: word = repr(word) push(escape(word) + " " * (35-len(word))) push(format % (prob, nham, nspam)) push("</PRE>\n") # Now the raw text of the message, as best we can push("<h2>Message Stream</h2>\n") push("<PRE>\n") msg = msgstore_message.GetEmailPackageObject(strip_mime_headers=False) push(escape(msg.as_string(), True)) push("</PRE>\n") # Show all the tokens in the message from spambayes.tokenizer import tokenize from spambayes.classifier import Set # whatever classifier uses push("<h2>All Message Tokens</h2>\n") # need to re-fetch, as the tokens we see may be different based on # header stripping. toks = Set(tokenize( msgstore_message.GetEmailPackageObject(strip_mime_headers=True))) # create a sorted list toks = list(toks) toks.sort() push("%d unique tokens<br><br>" % len(toks)) # Use <code> instead of <pre>, as <pre> is not word-wrapped by IE # However, <code> does not require escaping. # could use pprint, but not worth it. for token in toks: if isinstance(token, UnicodeType):
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -