📄 addin.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 5 页
字号:
            print "The timer is NOT enabled..."            print "*" * 50            use_timer = False        if use_timer:            # The user wants to use a timer - see if we should only enable            # the timer for known 'inbox' folders, or for all watched folders.            is_inbox = self.target.IsReceiveFolder()            if not is_inbox and self.manager.config.filter.timer_only_receive_folders:                use_timer = False        # Don't allow insane values for the timer.        if use_timer:            too = None            if not isinstance(start_delay, types.FloatType) or \               not isinstance(interval, types.FloatType):                print "*" * 50                print "Timer values are garbage!", repr(start_delay), repr(interval)                use_timer = False            elif start_delay < 0.4 or interval < 0.4:                too = "too often"            elif start_delay > 60 or interval > 60:                too = "too infrequently"            if too:                print "*" * 50                print "The timer is configured to fire way " + too + \                  " (delay=%s seconds, interval=%s seconds)" \                  % (start_delay, interval)                print "Please adjust your configuration.  The timer is NOT enabled..."                print "*" * 50                use_timer = False        self.use_timer = use_timer        self.timer_id = None    def ReInit(self):        # We may have swapped between timer and non-timer.        if self.use_timer:            self._KillTimer()        self.Init(self, self.target, self.application, self.manager)    def Close(self, *args):        self._KillTimer()        _BaseItemsEvent.Close(self, *args)    def _DoStartTimer(self, delay):        assert thread.get_ident() == self.owner_thread_ident        assert self.timer_id is None, "Shouldn't start a timer when already have one"        assert isinstance(delay, types.FloatType), "Timer values are float seconds"        # And start a new timer.        assert delay, "No delay means no timer!"        delay = int(delay*1000) # convert to ms.        self.timer_id = timer.set_timer(delay, self._TimerFunc)        self.manager.LogDebug(1, "New message timer started - id=%d, delay=%d" % (self.timer_id, delay))    def _StartTimer(self):        # First kill any existing timer        self._KillTimer()        # And start a new timer.        delay = self.manager.config.filter.timer_start_delay        field_name = self.manager.config.general.field_score_name        self.timer_generator = self.target.GetNewUnscoredMessageGenerator(field_name)        self._DoStartTimer(delay)    def _KillTimer(self):        assert thread.get_ident() == self.owner_thread_ident        if self.timer_id is not None:            timer.kill_timer(self.timer_id)            self.manager.LogDebug(2, "The timer with id=%d was stopped" % self.timer_id)            self.timer_id = None    def _TimerFunc(self, event, time):        # Kill the timer first        assert thread.get_ident() == self.owner_thread_ident        self.manager.LogDebug(1, "The timer with id=%s fired" % self.timer_id)        self._KillTimer()        assert self.timer_generator, "Can't have a timer with no generator"        # Callback from Outlook - locale may have changed.        locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above        # Find a single to item process        # If we did manage to process one, start a new timer.        # If we didn't, we are done and can wait until some external        # event triggers a new timer.        try:            # Zoom over items I have already seen.  This is so when the spam            # score it not saved, we do not continually look at the same old            # unread messages (assuming they have been trained) before getting            # to the new ones.            # If the Spam score *is* saved, the generator should only return            # ones that HaveSeen() returns False for, so therefore isn't a hit.            while 1:                item = self.timer_generator.next()                try:                    if not HaveSeenMessage(item, self.manager):                        break                except self.manager.message_store.NotFoundException:                    # ignore messages move underneath us                    self.manager.LogDebug(1, "The new message is skipping a message that moved underneath us")        except StopIteration:            # No items left in our generator            self.timer_generator = None            self.manager.LogDebug(1, "The new message timer found no new items, so is stopping")        else:            # We have an item to process - do it.            try:                ProcessMessage(item, self.manager)            finally:                # And setup the timer for the next check.                delay = self.manager.config.filter.timer_interval                self._DoStartTimer(delay)    def OnItemAdd(self, item):        # Callback from Outlook - locale may have changed.        locale.setlocale(locale.LC_NUMERIC, "C") # see locale comments above        self.manager.LogDebug(2, "OnItemAdd event for folder", self,                              "with item", item.Subject.encode("mbcs", "ignore"))        # Due to the way our "missed message" indicator works, we do        # a quick check here for "UnRead".  If UnRead, we assume it is very        # new and use our timer.  If not unread, we know our missed message        # generator would miss it, so we process it synchronously.        if not self.use_timer or not item.UnRead:            ms = self.manager.message_store            msgstore_message = ms.GetMessage(item)            ProcessMessage(msgstore_message, self.manager)        else:            self._StartTimer()# Event fired when item moved into the Spam folder.class SpamFolderItemsEvent(_BaseItemsEvent):    def OnItemAdd(self, item):        # Not sure what the best heuristics are here - for        # now, we assume that if the calculated spam prob        # was *not* certain-spam, or it is in the ham corpa,        # then it should be trained as such.        self.manager.LogDebug(2, "OnItemAdd event for SPAM folder", self,                              "with item", item.Subject.encode("mbcs", "ignore"))        assert(not self.manager.config.training.train_manual_spam,               "The folder shouldn't be hooked if this is False")        # XXX - Theoretically we could get "not found" exception here,        # but we have never guarded for it, and never seen it.  If it does        # happen life will go on, so for now we continue to ignore it.        msgstore_message = self.manager.message_store.GetMessage(item)        if not msgstore_message.IsFilterCandidate():            self.manager.LogDebug(1, "Not training message '%s' - we don't filter ones like that!")            return        if HaveSeenMessage(msgstore_message, self.manager):            # If the message has ever been previously trained as ham, then            # we *must* train as spam (well, we must untrain, but re-training            # makes sense.            # If we haven't been trained, but the spam score on the message            # if not inside our spam threshold, then we also train as spam            # (hopefully moving closer towards the spam threshold.)            # Assuming that rescoring is more expensive than checking if            # previously trained, try and optimize.            import train            self.manager.classifier_data.message_db.load_msg(msgstore_message)            if train.been_trained_as_ham(msgstore_message):                need_train = True            else:                prop = msgstore_message.GetField(self.manager.config.general.field_score_name)                # We may not have been able to save the score - re-score now                if prop is None:                    prop = self.manager.score(msgstore_message)                need_train = self.manager.config.filter.spam_threshold > prop * 100            if need_train:                TrainAsSpam(msgstore_message, self.manager)# Event function fired from the "Show Clues" UI items.def ShowClues(mgr, explorer):    from cgi import escape    app = explorer.Application    msgstore_message = explorer.GetSelectedMessages(False)    if msgstore_message is None:        return    mgr.classifier_data.message_db.load_msg(msgstore_message)    item = msgstore_message.GetOutlookItem()    score, clues = mgr.score(msgstore_message, evidence=True)    new_msg = app.CreateItem(0)    # NOTE: Silly Outlook always switches the message editor back to RTF    # once the Body property has been set.  Thus, there is no reasonable    # way to get this as text only.  Next best then is to use HTML, 'cos at    # least we know how to exploit it!    body = ["<h2>Combined Score: %d%% (%g)</h2>\n" %            (round(score*100), score)]    push = body.append    # Format internal scores.    push("Internal ham score (<tt>%s</tt>): %g<br>\n" % clues.pop(0))    push("Internal spam score (<tt>%s</tt>): %g<br>\n" % clues.pop(0))    # Format the # ham and spam trained on.    c = mgr.GetClassifier()    push("<br>\n")    push("# ham trained on: %d<br>\n" % c.nham)    push("# spam trained on: %d<br>\n" % c.nspam)    push("<br>\n")    # Report last modified date.    modified_date = msgstore_message.date_modified    if modified_date:        from time import localtime, strftime        modified_date = localtime(modified_date)        date_string = strftime("%a, %d %b %Y %I:%M:%S %p", modified_date)        push("As at %s:<br>\n" % (date_string,))    else:        push("The last time this message was classified or trained:<br>\n")    # Score when the message was classified - this will hopefully help    # people realise that it may not necessarily be the same, and will    # help diagnosing any 'wrong' scoring reported.    original_score = msgstore_message.GetField(\        mgr.config.general.field_score_name)    if original_score is not None:        original_score *= 100.0        if original_score >= mgr.config.filter.spam_threshold:            original_class = "spam"        elif original_score >= mgr.config.filter.unsure_threshold:            original_class = "unsure"        else:            original_class = "good"    if original_score is None:        push("This message had not been filtered.")    else:        original_score = round(original_score)        push("This message was classified as %s (it scored %d%%)." % \             (original_class, original_score))    # Report whether this message has been trained or not.    push("<br>\n")    push("This message had %sbeen trained%s." % \         {False : ("", " as ham"), True : ("", " as spam"),          None : ("not ", "")}[msgstore_message.t])    # Format the clues.    push("<h2>%s Significant Tokens</h2>\n<PRE>" % len(clues))    push("<strong>")    push("token                               spamprob         #ham  #spam\n")    push("</strong>")    format = " %-12g %8s %6s\n"    fetchword = c.wordinfo.get    for word, prob in clues:        record = fetchword(word)        if record:            nham = record.hamcount            nspam = record.spamcount        else:            nham = nspam = "-"        if isinstance(word, UnicodeType):            word = word.encode('mbcs', 'replace')        else:            word = repr(word)        push(escape(word) + " " * (35-len(word)))        push(format % (prob, nham, nspam))    push("</PRE>\n")    # Now the raw text of the message, as best we can    push("<h2>Message Stream</h2>\n")    push("<PRE>\n")    msg = msgstore_message.GetEmailPackageObject(strip_mime_headers=False)    push(escape(msg.as_string(), True))    push("</PRE>\n")    # Show all the tokens in the message    from spambayes.tokenizer import tokenize    from spambayes.classifier import Set # whatever classifier uses    push("<h2>All Message Tokens</h2>\n")    # need to re-fetch, as the tokens we see may be different based on    # header stripping.    toks = Set(tokenize(        msgstore_message.GetEmailPackageObject(strip_mime_headers=True)))    # create a sorted list    toks = list(toks)    toks.sort()    push("%d unique tokens<br><br>" % len(toks))    # Use <code> instead of <pre>, as <pre> is not word-wrapped by IE    # However, <code> does not require escaping.    # could use pprint, but not worth it.    for token in toks:        if isinstance(token, UnicodeType):
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -