⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sb_imapfilter.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 4 页
字号:
        We can't actually update the message with IMAP, so what we do is        create a new message and delete the old one."""        assert self.folder is not None,\               "Can't save a message that doesn't have a folder."        assert self.id, "Can't save a message that doesn't have an id."        assert self.imap_server, "Can't do anything without IMAP connection."        response = self.imap_server.uid("FETCH", self.uid,                                        "(FLAGS INTERNALDATE)")        command = "fetch %s (flags internaldate)" % (self.uid,)        response_data = self.imap_server.check_response(command, response)        data = self.imap_server.extract_fetch_data(response_data)        # The data will be a dictionary - hopefully with only one element,        # but maybe more than one.  The key is the message number, which we        # do not have (we use the UID instead).  So we look through the        # message and use the last data of the right type we find.        msg_time = self.extractTime()        flags = None        for msg_data in data.itervalues():            if "INTERNALDATE" in msg_data:                msg_time = msg_data["INTERNALDATE"]            if "FLAGS" in msg_data:                flags = msg_data["FLAGS"]                # The \Recent flag can be fetched, but cannot be stored                # We must remove it from the list if it is there.                flags = self.recent_re.sub("", flags)                        # We try to save with flags and time, then with just the        # time, then with the flags and the current time, then with just        # the current time.  The first should work, but the first three        # sometimes (due to the quirky IMAP server) fail.        for flgs, tme in [(flags, msg_time),                          (None, msg_time),                          (flags, Time2Internaldate(time.time())),                          (None, Time2Internaldate(time.time()))]:            try:                response = self.imap_server.append(self.folder.name, flgs, tme,                                                   self.as_string())            except BaseIMAP.error:                continue            try:                self.imap_server.check_response("", response)            except BadIMAPResponseError:                pass            else:                break        else:            command = "append %s %s %s %s" % (self.folder.name, flgs, tme,                                              self.as_string)            raise BadIMAPResponseError(command)        if self.previous_folder is None:            self.imap_server.SelectFolder(self.folder.name)        else:            self.imap_server.SelectFolder(self.previous_folder.name)            self.previous_folder = None        response = self.imap_server.uid("STORE", self.uid, "+FLAGS.SILENT",                                        "(\\Deleted \\Seen)")        command = "set %s to be deleted and seen" % (self.uid,)        self.imap_server.check_response(command, response)        # Not all IMAP servers immediately offer the new message, but        # we need to find it to get the new UID.  We need to wait until        # the server offers up an EXISTS command, so we no-op until that        # is the case.        # See [ 941596 ] sb_imapfilter.py not adding headers / moving messages        # We use the recent() function, which no-ops if necessary.  We try        # 100 times, and then give up.  If a message arrives independantly,        # and we are told about it before our message, then this could        # cause trouble, but that would be one weird server.        for i in xrange(100):            response = self.imap_server.recent()            data = self.imap_server.check_response("recent", response)            if data[0] is not None:                break        else:            raise BadIMAPResponseError("recent", "Cannot find saved message")        # We need to update the UID, as it will have changed.        # Although we don't use the UID to keep track of messages, we do        # have to use it for IMAP operations.        self.imap_server.SelectFolder(self.folder.name)        search_string = "(UNDELETED HEADER %s \"%s\")" % \                        (options["Headers", "mailid_header_name"],                         self.id.replace('\\',r'\\').replace('"',r'\"'))        response = self.imap_server.uid("SEARCH", search_string)        data = self.imap_server.check_response("search " + search_string,                                               response)        new_id = data[0]        # See [ 870799 ] imap trying to fetch invalid message UID        # It seems that although the save gave a "NO" response to the        # first save, the message was still saved (without the flags,        # probably).  This really isn't good behaviour on the server's        # part, but, as usual, we try and deal with it.  So, if we get        # more than one undeleted message with the same SpamBayes id,        # delete all of them apart from the last one, and use that.        multiple_ids = new_id.split()        for id_to_remove in multiple_ids[:-1]:            response = self.imap_server.uid("STORE", id_to_remove,                                            "+FLAGS.SILENT",                                            "(\\Deleted \\Seen)")            command = "silently delete and make seen %s" % (id_to_remove,)            self.imap_server.check_response(command, response)        if multiple_ids:            new_id = multiple_ids[-1]        else:            # Let's hope it doesn't, but, just in case, if the search            # turns up empty, we make the assumption that the new message            # is the last one with a recent flag.            response = self.imap_server.uid("SEARCH", "RECENT")            data = self.imap_server.check_response("search recent",                                                   response)            new_id = data[0]            if new_id.find(' ') > -1:                ids = new_id.split(' ')                new_id = ids[-1]            # Ok, now we're in trouble if we still haven't found it.            # We make a huge assumption that the new message is the one            # with the highest UID (they are sequential, so this will be            # ok as long as another message hasn't also arrived).            if new_id == "":                response = self.imap_server.uid("SEARCH", "ALL")                data = self.imap_server.check_response("search all",                                                       response)                new_id = data[0]                if new_id.find(' ') > -1:                    ids = new_id.split(' ')                    new_id = ids[-1]        self.uid = new_idclass IMAPFolder(object):    def __init__(self, folder_name, imap_server, stats):        self.name = folder_name        self.imap_server = imap_server        self.stats = stats        # Unique names for cached messages - see _generate_id below.        self.lastBaseMessageName = ''        self.uniquifier = 2    def __cmp__(self, obj):        """Two folders are equal if their names are equal."""        if obj is None:            return False        return cmp(self.name, obj.name)    def __iter__(self):        """Iterate through the messages in this IMAP folder."""        for key in self.keys():            yield self[key]    def keys(self):        '''Returns *uids* for all the messages in the folder not        marked as deleted.'''        self.imap_server.SelectFolder(self.name)        response = self.imap_server.uid("SEARCH", "UNDELETED")        data = self.imap_server.check_response("search undeleted", response)        if data[0]:            return data[0].split(' ')        else:            return []    custom_header_id_re = re.compile(re.escape(\        options["Headers", "mailid_header_name"]) + "\:\s*(\d+(?:\-\d)?)",                                     re.IGNORECASE)    message_id_re = re.compile("Message-ID\: ?\<([^\n\>]+)\>",                               re.IGNORECASE)    def __getitem__(self, key):        """Return message matching the given *uid*.        The messages returned have no substance (so this should be        reasonably quick, even with large messages).  You need to call        get_full_message() on the returned message to get the substance of        the message from the server."""        self.imap_server.SelectFolder(self.name)        # Using RFC822.HEADER.LINES would be better here, but it seems        # that not all servers accept it, even though it is in the RFC        response = self.imap_server.uid("FETCH", key, "RFC822.HEADER")        response_data = self.imap_server.check_response(\            "fetch %s rfc822.header" % (key,), response)        data = self.imap_server.extract_fetch_data(response_data)        # The data will be a dictionary - hopefully with only one element,        # but maybe more than one.  The key is the message number, which we        # do not have (we use the UID instead).  So we look through the        # message and use the first data of the right type we find.        headers = None        for msg_data in data.itervalues():            if "RFC822.HEADER" in msg_data:                headers = msg_data["RFC822.HEADER"]                break        if headers is None:            raise BadIMAPResponseError("FETCH response", response_data)        # Create a new IMAPMessage object, which will be the return value.        msg = IMAPMessage()        msg.folder = self        msg.uid = key        msg.imap_server = self.imap_server        # We use the MessageID header as the ID for the message, as long        # as it is available, and if not, we add our own.        # Search for our custom id first, for backwards compatibility.        for id_header_re in [self.custom_header_id_re, self.message_id_re]:            mo = id_header_re.search(headers)            if mo:                msg.setId(mo.group(1))                break        else:            msg.setId(self._generate_id())            # Unfortunately, we now have to re-save this message, so that            # our id is stored on the IMAP server.  The vast majority of            # messages have Message-ID headers, from what I can tell, so            # we should only rarely have to do this.  It's less often than            # with the previous solution, anyway!            msg = msg.get_full_message()            msg.Save()        if options["globals", "verbose"]:            sys.stdout.write(".")        return msg    # Lifted straight from sb_server.py (under the name getNewMessageName)    def _generate_id(self):        # The message id is the time it arrived, with a uniquifier        # appended if two arrive within one clock tick of each other.        messageName = "%10.10d" % long(time.time())        if messageName == self.lastBaseMessageName:            messageName = "%s-%d" % (messageName, self.uniquifier)            self.uniquifier += 1        else:            self.lastBaseMessageName = messageName            self.uniquifier = 2        return messageName    def Train(self, classifier, isSpam):        """Train folder as spam/ham."""        num_trained = 0        for msg in self:            if msg.GetTrained() == (not isSpam):                msg = msg.get_full_message()                if msg.could_not_retrieve:                    # Something went wrong, and we couldn't even get                    # an invalid message, so just skip this one.                    # Annoyingly, we'll try to do it every time the                    # script runs, but hopefully the user will notice                    # the errors and move it soon enough.                    continue                msg.delSBHeaders()                classifier.unlearn(msg.tokenize(), not isSpam)                if isSpam:                    old_class = options["Headers", "header_ham_string"]                else:                    old_class = options["Headers", "header_spam_string"]                # Once the message has been untrained, it's training memory                # should reflect that on the off chance that for some                # reason the training breaks.                msg.RememberTrained(None)            else:                old_class = None            if msg.GetTrained() is None:                msg = msg.get_full_message()                if msg.could_not_retrieve:                    continue                saved_headers = msg.currentSBHeaders()                msg.delSBHeaders()                classifier.learn(msg.tokenize(), isSpam)                num_trained += 1                msg.RememberTrained(isSpam)                self.stats.RecordTraining(not isSpam, old_class=old_class)                if isSpam:                    move_opt_name = "move_trained_spam_to_folder"                else:                    move_opt_name = "move_trained_ham_to_folder"                if options["imap", move_opt_name] != "":                    # We need to restore the SpamBayes headers.                    for header, value in saved_headers.items():                        msg[header] = value                    msg.MoveTo(IMAPFolder(options["imap", move_opt_name],                                           self.imap_server, self.stats))                    msg.Save()        return num_trained    def Filter(self, classifier, spamfolder, unsurefolder, hamfolder):        count = {}        count["ham"] = 0        count["spam"] = 0        count["unsure"] = 0        for msg in self:            if msg.GetClassification() is None:                msg = msg.get_full_message()                if msg.could_not_retrieve:                    # Something went wrong, and we couldn't even get                    # an invalid message, so just skip this one.                    # Annoyingly, we'll try to do it every time the                    # script runs, but hopefully the user will notice                    # the errors and move it soon enough.                    continue                (prob, clues) = classifier.spamprob(msg.tokenize(),                                                    evidence=True)                # Add headers and remember classification.                msg.delSBHeaders()                msg.addSBHeaders(prob, clues)                self.stats.RecordClassification(prob)                cls = msg.GetClassification()                if cls == options["Headers", "header_ham_string"]:                    if hamfolder:                        msg.MoveTo(hamfolder)                    # Otherwise, we leave ham alone.                    count["ham"] += 1                elif cls == options["Headers", "header_spam_string"]:                    msg.MoveTo(spamfolder)                    count["spam"] += 1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -