📄 sb_imapfilter.py
字号:
We can't actually update the message with IMAP, so what we do is create a new message and delete the old one.""" assert self.folder is not None,\ "Can't save a message that doesn't have a folder." assert self.id, "Can't save a message that doesn't have an id." assert self.imap_server, "Can't do anything without IMAP connection." response = self.imap_server.uid("FETCH", self.uid, "(FLAGS INTERNALDATE)") command = "fetch %s (flags internaldate)" % (self.uid,) response_data = self.imap_server.check_response(command, response) data = self.imap_server.extract_fetch_data(response_data) # The data will be a dictionary - hopefully with only one element, # but maybe more than one. The key is the message number, which we # do not have (we use the UID instead). So we look through the # message and use the last data of the right type we find. msg_time = self.extractTime() flags = None for msg_data in data.itervalues(): if "INTERNALDATE" in msg_data: msg_time = msg_data["INTERNALDATE"] if "FLAGS" in msg_data: flags = msg_data["FLAGS"] # The \Recent flag can be fetched, but cannot be stored # We must remove it from the list if it is there. flags = self.recent_re.sub("", flags) # We try to save with flags and time, then with just the # time, then with the flags and the current time, then with just # the current time. The first should work, but the first three # sometimes (due to the quirky IMAP server) fail. for flgs, tme in [(flags, msg_time), (None, msg_time), (flags, Time2Internaldate(time.time())), (None, Time2Internaldate(time.time()))]: try: response = self.imap_server.append(self.folder.name, flgs, tme, self.as_string()) except BaseIMAP.error: continue try: self.imap_server.check_response("", response) except BadIMAPResponseError: pass else: break else: command = "append %s %s %s %s" % (self.folder.name, flgs, tme, self.as_string) raise BadIMAPResponseError(command) if self.previous_folder is None: self.imap_server.SelectFolder(self.folder.name) else: self.imap_server.SelectFolder(self.previous_folder.name) self.previous_folder = None response = self.imap_server.uid("STORE", self.uid, "+FLAGS.SILENT", "(\\Deleted \\Seen)") command = "set %s to be deleted and seen" % (self.uid,) self.imap_server.check_response(command, response) # Not all IMAP servers immediately offer the new message, but # we need to find it to get the new UID. We need to wait until # the server offers up an EXISTS command, so we no-op until that # is the case. # See [ 941596 ] sb_imapfilter.py not adding headers / moving messages # We use the recent() function, which no-ops if necessary. We try # 100 times, and then give up. If a message arrives independantly, # and we are told about it before our message, then this could # cause trouble, but that would be one weird server. for i in xrange(100): response = self.imap_server.recent() data = self.imap_server.check_response("recent", response) if data[0] is not None: break else: raise BadIMAPResponseError("recent", "Cannot find saved message") # We need to update the UID, as it will have changed. # Although we don't use the UID to keep track of messages, we do # have to use it for IMAP operations. self.imap_server.SelectFolder(self.folder.name) search_string = "(UNDELETED HEADER %s \"%s\")" % \ (options["Headers", "mailid_header_name"], self.id.replace('\\',r'\\').replace('"',r'\"')) response = self.imap_server.uid("SEARCH", search_string) data = self.imap_server.check_response("search " + search_string, response) new_id = data[0] # See [ 870799 ] imap trying to fetch invalid message UID # It seems that although the save gave a "NO" response to the # first save, the message was still saved (without the flags, # probably). This really isn't good behaviour on the server's # part, but, as usual, we try and deal with it. So, if we get # more than one undeleted message with the same SpamBayes id, # delete all of them apart from the last one, and use that. multiple_ids = new_id.split() for id_to_remove in multiple_ids[:-1]: response = self.imap_server.uid("STORE", id_to_remove, "+FLAGS.SILENT", "(\\Deleted \\Seen)") command = "silently delete and make seen %s" % (id_to_remove,) self.imap_server.check_response(command, response) if multiple_ids: new_id = multiple_ids[-1] else: # Let's hope it doesn't, but, just in case, if the search # turns up empty, we make the assumption that the new message # is the last one with a recent flag. response = self.imap_server.uid("SEARCH", "RECENT") data = self.imap_server.check_response("search recent", response) new_id = data[0] if new_id.find(' ') > -1: ids = new_id.split(' ') new_id = ids[-1] # Ok, now we're in trouble if we still haven't found it. # We make a huge assumption that the new message is the one # with the highest UID (they are sequential, so this will be # ok as long as another message hasn't also arrived). if new_id == "": response = self.imap_server.uid("SEARCH", "ALL") data = self.imap_server.check_response("search all", response) new_id = data[0] if new_id.find(' ') > -1: ids = new_id.split(' ') new_id = ids[-1] self.uid = new_idclass IMAPFolder(object): def __init__(self, folder_name, imap_server, stats): self.name = folder_name self.imap_server = imap_server self.stats = stats # Unique names for cached messages - see _generate_id below. self.lastBaseMessageName = '' self.uniquifier = 2 def __cmp__(self, obj): """Two folders are equal if their names are equal.""" if obj is None: return False return cmp(self.name, obj.name) def __iter__(self): """Iterate through the messages in this IMAP folder.""" for key in self.keys(): yield self[key] def keys(self): '''Returns *uids* for all the messages in the folder not marked as deleted.''' self.imap_server.SelectFolder(self.name) response = self.imap_server.uid("SEARCH", "UNDELETED") data = self.imap_server.check_response("search undeleted", response) if data[0]: return data[0].split(' ') else: return [] custom_header_id_re = re.compile(re.escape(\ options["Headers", "mailid_header_name"]) + "\:\s*(\d+(?:\-\d)?)", re.IGNORECASE) message_id_re = re.compile("Message-ID\: ?\<([^\n\>]+)\>", re.IGNORECASE) def __getitem__(self, key): """Return message matching the given *uid*. The messages returned have no substance (so this should be reasonably quick, even with large messages). You need to call get_full_message() on the returned message to get the substance of the message from the server.""" self.imap_server.SelectFolder(self.name) # Using RFC822.HEADER.LINES would be better here, but it seems # that not all servers accept it, even though it is in the RFC response = self.imap_server.uid("FETCH", key, "RFC822.HEADER") response_data = self.imap_server.check_response(\ "fetch %s rfc822.header" % (key,), response) data = self.imap_server.extract_fetch_data(response_data) # The data will be a dictionary - hopefully with only one element, # but maybe more than one. The key is the message number, which we # do not have (we use the UID instead). So we look through the # message and use the first data of the right type we find. headers = None for msg_data in data.itervalues(): if "RFC822.HEADER" in msg_data: headers = msg_data["RFC822.HEADER"] break if headers is None: raise BadIMAPResponseError("FETCH response", response_data) # Create a new IMAPMessage object, which will be the return value. msg = IMAPMessage() msg.folder = self msg.uid = key msg.imap_server = self.imap_server # We use the MessageID header as the ID for the message, as long # as it is available, and if not, we add our own. # Search for our custom id first, for backwards compatibility. for id_header_re in [self.custom_header_id_re, self.message_id_re]: mo = id_header_re.search(headers) if mo: msg.setId(mo.group(1)) break else: msg.setId(self._generate_id()) # Unfortunately, we now have to re-save this message, so that # our id is stored on the IMAP server. The vast majority of # messages have Message-ID headers, from what I can tell, so # we should only rarely have to do this. It's less often than # with the previous solution, anyway! msg = msg.get_full_message() msg.Save() if options["globals", "verbose"]: sys.stdout.write(".") return msg # Lifted straight from sb_server.py (under the name getNewMessageName) def _generate_id(self): # The message id is the time it arrived, with a uniquifier # appended if two arrive within one clock tick of each other. messageName = "%10.10d" % long(time.time()) if messageName == self.lastBaseMessageName: messageName = "%s-%d" % (messageName, self.uniquifier) self.uniquifier += 1 else: self.lastBaseMessageName = messageName self.uniquifier = 2 return messageName def Train(self, classifier, isSpam): """Train folder as spam/ham.""" num_trained = 0 for msg in self: if msg.GetTrained() == (not isSpam): msg = msg.get_full_message() if msg.could_not_retrieve: # Something went wrong, and we couldn't even get # an invalid message, so just skip this one. # Annoyingly, we'll try to do it every time the # script runs, but hopefully the user will notice # the errors and move it soon enough. continue msg.delSBHeaders() classifier.unlearn(msg.tokenize(), not isSpam) if isSpam: old_class = options["Headers", "header_ham_string"] else: old_class = options["Headers", "header_spam_string"] # Once the message has been untrained, it's training memory # should reflect that on the off chance that for some # reason the training breaks. msg.RememberTrained(None) else: old_class = None if msg.GetTrained() is None: msg = msg.get_full_message() if msg.could_not_retrieve: continue saved_headers = msg.currentSBHeaders() msg.delSBHeaders() classifier.learn(msg.tokenize(), isSpam) num_trained += 1 msg.RememberTrained(isSpam) self.stats.RecordTraining(not isSpam, old_class=old_class) if isSpam: move_opt_name = "move_trained_spam_to_folder" else: move_opt_name = "move_trained_ham_to_folder" if options["imap", move_opt_name] != "": # We need to restore the SpamBayes headers. for header, value in saved_headers.items(): msg[header] = value msg.MoveTo(IMAPFolder(options["imap", move_opt_name], self.imap_server, self.stats)) msg.Save() return num_trained def Filter(self, classifier, spamfolder, unsurefolder, hamfolder): count = {} count["ham"] = 0 count["spam"] = 0 count["unsure"] = 0 for msg in self: if msg.GetClassification() is None: msg = msg.get_full_message() if msg.could_not_retrieve: # Something went wrong, and we couldn't even get # an invalid message, so just skip this one. # Annoyingly, we'll try to do it every time the # script runs, but hopefully the user will notice # the errors and move it soon enough. continue (prob, clues) = classifier.spamprob(msg.tokenize(), evidence=True) # Add headers and remember classification. msg.delSBHeaders() msg.addSBHeaders(prob, clues) self.stats.RecordClassification(prob) cls = msg.GetClassification() if cls == options["Headers", "header_ham_string"]: if hamfolder: msg.MoveTo(hamfolder) # Otherwise, we leave ham alone. count["ham"] += 1 elif cls == options["Headers", "header_spam_string"]: msg.MoveTo(spamfolder) count["spam"] += 1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -