📄 sb_imapfilter.py
字号:
data = self.check_response("select %s" % (folder,), response) self.current_folder = folder return data number_re = re.compile(r"{\d+}") folder_re = re.compile(r"\(([\w\\ ]*)\) ") def folder_list(self): """Return a alphabetical list of all folders available on the server.""" response = self.list() try: all_folders = self.check_response("list", response) except BadIMAPResponseError: # We want to keep going, so just print out a warning, and # return an empty list. print "Could not retrieve folder list." return [] folders = [] for fol in all_folders: # Sigh. Some servers may give us back the folder name as a # literal, so we need to crunch this out. if isinstance(fol, types.TupleType): m = self.number_re.search(fol[0]) if not m: # Something is wrong here! Skip this folder. continue fol = '%s"%s"' % (fol[0][:m.start()], fol[1]) m = self.folder_re.search(fol) if not m: # Something is not good with this folder, so skip it. continue name_attributes = fol[:m.end()-1] # IMAP is a truly odd protocol. The delimiter is # only the delimiter for this particular folder - each # folder *may* have a different delimiter self.folder_delimiter = fol[m.end()+1:m.end()+2] # A bit of a hack, but we really need to know if this is # the case. if self.folder_delimiter == ',': print "WARNING: Your imap server uses a comma as the " \ "folder delimiter. This may cause unpredictable " \ "errors." folders.append(fol[m.end()+4:].strip('"')) folders.sort() return folders # A flag can have any character in the ascii range 32-126 except for # (){ %*"\ FLAG_CHARS = "" for i in range(32, 127): if not chr(i) in ['(', ')', '{', ' ', '%', '*', '"', '\\']: FLAG_CHARS += chr(i) FLAG = r"\\?[" + re.escape(FLAG_CHARS) + r"]+" # The empty flag set "()" doesn't match, so that extract_fetch_data() # returns data["FLAGS"] == None FLAGS_RE = re.compile(r"(FLAGS) (\((" + FLAG + r" )*(" + FLAG + r")\))") INTERNALDATE_RE = re.compile(r"(INTERNALDATE) (\"\d{1,2}\-[A-Za-z]{3,3}\-" + r"\d{2,4} \d{2,2}\:\d{2,2}\:\d{2,2} " + r"[\+\-]\d{4,4}\")") RFC822_RE = re.compile(r"(RFC822) (\{[\d]+\})") BODY_PEEK_RE = re.compile(r"(BODY\[\]) (\{[\d]+\})") RFC822_HEADER_RE = re.compile(r"(RFC822.HEADER) (\{[\d]+\})") UID_RE = re.compile(r"(UID) ([\d]+)") FETCH_RESPONSE_RE = re.compile(r"([0-9]+) \(([" + \ re.escape(FLAG_CHARS) + r"\"\{\}\(\)\\ ]*)\)?") LITERAL_RE = re.compile(r"^\{[\d]+\}$") def _extract_fetch_data(self, response): """This does the real work of extracting the data, for each message number. """ # We support the following FETCH items: # FLAGS # INTERNALDATE # RFC822 # UID # RFC822.HEADER # BODY.PEEK # All others are ignored. if isinstance(response, types.StringTypes): response = (response,) data = {} expected_literal = None for part in response: # We ignore parentheses by themselves, for convenience. if part == ')': continue if expected_literal: # This should be a literal of a certain size. key, expected_size = expected_literal## if len(part) != expected_size:## raise BadIMAPResponseError(\## "FETCH response (wrong size literal %d != %d)" % \## (len(part), expected_size), response) data[key] = part expected_literal = None continue # The first item will always be the message number. mo = self.FETCH_RESPONSE_RE.match(part) if mo: data["message_number"] = mo.group(1) rest = mo.group(2) else: raise BadIMAPResponseError("FETCH response", response) for r in [self.FLAGS_RE, self.INTERNALDATE_RE, self.RFC822_RE, self.UID_RE, self.RFC822_HEADER_RE, self.BODY_PEEK_RE]: mo = r.search(rest) if mo is not None: if self.LITERAL_RE.match(mo.group(2)): # The next element will be a literal. expected_literal = (mo.group(1), int(mo.group(2)[1:-1])) else: data[mo.group(1)] = mo.group(2) return data def extract_fetch_data(self, response): """Extract data from the response given to an IMAP FETCH command. The data is put into a dictionary, which is returned, where the keys are the fetch items. """ # There may be more than one message number in the response, so # handle separately. if isinstance(response, types.StringTypes): response = (response,) data = {} for msg in response: msg_data = self._extract_fetch_data(msg) if msg_data: # Maybe there are two about the same message number! num = msg_data["message_number"] if num in data: data[num].update(msg_data) else: data[num] = msg_data return data # Maximum amount of data that will be read at any one time. MAXIMUM_SAFE_READ = 4096 def safe_read(self, size): """Read data from remote, but in manageable sizes.""" data = [] while size > 0: if size < self.MAXIMUM_SAFE_READ: to_collect = size else: to_collect = self.MAXIMUM_SAFE_READ data.append(self._read(to_collect)) size -= self.MAXIMUM_SAFE_READ return "".join(data)class IMAPMessage(message.SBHeaderMessage): def __init__(self): message.Message.__init__(self) self.folder = None self.previous_folder = None self.rfc822_command = "(BODY.PEEK[])" self.rfc822_key = "BODY[]" self.got_substance = False self.invalid = False self.could_not_retrieve = False self.imap_server = None def extractTime(self): """When we create a new copy of a message, we need to specify a timestamp for the message, if we can't get the information from the IMAP server itself. If the message has a valid date header we use that. Otherwise, we use the current time.""" message_date = self["Date"] if message_date is not None: parsed_date = parsedate(message_date) if parsed_date is not None: try: return Time2Internaldate(time.mktime(parsed_date)) except ValueError: # Invalid dates can cause mktime() to raise a # ValueError, for example: # >>> time.mktime(parsedate("Mon, 06 May 0102 10:51:16 -0100")) # Traceback (most recent call last): # File "<interactive input>", line 1, in ? # ValueError: year out of range # (Why this person is getting mail from almost two # thousand years ago is another question <wink>). # In any case, we just pass and use the current date. pass except OverflowError: pass return Time2Internaldate(time.time()) def get_full_message(self): """Retrieve the RFC822 message from the IMAP server and return a new IMAPMessage object that has the same details as this message, but also has the substance.""" if self.got_substance: return self assert self.id, "Cannot get substance of message without an id" assert self.uid, "Cannot get substance of message without an UID" assert self.imap_server, "Cannot do anything without IMAP connection" # First, try to select the folder that the message is in. try: self.imap_server.SelectFolder(self.folder.name) except BadIMAPResponseError: # Can't select the folder, so getting the substance will not # work. self.could_not_retrieve = True print >>sys.stderr, "Could not select folder %s for message " \ "%s (uid %s)" % (self.folder.name, self.id, self.uid) return self # Now try to fetch the substance of the message. try: response = self.imap_server.uid("FETCH", self.uid, self.rfc822_command) except MemoryError: # Really big messages can trigger a MemoryError here. # The problem seems to be line 311 (Python 2.3) of socket.py, # which has "return "".join(buffers)". This has also caused # problems with Mac OS X 10.3, which apparently is very stingy # with memory (the malloc calls fail!). The problem then is # line 301 of socket.py which does # "data = self._sock.recv(recv_size)". # We want to handle this gracefully, although we can't really # do what we do later, and rewrite the message, since we can't # load it in the first place. Maybe an elegant solution would # be to get the message in parts, or just use the first X # characters for classification. For now, we just carry on, # warning the user and ignoring the message. self.could_not_retrieve = True print >>sys.stderr, "MemoryError with message %s (uid %s)" % \ (self.id, self.uid) return self command = "uid fetch %s" % (self.uid,) response_data = self.imap_server.check_response(command, response) data = self.imap_server.extract_fetch_data(response_data) # The data will be a dictionary - hopefully with only one element, # but maybe more than one. The key is the message number, which we # do not have (we use the UID instead). So we look through the # message and use the first data of the right type we find. rfc822_data = None for msg_data in data.itervalues(): if self.rfc822_key in msg_data: rfc822_data = msg_data[self.rfc822_key] break if rfc822_data is None: raise BadIMAPResponseError("FETCH response", response_data) try: new_msg = email.message_from_string(rfc822_data, IMAPMessage) # We use a general 'except' because the email package doesn't # always return email.Errors (it can return a TypeError, for # example) if the email is invalid. In any case, we want # to keep going, and not crash, because we might leave the # user's mailbox in a bad state if we do. Better to soldier on. except: # Yikes! Barry set this to return at this point, which # would work ok for training (IIRC, that's all he's # using it for), but for filtering, what happens is that # the message ends up blank, but ok, so the original is # flagged to be deleted, and a new (almost certainly # unsure) message, *with only the spambayes headers* is # created. The nice solution is still to do what sb_server # does and have a X-Spambayes-Exception header with the # exception data and then the original message. self.invalid = True text, details = message.insert_exception_header( rfc822_data, self.id) self.invalid_content = text self.got_substance = True # Print the exception and a traceback. print >>sys.stderr, details return self new_msg.folder = self.folder new_msg.previous_folder = self.previous_folder new_msg.rfc822_command = self.rfc822_command new_msg.rfc822_key = self.rfc822_key new_msg.imap_server = self.imap_server new_msg.uid = self.uid new_msg.setId(self.id) new_msg.got_substance = True if not new_msg.has_key(options["Headers", "mailid_header_name"]): new_msg[options["Headers", "mailid_header_name"]] = self.id if options["globals", "verbose"]: sys.stdout.write(chr(8) + "*") return new_msg def MoveTo(self, dest): '''Note that message should move to another folder. No move is carried out until Save() is called, for efficiency.''' if self.previous_folder is None: self.previous_folder = self.folder self.folder = dest def as_string(self, unixfrom=False): # Basically the same as the parent class's except that we handle # the case where the data was unparsable, so we haven't done any # filtering, and we are not actually a proper email.Message object. # We also don't mangle the from line; the server must take care of # this. if self.invalid: return self._force_CRLF(self.invalid_content) else: return message.SBHeaderMessage.as_string(self, unixfrom, mangle_from_=False) recent_re = re.compile(r"\\Recent ?| ?\\Recent") def Save(self): """Save message to IMAP server.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -