⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sb_imapfilter.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 4 页
字号:
            data = self.check_response("select %s" % (folder,), response)
            self.current_folder = folder
            return data

    # Matches a literal size marker of the form "{<digits>}".
    number_re = re.compile(r"{\d+}")
    # Matches a parenthesised run of word characters, backslashes and
    # spaces followed by a single space.  folder_list() uses it to find
    # where the name attributes of a folder entry end.
    folder_re = re.compile(r"\(([\w\\ ]*)\) ")

    def folder_list(self):
        """Return an alphabetical list of all folders available on the
        server.

        If the response to the list command is rejected by
        check_response() with a BadIMAPResponseError, a warning is printed
        and an empty list is returned.  Entries that cannot be parsed are
        silently skipped.

        As a side effect, self.folder_delimiter is overwritten for every
        folder that is parsed, so after the call it holds the delimiter of
        the last parsed entry."""
        response = self.list()
        try:
            all_folders = self.check_response("list", response)
        except BadIMAPResponseError:
            # We want to keep going, so just print out a warning, and
            # return an empty list.
            print "Could not retrieve folder list."
            return []
        folders = []
        for fol in all_folders:
            # Sigh.  Some servers may give us back the folder name as a
            # literal, so we need to crunch this out.
            if isinstance(fol, types.TupleType):
                m = self.number_re.search(fol[0])
                if not m:
                    # Something is wrong here!  Skip this folder.
                    continue
                # Rebuild a single string: everything before the "{n}"
                # marker, followed by the second tuple element in quotes.
                fol = '%s"%s"' % (fol[0][:m.start()], fol[1])
            m = self.folder_re.search(fol)
            if not m:
                # Something is not good with this folder, so skip it.
                continue
            # NOTE(review): name_attributes is assigned but not used
            # anywhere else in this method.
            name_attributes = fol[:m.end()-1]
            # IMAP is a truly odd protocol.  The delimiter is
            # only the delimiter for this particular folder - each
            # folder *may* have a different delimiter
            # The slice takes exactly one character, skipping the one
            # directly after the attributes - presumably the opening quote
            # of a quoted single-character delimiter; TODO confirm for
            # servers that send an unquoted delimiter.
            self.folder_delimiter = fol[m.end()+1:m.end()+2]
            # A bit of a hack, but we really need to know if this is
            # the case.
            if self.folder_delimiter == ',':
                print "WARNING: Your imap server uses a comma as the " \
                      "folder delimiter.  This may cause unpredictable " \
                      "errors."
            # The folder name is taken to start four characters after the
            # attributes; surrounding double quotes are stripped.
            folders.append(fol[m.end()+4:].strip('"'))
        folders.sort()
        return folders

    # A flag can have any character in the ascii range 32-126 except for
    # (){ %*"\
    # (Built in the class body, so the loop variable `i` is also left
    # behind as a class attribute.)
    FLAG_CHARS = ""
    for i in range(32, 127):
        if not chr(i) in ['(', ')', '{', ' ', '%', '*', '"', '\\']:
            FLAG_CHARS += chr(i)
    # A single flag: an optional leading backslash followed by one or more
    # of the permitted flag characters.
    FLAG = r"\\?[" + re.escape(FLAG_CHARS) + r"]+"
    # The empty flag set "()" doesn't match, so that extract_fetch_data()
    # returns data["FLAGS"] == None
    FLAGS_RE = re.compile(r"(FLAGS) (\((" + FLAG + r" )*(" + FLAG + r")\))")
    # Each of the following patterns captures the fetch item name as
    # group 1 and its value (or its "{n}" literal size marker) as group 2.
    INTERNALDATE_RE = re.compile(r"(INTERNALDATE) (\"\d{1,2}\-[A-Za-z]{3,3}\-" +
                                 r"\d{2,4} \d{2,2}\:\d{2,2}\:\d{2,2} " +
                                 r"[\+\-]\d{4,4}\")")
    RFC822_RE = re.compile(r"(RFC822) (\{[\d]+\})")
    BODY_PEEK_RE = re.compile(r"(BODY\[\]) (\{[\d]+\})")
    RFC822_HEADER_RE = re.compile(r"(RFC822.HEADER) (\{[\d]+\})")
    UID_RE = re.compile(r"(UID) ([\d]+)")
    # Group 1 is the leading message number, group 2 the text inside the
    # opening parenthesis (the closing parenthesis is optional).
    FETCH_RESPONSE_RE = re.compile(r"([0-9]+) \(([" + \
                                   re.escape(FLAG_CHARS) + r"\"\{\}\(\)\\ ]*)\)?")
    # Matches a string that consists solely of a "{n}" literal size marker.
    LITERAL_RE = re.compile(r"^\{[\d]+\}$")

    def _extract_fetch_data(self, response):
        """This does the real work of extracting the data, for each message
        number.

        `response` is either a single string or a sequence of parts; a
        single string is wrapped in a one-element tuple.  The result is a
        dictionary holding "message_number" (the digits captured by
        FETCH_RESPONSE_RE, as a string) plus one entry per recognised
        fetch item found.

        Raises BadIMAPResponseError if a part that is not consumed as a
        literal does not match FETCH_RESPONSE_RE.
        """
        # We support the following FETCH items:
        #  FLAGS
        #  INTERNALDATE
        #  RFC822
        #  UID
        #  RFC822.HEADER
        #  BODY.PEEK
        # All others are ignored.
        if isinstance(response, types.StringTypes):
            response = (response,)

        data = {}
        # When set, a (key, size) pair saying that the next part is the
        # literal value for `key`.
        expected_literal = None
        for part in response:
            # We ignore parentheses by themselves, for convenience.
            if part == ')':
                continue
            if expected_literal:
                # This should be a literal of a certain size.
                key, expected_size = expected_literal
                # The size check below is disabled, so expected_size is
                # currently unpacked but never compared.
##                if len(part) != expected_size:
##                    raise BadIMAPResponseError(\
##                        "FETCH response (wrong size literal %d != %d)" % \
##                        (len(part), expected_size), response)
                data[key] = part
                expected_literal = None
                continue
            # The first item will always be the message number.
            mo = self.FETCH_RESPONSE_RE.match(part)
            if mo:
                data["message_number"] = mo.group(1)
                rest = mo.group(2)
            else:
                raise BadIMAPResponseError("FETCH response", response)

            # Try every supported item against the remainder.  If more
            # than one pattern yields a literal marker for the same part,
            # only the last one in this list is remembered.
            for r in [self.FLAGS_RE, self.INTERNALDATE_RE, self.RFC822_RE,
                      self.UID_RE, self.RFC822_HEADER_RE, self.BODY_PEEK_RE]:
                mo = r.search(rest)
                if mo is not None:
                    if self.LITERAL_RE.match(mo.group(2)):
                        # The next element will be a literal.
                        expected_literal = (mo.group(1),
                                            int(mo.group(2)[1:-1]))
                    else:
                        data[mo.group(1)] = mo.group(2)
        return data

    def extract_fetch_data(self, response):
        """Extract data from the response given to an IMAP FETCH command.

        The data is put into a dictionary, which is returned, where the
        keys are the message numbers (as strings) and each value is the
        dictionary of fetch items produced by _extract_fetch_data() for
        that message.  Entries that share a message number are merged.
        """
        # There may be more than one message number in the response, so
        # handle separately.
        if isinstance(response, types.StringTypes):
            response = (response,)

        data = {}
        for msg in response:
            msg_data = self._extract_fetch_data(msg)
            if msg_data:
                # Maybe there are two about the same message number!
                num = msg_data["message_number"]
                if num in data:
                    data[num].update(msg_data)
                else:
                    data[num] = msg_data
        return data

    # Maximum amount of data that will be read at any one time.
    MAXIMUM_SAFE_READ = 4096

    def safe_read(self, size):
        """Read data from remote, but in manageable sizes.

        Calls self._read() repeatedly, asking for at most
        MAXIMUM_SAFE_READ at a time, and returns the pieces joined into
        one string."""
        data = []
        while size > 0:
            if size < self.MAXIMUM_SAFE_READ:
                to_collect = size
            else:
                to_collect = self.MAXIMUM_SAFE_READ
            data.append(self._read(to_collect))
            # A full chunk size is subtracted even after a final, shorter
            # read; that drives size to zero or below and ends the loop.
            size -= self.MAXIMUM_SAFE_READ
        return "".join(data)


class IMAPMessage(message.SBHeaderMessage):
    # A message that lives in a folder on an IMAP server.  The message
    # content is only fetched when get_full_message() is called.
    def __init__(self):
        # NOTE(review): this calls message.Message.__init__ rather than
        # the initialiser of the direct base class
        # message.SBHeaderMessage - confirm this is intentional.
        message.Message.__init__(self)
        # Folder the message is (or should end up) in, and the folder it
        # was in before MoveTo() was first called.
        self.folder = None
        self.previous_folder = None
        # Fetch item requested from the server, and the key under which
        # the fetched content appears in the extracted fetch data.
        self.rfc822_command = "(BODY.PEEK[])"
        self.rfc822_key = "BODY[]"
        # State flags maintained by get_full_message().
        self.got_substance = False
        self.invalid = False
        self.could_not_retrieve = False
        self.imap_server = None

    def extractTime(self):
        """When we create a new copy of a message, we need to specify
        a timestamp for the message, if we can't get the information
        from the IMAP server itself.  If the message has a valid date
        header we use that.  Otherwise, we use the current time."""
        message_date = self["Date"]
        if message_date is not None:
            parsed_date = parsedate(message_date)
            if parsed_date is not None:
                try:
                    return Time2Internaldate(time.mktime(parsed_date))
                except ValueError:
                    # Invalid dates can cause mktime() to raise a
                    # ValueError, for example:
                    #   >>> time.mktime(parsedate("Mon, 06 May 0102 10:51:16 -0100"))
                    #   Traceback (most recent call last):
                    #     File "<interactive input>", line 1, in ?
                    #   ValueError: year out of range
                    # (Why this person is getting mail from almost two
                    # thousand years ago is another question <wink>).
                    # In any case, we just pass and use the current date.
                    pass
                except OverflowError:
                    pass
        return Time2Internaldate(time.time())

    def get_full_message(self):
        """Retrieve the RFC822 message from the IMAP server and return a
        new IMAPMessage object that has the same details as this message,
        but also has the substance.

        Returns self unchanged if the substance was already fetched.
        Returns self with could_not_retrieve set if the folder cannot be
        selected or the fetch raises MemoryError.  Returns self with
        invalid, invalid_content and got_substance set if the fetched
        text cannot be parsed.

        Raises BadIMAPResponseError if none of the extracted fetch data
        contains self.rfc822_key."""
        if self.got_substance:
            return self

        assert self.id, "Cannot get substance of message without an id"
        assert self.uid, "Cannot get substance of message without an UID"
        assert self.imap_server, "Cannot do anything without IMAP connection"

        # First, try to select the folder that the message is in.
        try:
            self.imap_server.SelectFolder(self.folder.name)
        except BadIMAPResponseError:
            # Can't select the folder, so getting the substance will not
            # work.
            self.could_not_retrieve = True
            print >>sys.stderr, "Could not select folder %s for message " \
                  "%s (uid %s)" % (self.folder.name, self.id, self.uid)
            return self

        # Now try to fetch the substance of the message.
        try:
            response = self.imap_server.uid("FETCH", self.uid,
                                            self.rfc822_command)
        except MemoryError:
            # Really big messages can trigger a MemoryError here.
            # The problem seems to be line 311 (Python 2.3) of socket.py,
            # which has "return "".join(buffers)".  This has also caused
            # problems with Mac OS X 10.3, which apparently is very stingy
            # with memory (the malloc calls fail!).  The problem then is
            # line 301 of socket.py which does
            # "data = self._sock.recv(recv_size)".
            # We want to handle this gracefully, although we can't really
            # do what we do later, and rewrite the message, since we can't
            # load it in the first place.  Maybe an elegant solution would
            # be to get the message in parts, or just use the first X
            # characters for classification.  For now, we just carry on,
            # warning the user and ignoring the message.
            self.could_not_retrieve = True
            print >>sys.stderr, "MemoryError with message %s (uid %s)" % \
                  (self.id, self.uid)
            return self

        command = "uid fetch %s" % (self.uid,)
        response_data = self.imap_server.check_response(command, response)
        data = self.imap_server.extract_fetch_data(response_data)
        # The data will be a dictionary - hopefully with only one element,
        # but maybe more than one.  The key is the message number, which we
        # do not have (we use the UID instead).  So we look through the
        # message and use the first data of the right type we find.
        rfc822_data = None
        for msg_data in data.itervalues():
            if self.rfc822_key in msg_data:
                rfc822_data = msg_data[self.rfc822_key]
                break
        if rfc822_data is None:
            raise BadIMAPResponseError("FETCH response", response_data)

        try:
            new_msg = email.message_from_string(rfc822_data, IMAPMessage)
        # We use a general 'except' because the email package doesn't
        # always return email.Errors (it can return a TypeError, for
        # example) if the email is invalid.  In any case, we want
        # to keep going, and not crash, because we might leave the
        # user's mailbox in a bad state if we do.  Better to soldier on.
        except:
            # Yikes!  Barry set this to return at this point, which
            # would work ok for training (IIRC, that's all he's
            # using it for), but for filtering, what happens is that
            # the message ends up blank, but ok, so the original is
            # flagged to be deleted, and a new (almost certainly
            # unsure) message, *with only the spambayes headers* is
            # created.  The nice solution is still to do what sb_server
            # does and have a X-Spambayes-Exception header with the
            # exception data and then the original message.
            self.invalid = True
            text, details = message.insert_exception_header(
                rfc822_data, self.id)
            # invalid_content is what as_string() returns for an invalid
            # message.
            self.invalid_content = text
            self.got_substance = True

            # Print the exception and a traceback.
            print >>sys.stderr, details

            return self

        # Copy this object's bookkeeping onto the freshly parsed message.
        new_msg.folder = self.folder
        new_msg.previous_folder = self.previous_folder
        new_msg.rfc822_command = self.rfc822_command
        new_msg.rfc822_key = self.rfc822_key
        new_msg.imap_server = self.imap_server
        new_msg.uid = self.uid
        new_msg.setId(self.id)
        new_msg.got_substance = True

        # Make sure the message carries the configured mail id header.
        if not new_msg.has_key(options["Headers", "mailid_header_name"]):
            new_msg[options["Headers", "mailid_header_name"]] = self.id

        if options["globals", "verbose"]:
            # chr(8) is a backspace, so the "*" is written over the
            # previous character on the terminal.
            sys.stdout.write(chr(8) + "*")
        return new_msg

    def MoveTo(self, dest):
        '''Note that message should move to another folder.  No move is
        carried out until Save() is called, for efficiency.

        The folder the message was in when MoveTo() was first called is
        remembered in previous_folder; later calls only change folder.'''
        if self.previous_folder is None:
            self.previous_folder = self.folder
        self.folder = dest

    def as_string(self, unixfrom=False):
        # Basically the same as the parent class's except that we handle
        # the case where the data was unparsable, so we haven't done any
        # filtering, and we are not actually a proper email.Message object.
        # We also don't mangle the from line; the server must take care of
        # this.
        if self.invalid:
            return self._force_CRLF(self.invalid_content)
        else:
            return message.SBHeaderMessage.as_string(self, unixfrom,
                                                     mangle_from_=False)

    # Matches the text "\Recent" together with one optional adjacent space
    # (either the space after it or the space before it).
    recent_re = re.compile(r"\\Recent ?| ?\\Recent")

    def Save(self):
        """Save message to IMAP server.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -