📄 msgstore.py
字号:
def getDBKey(self):
    # Long lived search key.
    # Returns self.searchkey unchanged; __hash__ and __eq__ below are
    # based on the same attribute.
    return self.searchkey

def __repr__(self):
    # Debug representation: class name, subject and the message id.
    # The id is shown as a pair of hex strings (both halves of self.id
    # run through mapi.HexFromBin), or as a placeholder when self.id is
    # None.
    if self.id is None:
        id_str = "(deleted/moved)"
    else:
        id_str = mapi.HexFromBin(self.id[0]), mapi.HexFromBin(self.id[1])
    return "<%s, '%s' id=%s>" % (self.__class__.__name__,
                                 self.GetSubject(),
                                 id_str)

# as per search-key comments above, we also "enforce" this at the Python
# level.  2 different messages, but one copied from the other, will
# return "==".
# Not being consistent could cause subtle bugs, especially in interactions
# with various test tools.
# Compare the GetID() results if you need to know different messages.
def __hash__(self):
    # Hash on the search key so it stays consistent with __eq__.
    return hash(self.searchkey)

def __eq__(self, other):
    # Equality is delegated to the session's CompareEntryIDs, applied to
    # the two search keys.
    # NOTE(review): 'other' is assumed to have a 'searchkey' attribute;
    # comparing against an unrelated object would raise AttributeError
    # here - confirm no caller does that.
    ceid = self.msgstore.session.CompareEntryIDs
    return ceid(self.searchkey, other.searchkey)

def __ne__(self, other):
    # Exact negation of __eq__.
    return not self.__eq__(other)

def GetID(self):
    # Return the message id as a 2-tuple of hex strings, in the same
    # order as self.id.
    return mapi.HexFromBin(self.id[0]), mapi.HexFromBin(self.id[1])

def GetSubject(self):
    # Return the subject stored on this object.
    return self.subject

def GetOutlookItem(self):
    # Fetch the matching item through the Outlook object model.
    # Going by the local names, self.id[0] is the store id and
    # self.id[1] the item id; GetItemFromID is passed the item id first.
    hex_item_id = mapi.HexFromBin(self.id[1])
    hex_store_id = mapi.HexFromBin(self.id[0])
    return self.msgstore.outlook.Session.GetItemFromID(hex_item_id, hex_store_id)

def IsFilterCandidate(self):
    # Decide whether this message should be filtered at all.
    #
    # We don't attempt to filter:
    # * Non-mail items
    # * Messages that weren't actually received - this generally means user
    #   composed messages yet to be sent, or copies of "sent items".
    # It does *not* exclude messages that were user composed, but still
    # actually received by the user (ie, when you mail yourself)
    # GroupWise generates IPM.Anti-Virus.Report.45 (but I'm not sure how
    # it manages given it is an external server, and as far as I can tell,
    # this does not appear in the headers).
    if test_suite_running:
        # While the test suite is running, we *only* filter test msgs.
        return self.subject == "SpamBayes addin auto-generated test message"
    # Case-insensitive prefix match of the message class against the
    # accepted classes; the for/else returns False when none matched.
    class_check = self.msgclass.lower()
    for check in "ipm.note", "ipm.anti-virus":
        if class_check.startswith(check):
            break
    else:
        # Not matching class - no good
        return False
    # Must match msg class to get here.
    return self.was_received

def _GetPotentiallyLargeStringProp(self, prop_id, row):
    # Convenience wrapper: calls the module-level
    # GetPotentiallyLargeStringProp with this message's MAPI object.
    return GetPotentiallyLargeStringProp(self.mapi_object, prop_id, row)

def _GetMessageText(self):
    # Return all text parts of the message joined by newlines.
    parts = self._GetMessageTextParts()
    # parts is (headers, body, html), but could possibly grow
    return "\n".join(parts)

def _GetMessageTextParts(self):
    # Return the tuple (headers, body, html) for this message.
    #
    # This is almost reliable :).  The only messages this now fails for
    # are for "forwarded" messages, where the forwards are actually
    # in an attachment.  Later.
    # Note we *dont* look in plain text attachments, which we arguably
    # should.
    from spambayes import mboxutils

    self._EnsureObject()
    prop_ids = (PR_BODY_A,
                MYPR_BODY_HTML_A,
                PR_TRANSPORT_MESSAGE_HEADERS_A)
    hr, data = self.mapi_object.GetProps(prop_ids,0)
    body = self._GetPotentiallyLargeStringProp(prop_ids[0], data[0])
    html = self._GetPotentiallyLargeStringProp(prop_ids[1], data[1])
    headers = self._GetPotentiallyLargeStringProp(prop_ids[2], data[2])
    # xxx - not sure what to do if we have both.
    if not html:
        # No HTML property - fall back to whatever can be derived from
        # the RTF property.
        html = GetHTMLFromRTFProperty(self.mapi_object)

    # Some Outlooks deliver a strange notion of headers, including
    # interior MIME armor.  To prevent later errors, try to get rid
    # of stuff now that can't possibly be parsed as "real" (SMTP)
    # headers.
    headers = mboxutils.extract_headers(headers)

    # Mail delivered internally via Exchange Server etc may not have
    # headers - fake some up.
    if not headers:
        headers = self._GetFakeHeaders()
    # Mail delivered via the Exchange Internet Mail MTA may have
    # gibberish at the start of the headers - fix this.
    elif headers.startswith("Microsoft Mail"):
        headers = "X-MS-Mail-Gibberish: " + headers
        # This mail typically doesn't have a Received header, which
        # is a real PITA for running the incremental testing setup.
        # To make life easier, we add in the fake one that the message
        # would have got if it had had no headers at all.
        if headers.find("Received:") == -1:
            # NOTE(review): prop_ids is a single property tag here, not
            # a tuple as in the other GetProps calls in this method -
            # confirm GetProps accepts a bare tag.
            prop_ids = PR_MESSAGE_DELIVERY_TIME
            hr, data = self.mapi_object.GetProps(prop_ids, 0)
            value = self._format_received(data[0][1])
            headers = "Received: %s\n%s" % (value, headers)

    if not html and not body:
        # Only ever seen this for "multipart/signed" messages, so
        # without any better clues, just handle this.
        # Find all attachments with
        # PR_ATTACH_MIME_TAG_A=multipart/signed
        table = self.mapi_object.GetAttachmentTable(0)
        restriction = (mapi.RES_PROPERTY,   # a property restriction
                       (mapi.RELOP_EQ,      # check for equality
                        PR_ATTACH_MIME_TAG_A,   # of the given prop
                        (PR_ATTACH_MIME_TAG_A, "multipart/signed")))
        try:
            rows = mapi.HrQueryAllRows(table,
                                       (PR_ATTACH_NUM,), # columns to get
                                       restriction,    # only these rows
                                       None,    # any sort order is fine
                                       0)       # any # of results is fine
        except pythoncom.com_error:
            # For some reason there are no rows we can get
            rows = []
        if len(rows) == 0:
            pass # Nothing we can fetch :(
        else:
            if len(rows) > 1:
                # NOTE(review): the two adjacent literals join as
                # "...multipart/signed- using first only" (no space
                # before the dash).
                print "WARNING: Found %d rows with multipart/signed" \
                      "- using first only" % len(rows)
            row = rows[0]
            # Each row holds exactly one (tag, value) pair because only
            # one column was requested above.
            (attach_num_tag, attach_num), = row
            # NOTE(review): this compares the whole tag with PT_ERROR;
            # presumably the property *type* of the tag is what should
            # be checked - confirm.
            assert attach_num_tag != PT_ERROR, \
                   "Error fetching attach_num prop"
            # Open the attachment
            attach = self.mapi_object.OpenAttach(attach_num,
                                                 None,
                                                 mapi.MAPI_DEFERRED_ERRORS)
            prop_ids = (PR_ATTACH_DATA_BIN,)
            hr, data = attach.GetProps(prop_ids, 0)
            attach_body = GetPotentiallyLargeStringProp(attach, prop_ids[0], data[0])
            # What we seem to have here now is a *complete* multi-part
            # mime message - that Outlook must have re-constituted on
            # the fly immediately after pulling it apart! - not unlike
            # exactly what we are doing ourselves right here - putting
            # it into a message object, so we can extract the text, so
            # we can stick it back into another one.  Ahhhhh.
            import email
            msg = email.message_from_string(attach_body)
            assert msg.is_multipart(), "Should be multi-part: %r" % attach_body
            # reduce down all sub messages, collecting all text/ subtypes.
            # (we could make a distinction between text and html, but
            # it is all joined together by this method anyway.)
            def collect_text_parts(msg):
                # Recursively concatenate the payload of every
                # non-multipart part whose main type is 'text'.
                collected = ''
                if msg.is_multipart():
                    for sub in msg.get_payload():
                        collected += collect_text_parts(sub)
                else:
                    if msg.get_content_maintype()=='text':
                        collected += msg.get_payload()
                    else:
                        #print "skipping content type", msg.get_content_type()
                        pass
                return collected
            body = collect_text_parts(msg)

    return headers, body, html

def _GetFakeHeaders(self):
    # This is designed to fake up some SMTP headers for messages
    # on an exchange server that do not have such headers of their own.
    # Returns the headers as one newline-terminated string.
    prop_ids = PR_SUBJECT_A, PR_SENDER_NAME_A, PR_DISPLAY_TO_A, \
               PR_DISPLAY_CC_A, PR_MESSAGE_DELIVERY_TIME, \
               MYPR_MESSAGE_ID_A, PR_IMPORTANCE, PR_CLIENT_SUBMIT_TIME,
    hr, data = self.mapi_object.GetProps(prop_ids, 0)
    headers = ["X-Exchange-Message: true"]
    # Each entry is (header name, index into prop_ids/data, whether to
    # fetch via the large-string helper, optional formatter).
    # "X-Mailer" reuses index 7: _format_version ignores the value, so
    # that header is emitted whenever property 7 is truthy.
    for header, index, potentially_large, format_func in (\
        ("Subject", 0, True, None),
        ("From", 1, True, self._format_address),
        ("To", 2, True, self._format_address),
        ("CC", 3, True, self._format_address),
        ("Received", 4, False, self._format_received),
        ("Message-ID", 5, True, None),
        ("Importance", 6, False, self._format_importance),
        ("Date", 7, False, self._format_time),
        ("X-Mailer", 7, False, self._format_version),
        ):
        if potentially_large:
            value = self._GetPotentiallyLargeStringProp(prop_ids[index],
                                                        data[index])
        else:
            value = data[index][1]
        # Falsy values produce no header at all.
        # NOTE(review): that includes an importance of 0, which
        # _format_importance maps to "low", so "Importance: low" is
        # presumably never emitted - confirm whether that is intended.
        if value:
            if format_func:
                value = format_func(value)
            headers.append("%s: %s" % (header, value))
    return "\n".join(headers) + "\n"

def _format_received(self, raw):
    # Fake up a 'received' header.  It's important that the date
    # is right, so that sort+group.py will work.  The rest is just more
    # clues for the tokenizer to find.
    return "(via local Exchange server); %s" % (self._format_time(raw),)

def _format_time(self, raw):
    # Format a time value as a date string via email's formatdate.
    # int(raw) has time.timezone subtracted before formatting, and True
    # is passed as formatdate's second positional argument.
    # NOTE(review): raw is presumably a MAPI time object that int()
    # converts to seconds - confirm.
    from time import timezone
    from email.Utils import formatdate
    return formatdate(int(raw)-timezone, True)

def _format_importance(self, raw):
    # olImportanceHigh = 2, olImportanceLow = 0, olImportanceNormal = 1
    # Any other value raises KeyError.
    return {0 : "low", 1 : "normal", 2 : "high"}[raw]

def _format_version(self, unused):
    # Constant X-Mailer value; the argument is ignored.
    return "Microsoft Exchange Client"

# Characters replaced by '.' when _format_address fabricates an address.
_address_re = re.compile(r"[()<>,:@!/=; ]")
def _format_address(self, raw):
    # Fudge up something that's in the appropriate form.  We don't
    # have enough information available to get an actual working
    # email address.
    # raw is split on ';'.  Entries containing '@' are passed through
    # (stripped); the others become '"name" <name>' with every character
    # matched by _address_re replaced by '.' inside the angle brackets.
    # The results are joined with '; '.
    addresses = raw.split(";")
    formattedAddresses = []
    for address in addresses:
        address = address.strip()
        if address.find("@") >= 0:
            formattedAddress = address
        else:
            formattedAddress = "\"%s\" <%s>" % \
                               (address, self._address_re.sub('.', address))
        formattedAddresses.append(formattedAddress)
    return "; ".join(formattedAddresses)

def _EnsureObject(self):
    # Open the underlying MAPI object on first use; does nothing when
    # self.mapi_object is already set.  A pythoncom.com_error raised
    # while opening is converted with MsgStoreExceptionFromCOMException
    # and re-raised.
    if self.mapi_object is None:
        try:
            # Hook for the test suite (its semantics are not visible
            # from this part of the file).
            help_test_suite("MAPIMsgStoreMsg._EnsureObject")
            self.mapi_object = self.msgstore._OpenEntry(self.id)
        except pythoncom.com_error, details:
            raise MsgStoreExceptionFromCOMException(details)

def GetEmailPackageObject(self, strip_mime_headers=True):
    # Return an email.Message object.
    #
    # strip_mime_headers is a hack, and should be left True unless you're
    # trying to display all the headers for diagnostic purposes.  If we
    # figure out something better to do, it should go away entirely.
    #
    # Problem #1:  suppose a msg is multipart/alternative, with
    # text/plain and text/html sections.  The latter MIME decorations
    # are plain missing in what _GetMessageText() returns.  If we leave
    # the multipart/alternative in the headers anyway, the email
    # package's "lax parsing" won't complain about not finding any
    # sections, but since the type *is* multipart/alternative then
    # anyway, the tokenizer finds no text/* parts at all to tokenize.
    # As a result, only the headers get tokenized.  By stripping
    # Content-Type from the headers (if present), the email pkg
    # considers the body to be text/plain (the default), and so it
    # does get tokenized.
    #
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -