📄 test_message.py
字号:
# Test spambayes.message module.

import os
import sys
import math
import time
import email
import unittest

import sb_test_support
sb_test_support.fix_sys_path()

from spambayes.Options import options
from spambayes.tokenizer import tokenize
from spambayes.classifier import Classifier
from spambayes.message import MessageInfoDB, insert_exception_header
from spambayes.message import Message, SBHeaderMessage, MessageInfoPickle

# We borrow the test messages that test_sb_server uses.
# I doubt it really makes much difference, but if we wanted more than
# one message of each type (the tests should all handle this ok) then
# Richie's hammer.py script has code for generating any number of
# randomly composed email messages.
from test_sb_server import good1, spam1, malformed1

try:
    __file__
except NameError:
    # Python 2.2 compatibility.
    __file__ = sys.argv[0]

TEMP_PICKLE_NAME = os.path.join(os.path.dirname(__file__), "temp.pik")
TEMP_DBM_NAME = os.path.join(os.path.dirname(__file__), "temp.dbm")

# The chances of anyone having files with these names in the test
# directory is minute, but we don't want to wipe anything, so make
# sure that they don't already exist. Our tearDown code gets rid
# of our copies (whether the tests pass or fail) so they shouldn't
# be ours.
for fn in [TEMP_PICKLE_NAME, TEMP_DBM_NAME]:
    if os.path.exists(fn):
        # A parenthesised single-argument print parses on both Python 2
        # and Python 3 and emits the same text as `print fn, "..."`.
        print("%s %s" % (
            fn,
            "already exists. Please remove this file before "
            "running these tests (a file by that name will be "
            "created and destroyed as part of the tests)."))
        sys.exit(1)


class MessageTest(unittest.TestCase):
    """Tests for spambayes.message.Message, built from the spam1 sample.

    Test methods carry `#` comments rather than docstrings so that
    unittest's verbose test descriptions stay unchanged.
    """

    def setUp(self):
        self.msg = email.message_from_string(spam1, _class=Message)

    def test_persistent_state(self):
        self.assertEqual(self.msg.stored_attributes,
                         ['c', 't', 'date_modified'])

    def test_initialisation(self):
        self.assertEqual(self.msg.id, None)
        self.assertEqual(self.msg.c, None)
        self.assertEqual(self.msg.t, None)

    def test_setId(self):
        # Verify that you can't change the id.
        self.msg.id = "test"
        self.assertRaises(ValueError, self.msg.setId, "test2")

        # Verify that you can't set the id to None.
        self.msg.id = None
        self.assertRaises(ValueError, self.msg.setId, None)

        # Verify that id must be a string.
        self.assertRaises(TypeError, self.msg.setId, 1)
        self.assertRaises(TypeError, self.msg.setId, False)
        self.assertRaises(TypeError, self.msg.setId, [])

        msg_id = "Test"
        self.msg.setId(msg_id)
        self.assertEqual(self.msg.id, msg_id)

        # Check info db load_msg is called.  The real loader is swapped
        # for a stub that only records the call, and is always restored.
        self.msg.id = None
        saved = self.msg.message_info_db.load_msg
        self.done = False
        try:
            self.msg.message_info_db.load_msg = self._fake_setState
            self.msg.setId(msg_id)
            self.assertEqual(self.done, True)
        finally:
            self.msg.message_info_db.load_msg = saved

    def test_getId(self):
        self.assertEqual(self.msg.getId(), None)
        msg_id = "test"
        self.msg.id = msg_id
        self.assertEqual(self.msg.getId(), msg_id)

    def test_tokenize(self):
        toks = self.msg.tokenize()
        self.assertEqual(tuple(tokenize(spam1)), tuple(toks))

    def test_force_CRLF(self):
        # The sample must start without CRs for the check to mean anything.
        self.assert_('\r' not in good1)
        lines = self.msg._force_CRLF(good1).split('\n')
        for line in lines:
            if line:
                self.assert_(line.endswith('\r'))

    def test_as_string_endings(self):
        self.assert_('\r' not in spam1)
        lines = self.msg.as_string().split('\n')
        for line in lines:
            if line:
                self.assert_(line.endswith('\r'))

    def _fake_setState(self, state):
        # Stub for message_info_db.load_msg / store_msg: records the call.
        self.done = True

    def test_modified(self):
        saved = self.msg.message_info_db.store_msg
        try:
            self.msg.message_info_db.store_msg = self._fake_setState
            self.done = False
            # With no id set, modified() is expected not to store.
            self.msg.modified()
            self.assertEqual(self.done, False)
            self.msg.id = "Test"
            self.msg.modified()
            self.assertEqual(self.done, True)
        finally:
            self.msg.message_info_db.store_msg = saved

    def test_GetClassification(self):
        self.msg.c = 's'
        self.assertEqual(self.msg.GetClassification(),
                         options['Headers', 'header_spam_string'])
        self.msg.c = 'h'
        self.assertEqual(self.msg.GetClassification(),
                         options['Headers', 'header_ham_string'])
        self.msg.c = 'u'
        self.assertEqual(self.msg.GetClassification(),
                         options['Headers', 'header_unsure_string'])
        self.msg.c = 'a'
        self.assertEqual(self.msg.GetClassification(), None)

    def test_RememberClassification(self):
        self.msg.RememberClassification(options['Headers',
                                                'header_spam_string'])
        self.assertEqual(self.msg.c, 's')
        self.msg.RememberClassification(options['Headers',
                                                'header_ham_string'])
        self.assertEqual(self.msg.c, 'h')
        self.msg.RememberClassification(options['Headers',
                                                'header_unsure_string'])
        self.assertEqual(self.msg.c, 'u')
        self.assertRaises(ValueError, self.msg.RememberClassification, "a")

        # Check that self.msg.modified is called.
        saved = self.msg.modified
        self.done = False
        try:
            self.msg.modified = self._fake_modified
            self.msg.RememberClassification(options['Headers',
                                                    'header_unsure_string'])
            self.assertEqual(self.done, True)
        finally:
            self.msg.modified = saved

    def _fake_modified(self):
        # Stub for Message.modified: records the call.
        self.done = True

    def test_GetAndRememberTrained(self):
        t = "test"
        saved = self.msg.modified
        self.done = False
        try:
            self.msg.modified = self._fake_modified
            self.msg.RememberTrained(t)
            self.assertEqual(self.done, True)
        finally:
            self.msg.modified = saved
        self.assertEqual(self.msg.GetTrained(), t)


class SBHeaderMessageTest(unittest.TestCase):
    """Tests for the headers that SBHeaderMessage.addSBHeaders adds.

    NOTE(review): these tests assign to the shared `options` object and
    no restore is visible in this part of the file -- confirm whether
    leakage between tests matters.
    """

    def setUp(self):
        self.msg = email.message_from_string(spam1, _class=SBHeaderMessage)
        # Get a prob and some clues.  Three probabilities are taken from
        # one classifier: before any training (u_prob), after learning
        # good1 as ham (g_prob), and after unlearning that and learning
        # spam1 as spam (s_prob, together with its clues).
        c = Classifier()
        self.u_prob, clues = c.spamprob(tokenize(good1), True)
        c.learn(tokenize(good1), False)
        self.g_prob, clues = c.spamprob(tokenize(good1), True)
        c.unlearn(tokenize(good1), False)
        c.learn(tokenize(spam1), True)
        self.s_prob, self.clues = c.spamprob(tokenize(spam1), True)
        self.ham = options['Headers', 'header_ham_string']
        self.spam = options['Headers', 'header_spam_string']
        self.unsure = options['Headers', 'header_unsure_string']
        self.to = "tony.meyer@gmail.com;ta-meyer@ihug.co.nz"
        self.msg["to"] = self.to

    def _parse_evidence_header(self):
        # Return the evidence header of self.msg as a {word: score} dict.
        # Entries are split on ';'; each entry is split on ':' with the
        # last field taken as the score and the rest re-joined as the
        # word (words may themselves contain ':').  [1:-1] drops the
        # first and last character of the re-joined word.
        header = self.msg[options['Headers', 'evidence_header_name']]
        entries = [s.split(':') for s in
                   [s.strip() for s in header.split(';')]]
        return dict([(":".join(clue[:-1])[1:-1], float(clue[-1]))
                     for clue in entries])

    def test_setIdFromPayload(self):
        payload_id = self.msg.setIdFromPayload()
        self.assertEqual(payload_id, None)
        self.assertEqual(self.msg.id, None)
        msgid = "test"
        msg = "".join((options['Headers', 'mailid_header_name'], ": ",
                       msgid, "\r\n", good1))
        msg = email.message_from_string(msg, _class=SBHeaderMessage)
        payload_id = msg.setIdFromPayload()
        self.assertEqual(payload_id, msgid)
        self.assertEqual(msg.id, msgid)

    def test_disposition_header_ham(self):
        name = options['Headers', 'classification_header_name']
        self.msg.addSBHeaders(self.g_prob, self.clues)
        self.assertEqual(self.msg[name], self.ham)
        self.assertEqual(self.msg.GetClassification(), self.ham)

    def test_disposition_header_spam(self):
        name = options['Headers', 'classification_header_name']
        self.msg.addSBHeaders(self.s_prob, self.clues)
        self.assertEqual(self.msg[name], self.spam)
        self.assertEqual(self.msg.GetClassification(), self.spam)

    def test_disposition_header_unsure(self):
        name = options['Headers', 'classification_header_name']
        self.msg.addSBHeaders(self.u_prob, self.clues)
        self.assertEqual(self.msg[name], self.unsure)
        self.assertEqual(self.msg.GetClassification(), self.unsure)

    def test_score_header_off(self):
        options['Headers', 'include_score'] = False
        self.msg.addSBHeaders(self.g_prob, self.clues)
        self.assertEqual(self.msg[options['Headers', 'score_header_name']],
                         None)

    def test_score_header(self):
        options['Headers', 'include_score'] = True
        options["Headers", "header_score_digits"] = 21
        options["Headers", "header_score_logarithm"] = False
        self.msg.addSBHeaders(self.g_prob, self.clues)
        self.assertEqual(self.msg[options['Headers', 'score_header_name']],
                         "%.21f" % (self.g_prob,))

    def test_score_header_log(self):
        options['Headers', 'include_score'] = True
        options["Headers", "header_score_digits"] = 21
        options["Headers", "header_score_logarithm"] = True
        self.msg.addSBHeaders(self.s_prob, self.clues)
        score_header = self.msg[options['Headers', 'score_header_name']]
        self.assert_(score_header.startswith("%.21f" % (self.s_prob,)))
        self.assert_(score_header.endswith(
            " (%d)" % (-math.log10(1.0-self.s_prob),)))

    def test_thermostat_header_off(self):
        options['Headers', 'include_thermostat'] = False
        self.msg.addSBHeaders(self.u_prob, self.clues)
        self.assertEqual(
            self.msg[options['Headers', 'thermostat_header_name']], None)

    def test_thermostat_header_unsure(self):
        options['Headers', 'include_thermostat'] = True
        self.msg.addSBHeaders(self.u_prob, self.clues)
        self.assertEqual(
            self.msg[options['Headers', 'thermostat_header_name']], "*****")

    def test_thermostat_header_spam(self):
        options['Headers', 'include_thermostat'] = True
        self.msg.addSBHeaders(self.s_prob, self.clues)
        self.assertEqual(
            self.msg[options['Headers', 'thermostat_header_name']],
            "*********")

    def test_thermostat_header_ham(self):
        options['Headers', 'include_thermostat'] = True
        self.msg.addSBHeaders(self.g_prob, self.clues)
        self.assertEqual(
            self.msg[options['Headers', 'thermostat_header_name']], "")

    def test_evidence_header(self):
        options['Headers', 'include_evidence'] = True
        options['Headers', 'clue_mailheader_cutoff'] = 0.5 # all
        self.msg.addSBHeaders(self.g_prob, self.clues)
        header_clues = self._parse_evidence_header()
        for word, score in self.clues:
            self.assert_(word in header_clues)
            self.assertEqual(round(score, 2), header_clues[word])

    def test_evidence_header_partial(self):
        options['Headers', 'include_evidence'] = True
        options['Headers', 'clue_mailheader_cutoff'] = 0.1
        self.msg.addSBHeaders(self.g_prob, self.clues)
        header_clues = self._parse_evidence_header()
        for word, score in self.clues:
            # Only clues within 0.1 of either extreme should be listed.
            if score <= 0.1 or score >= 0.9:
                self.assert_(word in header_clues)
                self.assertEqual(round(score, 2), header_clues[word])
            else:
                self.assert_(word not in header_clues)

    def test_evidence_header_empty(self):
        options['Headers', 'include_evidence'] = True
        options['Headers', 'clue_mailheader_cutoff'] = 0.0
        self.msg.addSBHeaders(self.g_prob, self.clues)
        header_clues = self._parse_evidence_header()
        for word, score in self.clues:
            # With a zero cutoff only the "*H*" / "*S*" entries are
            # expected to remain.
            if word == "*H*" or word == "*S*":
                self.assert_(word in header_clues)
                self.assertEqual(round(score, 2), header_clues[word])
            else:
                self.assert_(word not in header_clues)

    def test_evidence_header_off(self):
        options['Headers', 'include_evidence'] = False
        self.msg.addSBHeaders(self.g_prob, self.clues)
        self.assertEqual(self.msg[options['Headers', 'evidence_header_name']],
                         None)

    def test_notate_to_off(self):
        options["Headers", "notate_to"] = ()
        self.msg.addSBHeaders(self.g_prob, self.clues)
        self.msg.addSBHeaders(self.u_prob, self.clues)
        self.msg.addSBHeaders(self.s_prob, self.clues)
        self.assertEqual(self.msg["To"], self.to)

    def test_notate_to_ham(self):
        options["Headers", "notate_to"] = (self.ham,)
        self.msg.addSBHeaders(self.g_prob, self.clues)
        disp, orig = self.msg["To"].split(',', 1)
        self.assertEqual(orig, self.to)
        self.assertEqual(disp, "%s@spambayes.invalid" % (self.ham,))

    def test_notate_to_unsure(self):
        options["Headers", "notate_to"] = (self.ham, self.unsure)
        self.msg.addSBHeaders(self.u_prob, self.clues)
        disp, orig = self.msg["To"].split(',', 1)
        self.assertEqual(orig, self.to)
        self.assertEqual(disp, "%s@spambayes.invalid" % (self.unsure,))

    def test_notate_to_spam(self):
        options["Headers", "notate_to"] = (self.ham, self.spam, self.unsure)
        self.msg.addSBHeaders(self.s_prob, self.clues)
        disp, orig = self.msg["To"].split(',', 1)
        self.assertEqual(orig, self.to)
        self.assertEqual(disp, "%s@spambayes.invalid" % (self.spam,))

    def test_notate_subject_off(self):
        subject = self.msg["Subject"]
        options["Headers", "notate_subject"] = ()
        self.msg.addSBHeaders(self.g_prob, self.clues)
        self.msg.addSBHeaders(self.u_prob, self.clues)
        self.msg.addSBHeaders(self.s_prob, self.clues)
        self.assertEqual(self.msg["Subject"], subject)

    def test_notate_subject_ham(self):
        subject = self.msg["Subject"]
        options["Headers", "notate_subject"] = (self.ham,)
        self.msg.addSBHeaders(self.g_prob, self.clues)
        disp, orig = self.msg["Subject"].split(',', 1)
        self.assertEqual(orig, subject)
        self.assertEqual(disp, self.ham)

    def test_notate_subject_unsure(self):
        subject = self.msg["Subject"]
        options["Headers", "notate_subject"] = (self.ham, self.unsure)
        self.msg.addSBHeaders(self.u_prob, self.clues)
        disp, orig = self.msg["Subject"].split(',', 1)
        self.assertEqual(orig, subject)
        self.assertEqual(disp, self.unsure)

    def test_notate_subject_spam(self):
        # NOTE(review): the source is cut off after the next statement;
        # the rest of this test (and of the file) is missing and must be
        # restored from the original module.
        subject = self.msg["Subject"]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -