⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_stats.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 2 页
字号:
# Test spambayes.Stats module.

import os
import sys
import time
import unittest

import sb_test_support
sb_test_support.fix_sys_path()

from spambayes.Stats import Stats
from spambayes.Options import options
from spambayes.message import MessageInfoPickle, Message

# Names of the seven counters these tests inspect, both as attributes of
# the Stats object and as keys of its ``totals`` dict.  The order is the
# order in which the tests check them.
_COUNTER_NAMES = (
    "num_ham", "num_spam", "num_unsure",
    "num_trained_spam", "num_trained_spam_fn",
    "num_trained_ham", "num_trained_ham_fp",
)


class StatsTest(unittest.TestCase):
    """Unit tests for spambayes.Stats.Stats.

    Every test runs against a fresh Stats object backed by a throw-away
    MessageInfoPickle file that tearDown() deletes again.
    """

    # NOTE: the test methods are described with ``#`` comments rather
    # than docstrings so that verbose unittest output keeps showing the
    # method names.

    def setUp(self):
        # Build the pickle-backed message-info database, hand it to a new
        # Stats object, and install it on the Message class so that the
        # Message objects created by the tests record into the same store.
        self.messageinfo_db_name = "__unittest.pik"
        self.messageinfo_db = MessageInfoPickle(self.messageinfo_db_name)
        self.s = Stats(options, self.messageinfo_db)
        Message.message_info_db = self.messageinfo_db

    def tearDown(self):
        # Remove the database file if the test caused one to be written.
        if os.path.exists(self.messageinfo_db_name):
            os.remove(self.messageinfo_db_name)

    def test_from_date_unset(self):
        # A Stats object on a fresh database has no start date.
        self.assertEqual(None, self.s.from_date)

    def test_set_date(self):
        # A permanent reset must stamp from_date, zero every total, and
        # persist the date so that it survives reopening the database.
        #
        # The clock is sampled on both sides of the call and from_date is
        # required to fall in between.  Demanding exact equality with a
        # single earlier time.time() reading only works while the clock
        # does not tick between the two readings.
        # NOTE(review): this assumes ResetTotal() stamps the current
        # time.time() value -- confirm against spambayes.Stats.
        before = time.time()
        self.s.ResetTotal(permanently=True)
        after = time.time()
        from_date = self.s.from_date
        self.assertTrue(before <= from_date <= after)
        for stat in _COUNTER_NAMES:
            self.assertEqual(self.s.totals[stat], 0)
        # Check that it was stored, too.
        self.messageinfo_db.close()
        self.messageinfo_db = MessageInfoPickle(self.messageinfo_db_name)
        self.s = Stats(options, self.messageinfo_db)
        self.assertEqual(from_date, self.s.from_date)

    def test_no_messages(self):
        # With nothing recorded, the report is a single summary line.
        self.assertEqual(self.s.GetStats(), ["Messages classified: 0"])

    def test_reset_session(self):
        # Record enough classifications and trainings to make every
        # counter non-zero (presumably one score per ham/unsure/spam band
        # under the default options), then check Reset() zeroes them all.
        for score in (.2, .1, .4, .91):
            self.s.RecordClassification(score)
        self.s.RecordTraining(True, 0.1)
        self.s.RecordTraining(True, 0.91)
        self.s.RecordTraining(False, 0.1)
        self.s.RecordTraining(False, 0.91)
        for stat in _COUNTER_NAMES:
            self.assertNotEqual(getattr(self.s, stat), 0)
        self.s.Reset()
        for stat in _COUNTER_NAMES:
            self.assertEqual(getattr(self.s, stat), 0)

    def test_record_ham(self):
        # Each classification with score 0.0 is expected to bump num_ham.
        self.s.RecordClassification(0.0)
        self.assertEqual(self.s.num_ham, 1)
        self.s.RecordClassification(0.0)
        self.assertEqual(self.s.num_ham, 2)

    def test_record_spam(self):
        # Each classification with score 1.0 is expected to bump num_spam.
        self.s.RecordClassification(1.0)
        self.assertEqual(self.s.num_spam, 1)
        self.s.RecordClassification(1.0)
        self.assertEqual(self.s.num_spam, 2)

    def test_record_unsure(self):
        # Each classification with score 0.5 is expected to bump
        # num_unsure.
        self.s.RecordClassification(0.5)
        self.assertEqual(self.s.num_unsure, 1)
        self.s.RecordClassification(0.5)
        self.assertEqual(self.s.num_unsure, 2)

    def test_record_fp(self):
        # RecordTraining(True, 1.0) is expected to count as one ham
        # training that was also a false positive.
        self.s.RecordTraining(True, 1.0)
        self.assertEqual(self.s.num_trained_ham, 1)
        self.assertEqual(self.s.num_trained_ham_fp, 1)

    def test_record_fn(self):
        # RecordTraining(False, 0.0) is expected to count as one spam
        # training that was also a false negative.
        self.s.RecordTraining(False, 0.0)
        self.assertEqual(self.s.num_trained_spam, 1)
        self.assertEqual(self.s.num_trained_spam_fn, 1)

    def test_record_fp_class(self):
        # Same expectation as test_record_fp, but the previous verdict is
        # passed as the configured spam header string via old_class.
        self.s.RecordTraining(
            True, old_class=options["Headers", "header_spam_string"])
        self.assertEqual(self.s.num_trained_ham, 1)
        self.assertEqual(self.s.num_trained_ham_fp, 1)

    def test_record_fn_class(self):
        # Same expectation as test_record_fn, but the previous verdict is
        # passed as the configured ham header string via old_class.
        self.s.RecordTraining(
            False, old_class=options["Headers", "header_ham_string"])
        self.assertEqual(self.s.num_trained_spam, 1)
        self.assertEqual(self.s.num_trained_spam_fn, 0 + 1)

    def test_no_record_fp(self):
        # Without any previous verdict, a RecordTraining(True) call must
        # not be counted as a false positive.
        self.s.RecordTraining(True)
        self.assertEqual(self.s.num_trained_ham, 1)
        self.assertEqual(self.s.num_trained_ham_fp, 0)

    def test_no_record_fn(self):
        # Without any previous verdict, a RecordTraining(False) call must
        # not be counted as a false negative.
        self.s.RecordTraining(False)
        self.assertEqual(self.s.num_trained_spam, 1)
        self.assertEqual(self.s.num_trained_spam_fn, 0)

    def test_record_train_spam(self):
        # RecordTraining(False, 1.0) is expected to count as a spam
        # training but not as a false negative.
        self.s.RecordTraining(False, 1.0)
        self.assertEqual(self.s.num_trained_spam, 1)
        self.assertEqual(self.s.num_trained_spam_fn, 0)

    def test_record_train_ham(self):
        # RecordTraining(True, 0.0) is expected to count as a ham
        # training but not as a false positive.
        self.s.RecordTraining(True, 0.0)
        self.assertEqual(self.s.num_trained_ham, 1)
        self.assertEqual(self.s.num_trained_ham_fp, 0)

    def test_calculate_persistent_stats(self):
        # Make sure it is empty to start with.
        for stat in _COUNTER_NAMES:
            self.assertEqual(self.s.totals[stat], 0)
        # Stuff some things in to calculate.  Each entry is
        # (message id, flag for RememberTrained or None to skip that
        # call, name of the "Headers" option holding the classification).
        remembered = (
            ('0', True, 'header_spam_string'),
            ('1', False, 'header_spam_string'),
            ('2', True, 'header_ham_string'),
            ('3', False, 'header_ham_string'),
            ('4', None, 'header_ham_string'),
            ('5', False, 'header_unsure_string'),
            ('6', True, 'header_unsure_string'),
            ('7', None, 'header_unsure_string'),
            ('8', None, 'header_unsure_string'),
        )
        for msg_id, trained_flag, class_option in remembered:
            msg = Message(msg_id)
            if trained_flag is not None:
                msg.RememberTrained(trained_flag)
            msg.RememberClassification(options['Headers', class_option])
        self.s.CalculatePersistentStats()
        self.assertEqual(self.s.totals["num_ham"], 3)
        self.assertEqual(self.s.totals["num_spam"], 2)
        self.assertEqual(self.s.totals["num_unsure"], 4)
        self.assertEqual(self.s.totals["num_trained_spam"], 1)
        self.assertEqual(self.s.totals["num_trained_spam_fn"], 1)
        self.assertEqual(self.s.totals["num_trained_ham"], 1)
        self.assertEqual(self.s.totals["num_trained_ham_fp"], 1)

    def test_CalculateAdditional(self):
        # Feed a hand-built set of raw counters to _CalculateAdditional()
        # and check every derived figure against the same formula
        # evaluated here.
        data = {
            "num_seen": 45,
            "num_ham": 23,
            "num_spam": 10,
            "num_unsure": 12,
            "num_trained_spam_fn": 4,
            "num_trained_ham_fp": 3,
            "num_trained_ham": 7,
            "num_trained_spam": 5,
            "num_unsure_trained_ham": 2,
            "num_unsure_trained_spam": 1,
        }
        new_data = self.s._CalculateAdditional(data)
        # NOTE(review): several expected values below are read from
        # ``data`` under keys this test never sets ("total_ham",
        # "total_spam", "num_spam_correct", "total_cost").  That only
        # works if _CalculateAdditional() adds them to the dict it was
        # given -- confirm against spambayes.Stats.

        # Share of all seen messages per classification.
        self.assertEqual(new_data["perc_ham"],
                         100.0 * data["num_ham"] / data["num_seen"])
        self.assertEqual(new_data["perc_spam"],
                         100.0 * data["num_spam"] / data["num_seen"])
        self.assertEqual(new_data["perc_unsure"],
                         100.0 * data["num_unsure"] / data["num_seen"])

        # Correct / incorrect classifications.
        self.assertEqual(new_data["num_ham_correct"],
                         data["num_ham"] - data["num_trained_spam_fn"])
        self.assertEqual(new_data["num_spam_correct"],
                         data["num_spam"] - data["num_trained_ham_fp"])
        self.assertEqual(new_data["num_correct"],
                         new_data["num_ham_correct"] +
                         new_data["num_spam_correct"])
        self.assertEqual(new_data["num_incorrect"],
                         data["num_trained_spam_fn"] +
                         data["num_trained_ham_fp"])
        self.assertEqual(new_data["perc_correct"],
                         100.0 * new_data["num_correct"] /
                         data["num_seen"])
        self.assertEqual(new_data["perc_incorrect"],
                         100.0 * new_data["num_incorrect"] /
                         data["num_seen"])
        self.assertEqual(new_data["perc_fp"],
                         100.0 * data["num_trained_ham_fp"] /
                         data["num_seen"])
        self.assertEqual(new_data["perc_fn"],
                         100.0 * data["num_trained_spam_fn"] /
                         data["num_seen"])

        # Breakdown of the unsure messages.
        self.assertEqual(new_data["num_unsure_trained_ham"],
                         data["num_trained_ham"] -
                         data["num_trained_ham_fp"])
        self.assertEqual(new_data["num_unsure_trained_spam"],
                         data["num_trained_spam"] -
                         data["num_trained_spam_fn"])
        self.assertEqual(new_data["num_unsure_not_trained"],
                         data["num_unsure"] -
                         data["num_unsure_trained_ham"] -
                         data["num_unsure_trained_spam"])
        self.assertEqual(new_data["perc_unsure_trained_ham"],
                         100.0 * data["num_unsure_trained_ham"] /
                         data["num_unsure"])
        self.assertEqual(new_data["perc_unsure_trained_spam"],
                         100.0 * data["num_unsure_trained_spam"] /
                         data["num_unsure"])
        self.assertEqual(new_data["perc_unsure_not_trained"],
                         100.0 * new_data["num_unsure_not_trained"] /
                         data["num_unsure"])

        # Per-class totals and the percentages derived from them.
        self.assertEqual(new_data["total_ham"],
                         new_data["num_ham_correct"] +
                         data["num_trained_ham"])
        self.assertEqual(new_data["total_spam"],
                         new_data["num_spam_correct"] +
                         data["num_trained_spam"])
        self.assertEqual(new_data["perc_ham_incorrect"],
                         100.0 * data["num_trained_ham_fp"] /
                         data["total_ham"])
        self.assertEqual(new_data["perc_ham_unsure"],
                         100.0 * data["num_unsure_trained_ham"] /
                         data["total_ham"])
        self.assertEqual(new_data["perc_ham_incorrect_or_unsure"],
                         100.0 *
                         (data["num_trained_ham_fp"] +
                          data["num_unsure_trained_ham"]) /
                         data["total_ham"])
        self.assertEqual(new_data["perc_spam_correct"],
                         100.0 * data["num_spam_correct"] /
                         data["total_spam"])
        self.assertEqual(new_data["perc_spam_unsure"],
                         100.0 * data["num_unsure_trained_spam"] /
                         data["total_spam"])
        self.assertEqual(new_data["perc_spam_correct_or_unsure"],
                         100.0 *
                         (data["num_spam_correct"] +
                          data["num_unsure_trained_spam"]) /
                         data["total_spam"])

        # Cost figures, weighted by the TestDriver cutoff weights.
        self.assertEqual(new_data["total_cost"],
                         data["num_trained_ham_fp"] *
                         options["TestDriver", "best_cutoff_fp_weight"] +
                         data["num_trained_spam_fn"] *
                         options["TestDriver", "best_cutoff_fn_weight"] +
                         data["num_unsure"] *
                         options["TestDriver", "best_cutoff_unsure_weight"])
        self.assertEqual(new_data["cost_savings"],
                         data["num_spam"] *
                         options["TestDriver", "best_cutoff_fn_weight"] -
                         data["total_cost"])

    def test_AddPercentStrings(self):
        # Run the _test_AddPercentStrings helper once for each argument
        # 0..9.  ``range`` is used instead of ``xrange`` so the loop works
        # on both Python 2 and Python 3.
        for i in range(10):
            self._test_AddPercentStrings(i)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -