⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_stats.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 2 页
字号:
    def _test_AddPercentStrings(self, dp):        data = self.s._AddPercentStrings({}, dp)        self.assertEqual(data["perc_ham_s"],                         "%%(perc_ham).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_spam_s"],                         "%%(perc_spam).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_unsure_s"],                         "%%(perc_unsure).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_correct_s"],                         "%%(perc_correct).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_incorrect_s"],                         "%%(perc_incorrect).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_fp_s"],                         "%%(perc_fp).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_fn_s"],                         "%%(perc_fn).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_spam_correct_s"],                         "%%(perc_spam_correct).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_spam_unsure_s"],                         "%%(perc_spam_unsure).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_spam_correct_or_unsure_s"],                         "%%(perc_spam_correct_or_unsure).%df%%(perc)s" %                         (dp,))        self.assertEqual(data["perc_ham_incorrect_s"],                         "%%(perc_ham_incorrect).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_ham_unsure_s"],                         "%%(perc_ham_unsure).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_ham_incorrect_or_unsure_s"],                         "%%(perc_ham_incorrect_or_unsure).%df%%(perc)s" %                         (dp,))        self.assertEqual(data["perc_unsure_trained_ham_s"],                         "%%(perc_unsure_trained_ham).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc_unsure_trained_spam_s"],                         "%%(perc_unsure_trained_spam).%df%%(perc)s" %                         (dp,))        self.assertEqual(data["perc_unsure_not_trained_s"],                         "%%(perc_unsure_not_trained).%df%%(perc)s" % (dp,))        self.assertEqual(data["perc"], "%")            def _test_no_recovery(self, score, one, two, three, five):        self.s.RecordClassification(score)        s = self.s.GetStats()        self.assertEqual(s[0], "Messages classified: 1")        self.assertEqual(s[1], one)        self.assertEqual(s[2], two)        self.assertEqual(s[3], three)        self.assertEqual(s[4], "")        self.assertEqual(s[5], five)        self.assertEqual(s[6], "Classified incorrectly:\t0 (0.0% of total)")        self.assertEqual(s[7], "")        self.assertEqual(s[8], "Manually classified as good:\t0")        self.assertEqual(s[9], "Manually classified as spam:\t0")        self.assertEqual(s[10], "")        if options["Categorization", "ham_cutoff"] <= score < \           options["Categorization", "spam_cutoff"]:            self.assertEqual(s[11], "Unsures trained as good:\t0 (0.0% of unsures)")            self.assertEqual(s[12], "Unsures trained as spam:\t0 (0.0% of unsures)")            self.assertEqual(s[13], "Unsures not trained:\t\t1 (100.0% of unsures)")            self.assertEqual(s[14], "")            counter = 15        else:            counter = 11        try:            return s[counter]        except IndexError:            return    def test_no_recovery_unsure(self):        score = 0.2        one = "\tGood:\t0 (0.0%)"        two = "\tSpam:\t0 (0.0%)"        three = "\tUnsure:\t1 (100.0%)"        five = "Classified correctly:\t0 (0.0% of total)"        self._test_no_recovery(score, one, two, three, five)    def test_no_recovery_ham(self):        score = 0.0        one = "\tGood:\t1 (100.0%)"        two = "\tSpam:\t0 (0.0%)"        three = "\tUnsure:\t0 (0.0%)"        five = "Classified correctly:\t1 (100.0% of total)"        final = self._test_no_recovery(score, one, two, three, five)        self.assertEqual(final, "Good incorrectly identified:\t0.0% (+ 0.0% unsure)")    def test_no_recovery_spam(self):        score = 1.0        one = "\tGood:\t0 (0.0%)"        two = "\tSpam:\t1 (100.0%)"        three = "\tUnsure:\t0 (0.0%)"        five = "Classified correctly:\t1 (100.0% of total)"        final = self._test_no_recovery(score, one, two, three, five)        self.assertEqual(final, "Spam correctly identified:\t100.0% (+ 0.0% unsure)")    def test_get_stats_session_only(self):        # Record some session data.        self.s.RecordClassification(0.0)        self.s.RecordClassification(0.2)        self.s.RecordClassification(0.1)        self.s.RecordClassification(0.4)        self.s.RecordClassification(0.5)        self.s.RecordClassification(0.95)        self.s.RecordClassification(1.0)        self.s.RecordTraining(True, 0.1)        self.s.RecordTraining(True, 1.0)        self.s.RecordTraining(False, 0.1)        self.s.RecordTraining(False, 1.0)        # Put data into the totals, to ensure that we only get back the        # session data.        for stat in ["num_ham", "num_spam", "num_unsure",                     "num_trained_spam", "num_trained_spam_fn",                     "num_trained_ham", "num_trained_ham_fp",]:            self.s.totals[stat] = 4        s = self.s.GetStats(session_only=True)        self.assertEqual(s[0], "Messages classified: 7")        self.assertEqual(s[1], "\tGood:\t2 (28.6%)")        self.assertEqual(s[2], "\tSpam:\t2 (28.6%)")        self.assertEqual(s[3], "\tUnsure:\t3 (42.9%)")        self.assertEqual(s[4], "")        self.assertEqual(s[5], "Classified correctly:\t2 (28.6% of total)")        self.assertEqual(s[6], "Classified incorrectly:\t2 (28.6% of total)")        self.assertEqual(s[7], "\tFalse positives:\t1 (14.3% of total)")        self.assertEqual(s[8], "\tFalse negatives:\t1 (14.3% of total)")        self.assertEqual(s[9], "")        self.assertEqual(s[10], "Manually classified as good:\t2")        self.assertEqual(s[11], "Manually classified as spam:\t2")        self.assertEqual(s[12], "")        self.assertEqual(s[13], "Unsures trained as good:\t1 (33.3% of unsures)")        self.assertEqual(s[14], "Unsures trained as spam:\t1 (33.3% of unsures)")        self.assertEqual(s[15], "Unsures not trained:\t\t1 (33.3% of unsures)")        self.assertEqual(s[16], "")        self.assertEqual(s[17], "Spam correctly identified:\t33.3% (+ 33.3% unsure)")        self.assertEqual(s[18], "Good incorrectly identified:\t33.3% (+ 33.3% unsure)")        self.assertEqual(s[19], "")        self.assertEqual(s[20], "Total cost of spam:\t$11.60")        self.assertEqual(s[21], "SpamBayes savings:\t$-9.60")        self.assertEqual(len(s), 22)    def test_get_all_stats(self):        s = self._stuff_with_data()        self.assertEqual(s[0], "Messages classified: 16")        self.assertEqual(s[1], "\tGood:\t5 (31.3%)")        self.assertEqual(s[2], "\tSpam:\t4 (25.0%)")        self.assertEqual(s[3], "\tUnsure:\t7 (43.8%)")        self.assertEqual(s[4], "")        self.assertEqual(s[5], "Classified correctly:\t5 (31.3% of total)")        self.assertEqual(s[6], "Classified incorrectly:\t4 (25.0% of total)")        self.assertEqual(s[7], "\tFalse positives:\t2 (12.5% of total)")        self.assertEqual(s[8], "\tFalse negatives:\t2 (12.5% of total)")        self.assertEqual(s[9], "")        self.assertEqual(s[10], "Manually classified as good:\t3")        self.assertEqual(s[11], "Manually classified as spam:\t3")        self.assertEqual(s[12], "")        self.assertEqual(s[13], "Unsures trained as good:\t1 (14.3% of unsures)")        self.assertEqual(s[14], "Unsures trained as spam:\t1 (14.3% of unsures)")        self.assertEqual(s[15], "Unsures not trained:\t\t5 (71.4% of unsures)")        self.assertEqual(s[16], "")        self.assertEqual(s[17], "Spam correctly identified:\t40.0% (+ 20.0% unsure)")        self.assertEqual(s[18], "Good incorrectly identified:\t33.3% (+ 16.7% unsure)")        self.assertEqual(s[19], "")        self.assertEqual(s[20], "Total cost of spam:\t$23.40")        self.assertEqual(s[21], "SpamBayes savings:\t$-19.40")        self.assertEqual(len(s), 22)    def _stuff_with_data(self, use_html=False):        self._stuff_with_session_data()        self._stuff_with_persistent_data()        self.s.CalculatePersistentStats()        return self.s.GetStats(use_html=use_html)    def _stuff_with_session_data(self):        # Record some session data.        self.s.RecordClassification(0.0)        self.s.RecordClassification(0.2)        self.s.RecordClassification(0.1)        self.s.RecordClassification(0.4)        self.s.RecordClassification(0.5)        self.s.RecordClassification(0.95)        self.s.RecordClassification(1.0)        self.s.RecordTraining(True, 0.1)        self.s.RecordTraining(True, 1.0)        self.s.RecordTraining(False, 0.1)        self.s.RecordTraining(False, 1.0)    def _stuff_with_persistent_data(self):        # Put data into the totals.        msg = Message('0')        msg.RememberTrained(True)        msg.RememberClassification(options['Headers','header_spam_string'])        msg = Message('1')        msg.RememberTrained(False)        msg.RememberClassification(options['Headers','header_spam_string'])        msg = Message('2')        msg.RememberTrained(True)        msg.RememberClassification(options['Headers','header_ham_string'])        msg = Message('3')        msg.RememberTrained(False)        msg.RememberClassification(options['Headers','header_ham_string'])        msg = Message('4')        msg.RememberClassification(options['Headers','header_ham_string'])        msg = Message('5')        msg.RememberTrained(False)        msg.RememberClassification(options['Headers','header_unsure_string'])        msg = Message('6')        msg.RememberTrained(True)        msg.RememberClassification(options['Headers','header_unsure_string'])        msg = Message('7')        msg.RememberClassification(options['Headers','header_unsure_string'])        msg = Message('8')        msg.RememberClassification(options['Headers','header_unsure_string'])    def test_with_html(self):        s = self._stuff_with_data(True)        for line in s:            self.assert_('\t' not in line)    def test_without_html(self):        s = self._stuff_with_data(False)        for line in s:            self.assertEqual(line.find('&nbsp;'), -1)    def test_from_date_empty(self):        # Put persistent data in, but no session data.        self._stuff_with_persistent_data()        # Wait for a bit to make sure the time is later.        time.sleep(0.1)        # Set the date to now.        self.s.ResetTotal(permanently=True)        # Recalculate.        self.s.CalculatePersistentStats()        # Check.        self.assertEqual(self.s.GetStats(), ["Messages classified: 0"])    def test_from_specified_date(self):        # Put persistent data in, but no session data.        self._stuff_with_persistent_data()        # Wait for a bit to make sure the time is later.        time.sleep(0.1)        # Set the date to now.        self.s.from_date = time.time()        # Wait for a bit to make sure the time is later.        time.sleep(0.1)        # Put more data in.        msg = Message('0')        msg.RememberTrained(True)        msg.RememberClassification(options['Headers','header_spam_string'])        msg = Message('7')        msg.RememberTrained(False)        msg.RememberClassification(options['Headers','header_spam_string'])        msg = Message('2')        msg.RememberTrained(True)        msg.RememberClassification(options['Headers','header_ham_string'])        # Recalculate.        self.s.CalculatePersistentStats()        # Check that there are the right number of messages (assume that        # the rest is right - if not it should be caught by other tests).        self.assertEqual(self.s.GetStats()[0], "Messages classified: 3")        def suite():    suite = unittest.TestSuite()    for cls in (StatsTest,               ):        suite.addTest(unittest.makeSuite(cls))    return suiteif __name__=='__main__':    sb_test_support.unittest_main(argv=sys.argv + ['suite'])

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -