⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_storage.py

📁 用python实现的邮件过滤器
💻 PY
字号:
# Test the basic storage operations of the classifier.import unittest, os, sysimport tempfileimport cStringIO as StringIOimport sb_test_supportsb_test_support.fix_sys_path()from spambayes.storage import ZODBClassifier, CDBClassifierfrom spambayes.storage import DBDictClassifier, PickledClassifierclass _StorageTestBase(unittest.TestCase):    # Subclass must define a concrete StorageClass.    StorageClass = None    def setUp(self):        self.db_name = tempfile.mktemp("spambayestest")        self.classifier = self.StorageClass(self.db_name)    def tearDown(self):        self.classifier.close()        self.classifier = None        if os.path.isfile(self.db_name):            os.remove(self.db_name)    def testLoadAndStore(self):        # Simple test to verify that putting data in the db, storing and        # then loading gives back the same data.        c = self.classifier        c.learn(["some", "simple", "tokens"], True)        c.learn(["some", "other"], False)        c.learn(["ones"], False)        c.store()        c.close()        del self.classifier        self.classifier = self.StorageClass(self.db_name)        self._checkAllWordCounts((("some", 1, 1),                                  ("simple", 0, 1),                                  ("tokens", 0, 1),                                  ("other", 1, 0),                                  ("ones", 1, 0)), False)        self.assertEqual(self.classifier.nham, 2)        self.assertEqual(self.classifier.nspam, 1)    def testCounts(self):        # Check that nham and nspam are correctedly adjusted.        c = self.classifier        count = 30        for i in xrange(count):            c.learn(["tony"], True)            self.assertEqual(c.nspam, i+1)            self.assertEqual(c.nham, 0)        for i in xrange(count):            c.learn(["tony"], False)            self.assertEqual(c.nham, i+1)            self.assertEqual(c.nspam, count)        for i in xrange(count):            c.unlearn(["tony"], True)            self.assertEqual(c.nham, count)            self.assertEqual(c.nspam, count-i-1)        for i in xrange(count):            c.unlearn(["tony"], False)            self.assertEqual(c.nham, count-i-1)            self.assertEqual(c.nspam, 0)    def _checkWordCounts(self, word, expected_ham, expected_spam):        assert word        info = self.classifier._wordinfoget(word)        if info is None:            if expected_ham == expected_spam == 0:                return            self.fail("_CheckWordCounts for '%s' got None!")        if info.hamcount != expected_ham:            self.fail("Hamcount '%s' wrong - got %d, but expected %d" \                        % (word, info.hamcount, expected_ham))        if info.spamcount != expected_spam:            self.fail("Spamcount '%s' wrong - got %d, but expected %d" \                        % (word, info.spamcount, expected_spam))    def _checkAllWordCounts(self, counts, do_persist):        for info in counts:            self._checkWordCounts(*info)        if do_persist:            self.classifier.store()            self.classifier.load()            self._checkAllWordCounts(counts, False)    def testHapax(self):        self._dotestHapax(False)        self._dotestHapax(True)    def _dotestHapax(self, do_persist):        c = self.classifier        c.learn(["common","nearly_hapax", "hapax", ], False)        c.learn(["common","nearly_hapax"], False)        c.learn(["common"], False)        # All the words should be there.        self._checkAllWordCounts( (("common", 3, 0),                                   ("nearly_hapax", 2, 0),                                   ("hapax", 1, 0)),                                  do_persist)        # Unlearn the complete set.        c.unlearn(["common","nearly_hapax", "hapax", ], False)        # 'hapax' removed, rest still there        self._checkAllWordCounts( (("common", 2, 0),                                   ("nearly_hapax", 1, 0),                                   ("hapax", 0, 0)),                                  do_persist)        # Re-learn that set, so deleted hapax is reloaded        c.learn(["common","nearly_hapax", "hapax", ], False)        self._checkAllWordCounts( (("common", 3, 0),                                   ("nearly_hapax", 2, 0),                                   ("hapax", 1, 0)),                                  do_persist)        # Back to where we started - start unlearning all down to zero.        c.unlearn(["common","nearly_hapax", "hapax", ], False)        # 'hapax' removed, rest still there        self._checkAllWordCounts( (("common", 2, 0),                                   ("nearly_hapax", 1, 0),                                   ("hapax", 0, 0)),                                  do_persist)        # Unlearn the next set.        c.unlearn(["common","nearly_hapax"], False)        self._checkAllWordCounts( (("common", 1, 0),                                   ("nearly_hapax", 0, 0),                                   ("hapax", 0, 0)),                                  do_persist)        c.unlearn(["common"], False)        self._checkAllWordCounts( (("common", 0, 0),                                   ("nearly_hapax", 0, 0),                                   ("hapax", 0, 0)),                                  do_persist)    def test_bug777026(self):        c = self.classifier        word = "tim"        c.learn([word], False)        c.learn([word], False)        self._checkAllWordCounts([(word, 2, 0)], False)        # Clone word's WordInfo record.        record = self.classifier.wordinfo[word]        newrecord = type(record)()        newrecord.__setstate__(record.__getstate__())        self.assertEqual(newrecord.hamcount, 2)        self.assertEqual(newrecord.spamcount, 0)        # Reduce the hamcount -- this tickled an excruciatingly subtle        # bug in a DBDictClassifier's _wordinfoset, which, at the time        # this test was written, couldn't actually be provoked by the        # way _wordinfoset got called by way of learn() and unlearn()        # methods.  The code implicitly relied on that the record passed        # to _wordinfoset was always the same object as was already        # in wordinfo[word].        newrecord.hamcount -= 1        c._wordinfoset(word, newrecord)        # If the bug is present, the DBDictClassifier still believes        # the hamcount is 2.        self._checkAllWordCounts([(word, 1, 0)], False)        c.unlearn([word], False)        self._checkAllWordCounts([(word, 0, 0)], False)# Test classes for each classifier.class PickleStorageTestCase(_StorageTestBase):    StorageClass = PickledClassifierclass DBStorageTestCase(_StorageTestBase):    StorageClass = DBDictClassifier    def _fail_open_best(self, *args):        from spambayes import dbmstorage        raise dbmstorage.error("No dbm modules available!")    def testNoDBMAvailable(self):        import tempfile        from spambayes.storage import open_storage        db_name = tempfile.mktemp("nodbmtest")        DBDictClassifier_load = DBDictClassifier.load        DBDictClassifier.load = self._fail_open_best        print "This test will print out an error, which can be ignored."        try:            self.assertRaises(SystemExit, open_storage, (db_name, "dbm"))        finally:            DBDictClassifier.load = DBDictClassifier_load        if os.path.isfile(db_name):            os.remove(db_name)class CDBStorageTestCase(_StorageTestBase):    StorageClass = CDBClassifierclass ZODBStorageTestCase(_StorageTestBase):    StorageClass = ZODBClassifierdef suite():    suite = unittest.TestSuite()    clses = (PickleStorageTestCase,             CDBStorageTestCase,             )    try:        import gdbm    except ImportError:        gdbm = None    if sys.platform != "win32" or sys.version_info > (2,3):        try:            import bsddb        except ImportError:            bsddb = None    else:        bsddb = None    try:        import bsddb3    except ImportError:        bsddb3 = None    if gdbm or bsddb or bsddb3:        clses += (DBStorageTestCase,)    else:        print "Skipping dbm tests, no dbm module available"    try:        import ZODB    except ImportError:        print "Skipping ZODB tests, ZODB not available"    else:         clses += (ZODBStorageTestCase,)            for cls in clses:        suite.addTest(unittest.makeSuite(cls))    return suiteif __name__=='__main__':    sb_test_support.unittest_main(argv=sys.argv + ['suite'])

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -