⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_sb_dbexpimp.py

📁 用python实现的邮件过滤器
💻 PY
字号:
# Test sb_dbexpimp script.import osimport sysimport unittestfrom spambayes.tokenizer import tokenizefrom spambayes.storage import open_storagefrom spambayes.storage import PickledClassifier, DBDictClassifierimport sb_test_supportsb_test_support.fix_sys_path()import sb_dbexpimp# We borrow the test messages that test_sb_server uses.# I doubt it really makes much difference, but if we wanted more than# one message of each type (the tests should all handle this ok) then# Richie's hammer.py script has code for generating any number of# randomly composed email messages.from test_sb_server import good1, spam1try:    __file__except NameError:    # Python 2.2    __file__ = os.path.abspath(sys.argv[0])TEMP_PICKLE_NAME = os.path.join(os.path.dirname(__file__), "temp.pik")TEMP_CSV_NAME = os.path.join(os.path.dirname(__file__), "temp.csv")TEMP_DBM_NAME = os.path.join(os.path.dirname(__file__), "temp.dbm")# The chances of anyone having files with these names in the test# directory is minute, but we don't want to wipe anything, so make# sure that they don't already exist.  Our tearDown code gets rid# of our copies (whether the tests pass or fail) so they shouldn't# be ours.for fn in [TEMP_PICKLE_NAME, TEMP_CSV_NAME, TEMP_DBM_NAME]:    if os.path.exists(fn):        print fn, "already exists.  Please remove this file before " \              "running these tests (a file by that name will be " \              "created and destroyed as part of the tests)."        sys.exit(1)class dbexpimpTest(unittest.TestCase):    def tearDown(self):        try:            os.remove(TEMP_PICKLE_NAME)        except OSError:            pass        try:            os.remove(TEMP_CSV_NAME)        except OSError:            pass        try:            os.remove(TEMP_DBM_NAME)        except OSError:            pass            def test_csv_module_import(self):        """Check that we don't import the old object craft csv module."""        self.assert_(hasattr(sb_dbexpimp.csv, "reader"))    def test_pickle_export(self):        # Create a pickled classifier to export.        bayes = PickledClassifier(TEMP_PICKLE_NAME)        # Stuff some messages in it so it's not empty.        bayes.learn(tokenize(spam1), True)        bayes.learn(tokenize(good1), False)        # Save.        bayes.store()        # Export.        sb_dbexpimp.runExport(TEMP_PICKLE_NAME, "pickle", TEMP_CSV_NAME)        # Verify that the CSV holds all the original data (and, by using        # the CSV module to open it, that it is valid CSV data).        fp = open(TEMP_CSV_NAME, "rb")        reader = sb_dbexpimp.csv.reader(fp)        (nham, nspam) = reader.next()        self.assertEqual(int(nham), bayes.nham)        self.assertEqual(int(nspam), bayes.nspam)        for (word, hamcount, spamcount) in reader:            word = sb_dbexpimp.uunquote(word)            self.assert_(word in bayes._wordinfokeys())            wi = bayes._wordinfoget(word)            self.assertEqual(int(hamcount), wi.hamcount)            self.assertEqual(int(spamcount), wi.spamcount)            def test_dbm_export(self):        # Create a dbm classifier to export.        bayes = DBDictClassifier(TEMP_DBM_NAME)        # Stuff some messages in it so it's not empty.        bayes.learn(tokenize(spam1), True)        bayes.learn(tokenize(good1), False)        # Save & Close.        bayes.store()        bayes.close()        # Export.        sb_dbexpimp.runExport(TEMP_DBM_NAME, "dbm", TEMP_CSV_NAME)        # Reopen the original.        bayes = open_storage(TEMP_DBM_NAME, "dbm")        # Verify that the CSV holds all the original data (and, by using        # the CSV module to open it, that it is valid CSV data).        fp = open(TEMP_CSV_NAME, "rb")        reader = sb_dbexpimp.csv.reader(fp)        (nham, nspam) = reader.next()        self.assertEqual(int(nham), bayes.nham)        self.assertEqual(int(nspam), bayes.nspam)        for (word, hamcount, spamcount) in reader:            word = sb_dbexpimp.uunquote(word)            self.assert_(word in bayes._wordinfokeys())            wi = bayes._wordinfoget(word)            self.assertEqual(int(hamcount), wi.hamcount)            self.assertEqual(int(spamcount), wi.spamcount)    def test_import_to_pickle(self):        # Create a CSV file to import.        temp = open(TEMP_CSV_NAME, "wb")        temp.write("3,4\n")        csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),                    "of":(1,0), "the":(1,2), "import":(3,1)}        for word, (ham, spam) in csv_data.items():            temp.write("%s,%s,%s\n" % (word, ham, spam))        temp.close()        sb_dbexpimp.runImport(TEMP_PICKLE_NAME, "pickle", True,                              TEMP_CSV_NAME)        # Open the converted file and verify that it has all the data from        # the CSV file (and by opening it, that it is a valid pickle).        bayes = open_storage(TEMP_PICKLE_NAME, "pickle")        self.assertEqual(bayes.nham, 3)        self.assertEqual(bayes.nspam, 4)        for word, (ham, spam) in csv_data.items():            word = sb_dbexpimp.uquote(word)            self.assert_(word in bayes._wordinfokeys())            wi = bayes._wordinfoget(word)            self.assertEqual(wi.hamcount, ham)            self.assertEqual(wi.spamcount, spam)    def test_import_to_dbm(self):        # Create a CSV file to import.        temp = open(TEMP_CSV_NAME, "wb")        temp.write("3,4\n")        csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),                    "of":(1,0), "the":(1,2), "import":(3,1)}        for word, (ham, spam) in csv_data.items():            temp.write("%s,%s,%s\n" % (word, ham, spam))        temp.close()        sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", True, TEMP_CSV_NAME)        # Open the converted file and verify that it has all the data from        # the CSV file (and by opening it, that it is a valid dbm file).        bayes = open_storage(TEMP_DBM_NAME, "dbm")        self.assertEqual(bayes.nham, 3)        self.assertEqual(bayes.nspam, 4)        for word, (ham, spam) in csv_data.items():            word = sb_dbexpimp.uquote(word)            self.assert_(word in bayes._wordinfokeys())            wi = bayes._wordinfoget(word)            self.assertEqual(wi.hamcount, ham)            self.assertEqual(wi.spamcount, spam)    def test_merge_to_pickle(self):        # Create a pickled classifier to merge with.        bayes = PickledClassifier(TEMP_PICKLE_NAME)        # Stuff some messages in it so it's not empty.        bayes.learn(tokenize(spam1), True)        bayes.learn(tokenize(good1), False)        # Save.        bayes.store()        # Create a CSV file to import.        nham, nspam = 3,4        temp = open(TEMP_CSV_NAME, "wb")        temp.write("%d,%d\n" % (nham, nspam))        csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),                    "of":(1,0), "the":(1,2), "import":(3,1)}        for word, (ham, spam) in csv_data.items():            temp.write("%s,%s,%s\n" % (word, ham, spam))        temp.close()        sb_dbexpimp.runImport(TEMP_PICKLE_NAME, "pickle", False,                              TEMP_CSV_NAME)        # Open the converted file and verify that it has all the data from        # the CSV file (and by opening it, that it is a valid pickle),        # and the data from the original pickle.        bayes2 = open_storage(TEMP_PICKLE_NAME, "pickle")        self.assertEqual(bayes2.nham, nham + bayes.nham)        self.assertEqual(bayes2.nspam, nspam + bayes.nspam)        words = bayes._wordinfokeys()        words.extend(csv_data.keys())        for word in words:            word = sb_dbexpimp.uquote(word)            self.assert_(word in bayes2._wordinfokeys())            h, s = csv_data.get(word, (0,0))            wi = bayes._wordinfoget(word)            if wi:                h += wi.hamcount                s += wi.spamcount            wi2 = bayes2._wordinfoget(word)            self.assertEqual(h, wi2.hamcount)            self.assertEqual(s, wi2.spamcount)    def test_merge_to_dbm(self):        # Create a dbm classifier to merge with.        bayes = DBDictClassifier(TEMP_DBM_NAME)        # Stuff some messages in it so it's not empty.        bayes.learn(tokenize(spam1), True)        bayes.learn(tokenize(good1), False)        # Save data to check against.        original_nham = bayes.nham        original_nspam = bayes.nspam        original_data = {}        for key in bayes._wordinfokeys():            original_data[key] = bayes._wordinfoget(key)        # Save & Close.        bayes.store()        bayes.close()        # Create a CSV file to import.        nham, nspam = 3,4        temp = open(TEMP_CSV_NAME, "wb")        temp.write("%d,%d\n" % (nham, nspam))        csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),                    "of":(1,0), "the":(1,2), "import":(3,1)}        for word, (ham, spam) in csv_data.items():            temp.write("%s,%s,%s\n" % (word, ham, spam))        temp.close()        sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", False, TEMP_CSV_NAME)        # Open the converted file and verify that it has all the data from        # the CSV file (and by opening it, that it is a valid dbm file),        # and the data from the original dbm database.        bayes2 = open_storage(TEMP_DBM_NAME, "dbm")        self.assertEqual(bayes2.nham, nham + original_nham)        self.assertEqual(bayes2.nspam, nspam + original_nspam)        words = original_data.keys()[:]        words.extend(csv_data.keys())        for word in words:            word = sb_dbexpimp.uquote(word)            self.assert_(word in bayes2._wordinfokeys())            h, s = csv_data.get(word, (0,0))            wi = original_data.get(word, None)            if wi:                h += wi.hamcount                s += wi.spamcount            wi2 = bayes2._wordinfoget(word)            self.assertEqual(h, wi2.hamcount)            self.assertEqual(s, wi2.spamcount)def suite():    suite = unittest.TestSuite()    for cls in (dbexpimpTest,               ):        suite.addTest(unittest.makeSuite(cls))    return suiteif __name__=='__main__':    sb_test_support.unittest_main(argv=sys.argv + ['suite'])

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -