testtoolsui.py

来自「用python实现的邮件过滤器」· Python 代码 · 共 556 行 · 第 1/2 页

PY
556
字号
"""Testing Tools Web InterfaceClasses:    TestToolsUserInterface - Interface class for testing tools.Abstract:This module implements a browser based Spambayes user interface for thevarious testing tools.  Users may use it to interface with the tools.The following functions are currently included:  onCV - cross-validation testingTo do: o Add interface to Alex's incremental test setup. o Suggestions?"""# This module is part of the spambayes project, which is Copyright 2002-3# The Python Software Foundation and is covered by the Python Software# Foundation license.from __future__ import generators__author__ = "Tony Meyer <ta-meyer@ihug.co.nz>"__credits__ = "All the Spambayes folk."try:    True, Falseexcept NameError:    # Maintain compatibility with Python 2.2    True, False = 1, 0import osimport sysimport cgiimport globimport randomimport StringIOimport ProxyUIimport oe_mailboxfrom spambayes import msgsfrom spambayes import TestDriverfrom spambayes import OptionsClassfrom spambayes.Options import options# These are the options that will be offered on the testing page.# If the option is None, then the entry is a header and the following# options will appear in a new box on the configuration page.# These are also used to generate http request parameters and template# fields/variables.testtools_ini_map = (##    ('General Options', None),#   Put any general options that we wish to encourage people to test#   here, for example:#   ('Classifier',           'max_discriminators'),    ('Experimental Options', None),)# Dynamically add any current experimental/deprecated options.for opt in options.options(True):    sect, opt = opt[1:].split(']', 1)    if opt[:2].lower() == "x-":        testtools_ini_map += ((sect, opt),)class TestToolsUserInterface(ProxyUI.ProxyUserInterface):    """Serves the HTML user interface for the test tools."""    def onCv(self):        global testtools_ini_map        self._writePreamble("CV Test")        configTable = self.html.configForm.clone()        del configTable.configTextRow1        del configTable.configTextRow2        del configTable.configCbRow1        del configTable.configRow2        del configTable.blankRow        del configTable.folderRow        # Add some options that are only available via this page.        # (This makes displaying the options nice and easy, since        # they're just handled like everything else).        sect = 'TestToolsUI'        for newopt in [('source', 'Messages source', 'Standard test setup',                        'Select the source of the messages to test on.',                        ('Standard test setup', 'Cache', 'Outlook Express'),                        False),                       ('n', 'Number of runs', 10,                        'Select the number of cross-validation runs.',                        OptionsClass.INTEGER, False),]:            options._options[sect, newopt[0]] = OptionsClass.Option(*newopt)        testtools_ini_map += (('Testing Options', None),                              ('TestToolsUI', 'source'),                              ('TestToolsUI', 'n'),)        option_choice = self._buildConfigPageBody(\            configTable, testtools_ini_map)        option_choice.action_page.action = "cvresults"        option_choice.introduction = "Select the options for your test " \                                     "(these will be run against the " \                                     "defaults)."        option_choice.optionsPathname = "memory only"        del option_choice.restore_form        del option_choice.adv_button        option_choice.config_submit.value = "Run Test"        self.write(option_choice)        self._writePostamble()    def onCvresults(self, *args, **kwargs):        del kwargs["how"]        self._writePreamble("CV Test Results")        text = "Display the results of a cross-validation test with the " \               "current settings against the defaults."        nsets = options["TestToolsUI", "n"]        # With defaults first.        self.write("<p>Testing with defaults...</p>")        saved = {}        for opt in options.options(True):            # Ignore those that have do_not_restore as True            # (These are predominately storage options, and at least            # the cache directory ones may be needed later on).            sect, opt = opt[1:].split(']', 1)            saved[(sect, opt)] = options[(sect, opt)]            if not options.no_restore(sect, opt):                options.set(sect, opt, options.default(sect, opt))        options["TestToolsUI", "source"] = kwargs["TestToolsUI_source"]        # XXX Cache this somewhere?  If the testing data isn't changing,        # XXX and the user is running multiple tests, then it doesn't        # XXX make much sense to rerun the 'default's test over and over        # XXX again.        cv_out, errors = self.timCV(nsets)##        print errors.read()        defaults = self.rates(cv_out)        # Now with specified settings.        self.write("<p>Testing with selected settings...</p>")        for opt in options.options(True):            sect, opt = opt[1:].split(']', 1)            try:                value = kwargs["%s_%s" % (sect, opt)]            except KeyError:                # Leave as the default.                pass            else:                options.set(sect, opt, value)        cv_out, errors = self.timCV(nsets)##        print errors.read()        current = self.rates(cv_out)        # Restore the settings.        for opt in options.options(True):            sect, opt = opt[1:].split(']', 1)            options.set(sect, opt, saved[(sect, opt)])        # Do the comparison.        comp, errors = self.compare(defaults, current)##        print errors.read()        # Output the results        # XXX This is just what you'd get from running cmp.py        # XXX at the moment - it could be prettied up a bit.        comp = comp.read()        box = self._buildBox('Cross-validation test', None,                             cgi.escape(comp).replace("\n", "<br />"))        self.write(box)        self._writePostamble()    def timCV(self, nsets):        # Until we are un-lazy enough to change the code borrowed from        # timcv.py, just capture the output that normally goes to stdout        # or stderr and return it.        cout, cerr = sys.stdout, sys.stderr        sys.stdout = StringIO.StringIO()        sys.stderr = StringIO.StringIO()        if options["TestToolsUI", "source"] == "Standard test setup":            # Source the test data from the 'standard' test setup,            # as described in the testtools directory.            hamdirs  = [options["TestDriver", "ham_directories"] % \                        i for i in range(1, nsets+1)]            spamdirs = [options["TestDriver", "spam_directories"] % \                        i for i in range(1, nsets+1)]            hstream = msgs.HamStream            sstream = msgs.SpamStream        elif options["TestToolsUI", "source"] == "Cache":            # Source the test data from the cache directories            # specified in the "Storage" section of the configuration.            # This means that we have one 'ham' directory and one            # 'spam' directory (we ignore the unknown one, obviously),            # but what we really want is n directories of ham and spam.            # To overcome this without actually moving the files about            # we have a class that fakes it for us.            hamdirs  = ["%s%s%s/%s" % (options["Storage", "ham_cache"],                                       os.pathsep, i, nsets)                        for i in range(1, nsets+1)]            spamdirs = ["%s%s%s/%s" % (options["Storage", "spam_cache"],                                       os.pathsep, i, nsets)                        for i in range(1, nsets+1)]            hstream = HamCacheStream            sstream = SpamCacheStream        elif options["TestToolsUI", "source"] == "Outlook Express":            # Source the test data from Outlook Express            # Pretty crude at the moment (hard coded):            #   Ignores:            #     o Deleted Items            #     o Drafts            #     o Folders            #     o Offline            #     o Outbox            #     o Pop3uidl            #   Assumes that anything with 'spam' in the name is spam            #   (so don't have a folder called "looks like spam"!) and            #   that anything else is ham.            # No mixed dbxes!            # Each dbx is the equivilent of a directory in the 'standard'            # test setup - so it would be good if each dbx had a roughly            # equal number of messages, and if there were the same number            # of ham and spam dbxes.            dbx_dir = oe_mailbox.OEStoreRoot()            dbxes = glob.glob(os.path.join(dbx_dir, "*.dbx"))            spamdirs = []            hamdirs = []            for dbx in dbxes:                if os.path.splitext(os.path.basename(dbx))[0].lower() in \                   ["deleted items", "drafts", "folders",                    "offline", "outbox", "pop3uidl",]:                    continue                elif dbx.lower().find("spam") == -1:                    spamdirs.append(dbx)                else:                    hamdirs.append(dbx)            hstream = oe_mailbox.OEHamStream            sstream = oe_mailbox.OESpamStream        d = TestDriver.Driver()        # Train it on all sets except the first.        h = hstream("%s-%d" % (hamdirs[1], nsets), hamdirs[1:], train=1)        d.train(hstream("%s-%d" % (hamdirs[1], nsets), hamdirs[1:],                        train=1),                sstream("%s-%d" % (spamdirs[1], nsets), spamdirs[1:],                        train=1))        # Now run nsets times, predicting pair i against all except pair i.        for i in range(nsets):            h = hamdirs[i]            s = spamdirs[i]            hamstream = hstream(h, [h], train=0)            spamstream = sstream(s, [s], train=0)            if i > 0:                if options["CV Driver", "build_each_classifier_from_scratch"]:                    # Build a new classifier from the other sets.                    d.new_classifier()                    hname = "%s-%d, except %d" % (hamdirs[0], nsets, i+1)                    h2 = hamdirs[:]                    del h2[i]                    sname = "%s-%d, except %d" % (spamdirs[0], nsets, i+1)                    s2 = spamdirs[:]                    del s2[i]                    d.train(hstream(hname, h2, train=1),                            sstream(sname, s2, train=1))                else:                    # Forget this set.                    d.untrain(hamstream, spamstream)            # Predict this set.            d.test(hamstream, spamstream)            d.finishtest()            if i < nsets - 1 and not options["CV Driver",                                             "build_each_classifier_from_scratch"]:                # Add this set back in.                d.train(hamstream, spamstream)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?