⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 manager.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 3 页
字号:
from __future__ import generatorsimport cPickleimport osimport sysimport errnoimport typesimport shutilimport tracebackimport operatorimport win32api, win32con, win32guiimport timer, threadimport win32com.clientimport win32com.client.gencacheimport pythoncomimport msgstoretry:    True, Falseexcept NameError:    # Maintain compatibility with Python 2.2    True, False = 1, 0# Characters valid in a filename.  Used to nuke bad chars from the profile# name (which we try and use as a filename).# We assume characters > 127 are OK as they may be unicodefilename_chars = ('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'                '0123456789'                """$%'-_@~ `!()^#&+,;=[]""")# Report a message to the user - should only be used for pretty serious errors# hence we also print a traceback.# Module level function so we can report errors creating the managerdef _GetParent():    try:        return win32gui.GetActiveWindow()    except win32gui.error:        pass    return 0def _DoMessage(message, title, flags):    return win32gui.MessageBox(_GetParent(), message, title, flags)def ReportError(message, title = None):    import traceback    print "ERROR:", repr(message)    if sys.exc_info()[0] is not None:        traceback.print_exc()    if title is None: title = "SpamBayes"    _DoMessage(message, title, win32con.MB_ICONEXCLAMATION)def ReportInformation(message, title = None):    if title is None: title = "SpamBayes"    _DoMessage(message, title, win32con.MB_ICONINFORMATION)def AskQuestion(message, title = None):    if title is None: title = "SpamBayes"    return _DoMessage(message, title, win32con.MB_YESNO | \                                      win32con.MB_ICONQUESTION) == win32con.IDYES# Non-ascii characters in file or directory names only fully work in# Python 2.3.3+, but latin-1 "compatible" filenames should work in 2.3try:    filesystem_encoding = sys.getfilesystemencoding()except AttributeError:    filesystem_encoding = "mbcs"# Work out our "application 
# directory", which is
# the directory of our main .py/.dll/.exe file we
# are running from.
if not hasattr(sys, "frozen"):
    # Running from source: use this module's own path.
    try:
        this_filename = os.path.abspath(__file__)
    except NameError:
        # no __file__ - means Py2.2 and __name__=='__main__'
        this_filename = os.path.abspath(sys.argv[0])
else:
    # Frozen build: take the file name of the module identified by
    # sys.frozendllhandle.
    assert sys.frozen == "dll", "outlook only supports inproc servers"
    this_filename = win32api.GetModuleFileName(sys.frozendllhandle)

# Ensure that a bsddb module is available if we are frozen.
# See if we can use the new bsddb module. (The old one is unreliable
# on Windows, so we don't use that)
if hasattr(sys, "frozen"):
    try:
        import bsddb3
    except ImportError:
        bsddb3 = None
    try:
        import bsddb
    except ImportError:
        bsddb = None
    if bsddb is not None and not hasattr(bsddb, "db"):
        # The "db" name is not in the old (bad) one, so a bsddb without
        # it is treated as if it were not available at all.
        bsddb = None
    assert bsddb or bsddb3, \
           "Don't build binary versions without bsddb!"

# This is a little bit of a hack <wink>.  We are generally in a child
# directory of the bayes code.  To help installation, we handle the
# fact that this may not be on sys.path.
# Note that doing these
# imports is delayed, so that we can set the BAYESCUSTOMIZE envar
# first (if we import anything from the core spambayes code before
# setting that envar, our .ini file may have no effect).
# However, we want *some* Spambayes code before the options are processed
# so this is now 2 steps - get the "early" spambayes core stuff (which
# must not import spambayes.Options) and sets up sys.path, and "later" core
# stuff, which can include spambayes.Options, and assume sys.path in place.

# Step 1: make sure the spambayes package can be imported, then publish
# its i18n module as the module-level name bayes_i18n.
def import_early_core_spambayes_stuff():
    global bayes_i18n
    try:
        # Probe import only - it succeeds when spambayes is importable.
        from spambayes import OptionsClass
    except ImportError:
        # Not importable: put the parent of the directory holding this
        # file at the front of sys.path.
        package_root = os.path.abspath(
            os.path.join(os.path.dirname(this_filename), ".."))
        sys.path.insert(0, package_root)
    from spambayes import i18n
    bayes_i18n = i18n

# Step 2: point BAYESCUSTOMIZE at the given INI files, then import the
# rest of the core and publish it through the bayes_* module globals.
#   ini_filenames - sequence of INI file names (str or unicode).
# Does nothing when spambayes.Options has already been imported.
def import_core_spambayes_stuff(ini_filenames):
    global bayes_classifier, bayes_tokenize, bayes_storage, bayes_options, \
           bayes_message, bayes_stats
    if "spambayes.Options" in sys.modules:
        # The only thing we are worried about here is spambayes.Options
        # being imported before we have determined the INI files we need to
        # use.
        # The only way this can happen otherwise is when the addin is
        # de-selected then re-selected via the Outlook GUI - and when
        # running from source-code, it never appears in this list.
        # So this should never happen from source-code, and if it does, then
        # the developer has recently changed something that causes the early
        # import
        assert hasattr(sys, "frozen")
        # And we don't care (we could try and reload the engine options,
        # but these are very unlikely to have changed)
        return
    # ini_filenames may contain Unicode, but environ not unicode aware.
    # Convert if necessary.
    use_names = []
    for name in ini_filenames:
        if isinstance(name, unicode):
            name = name.encode(filesystem_encoding)
        use_names.append(name)
    os.environ["BAYESCUSTOMIZE"] = os.pathsep.join(use_names)
    # The envar is set, so the core can now be imported.
    from spambayes import classifier
    from spambayes.tokenizer import tokenize
    from spambayes import storage
    from spambayes import message
    from spambayes import Stats
    bayes_classifier = classifier
    bayes_tokenize = tokenize
    bayes_storage = storage
    bayes_message = message
    bayes_stats = Stats
    assert "spambayes.Options" in sys.modules, \
        "Expected 'spambayes.Options' to be loaded here"
    from spambayes.Options import options
    bayes_options = options

# Function to "safely" save a pickle, only overwriting
# the existing file after a successful write.
#   what     - the object to pickle (written with pickle protocol 1).
#   filename - the final file name; the data is staged in filename + ".tmp".
# If the write fails, the partially written temp file is removed and the
# exception propagates; an existing 'filename' is left untouched.
def SavePickle(what, filename):
    temp_filename = filename + ".tmp"
    written = False
    stream = open(temp_filename, "wb")
    try:
        cPickle.dump(what, stream, 1)
        written = True
    finally:
        stream.close()
        if not written:
            # Don't leave a truncated temp file lying around.  The
            # exception that brought us here keeps propagating once
            # this clause has finished.
            try:
                os.unlink(temp_filename)
            except os.error:
                pass
    # now rename to the correct file.
    try:
        os.unlink(filename)
    except os.error:
        # Best effort - typically the target just does not exist yet.
        pass
    os.rename(temp_filename, filename)

# Base class for our "storage manager" - we choose between the pickle
# and DB versions at runtime.
As our bayes uses spambayes.storage,# our base class can share common bayes loading code, and we use# spambayes.message, so the base class can share common message info# code, too.class BasicStorageManager:    db_extension = None # for pychecker - overwritten by subclass    def __init__(self, bayes_base_name, mdb_base_name):        self.bayes_filename = bayes_base_name.encode(filesystem_encoding) + \                              self.db_extension        self.mdb_filename = mdb_base_name.encode(filesystem_encoding) + \                            self.db_extension    def new_bayes(self):        # Just delete the file and do an "open"        try:            os.unlink(self.bayes_filename)        except EnvironmentError, e:            if e.errno != errno.ENOENT: raise        return self.open_bayes()    def store_bayes(self, bayes):        bayes.store()    def open_bayes(self):        return bayes_storage.open_storage(self.bayes_filename, self.klass)    def close_bayes(self, bayes):        bayes.close()    def open_mdb(self):        # MessageInfo storage types may lag behind, so use pickle if the        # matching type isn't available.        
if self.klass in bayes_message._storage_types.keys():            return bayes_message.open_storage(self.mdb_filename, self.klass)        return bayes_message.open_storage(self.mdb_filename, "pickle")    def store_mdb(self, mdb):        mdb.store()    def close_mdb(self, mdb):        mdb.close()class PickleStorageManager(BasicStorageManager):    db_extension = ".pck"    klass = "pickle"    def new_mdb(self):        return {}    def is_incremental(self):        return False # False means we always save the entire DBclass DBStorageManager(BasicStorageManager):    db_extension = ".db"    klass = "dbm"    def new_mdb(self):        try:            os.unlink(self.mdb_filename)        except EnvironmentError, e:            if e.errno != errno.ENOENT: raise        return self.open_mdb()    def is_incremental(self):        return True # True means only changed records get actually writtenclass ZODBStorageManager(DBStorageManager):    db_extension = ".fs"    klass = "zodb"# Encapsulates our entire classification database# This allows a couple of different "databases" to be open at once# eg, a "temporary" one for training, etc.# The manager should contain no database state - it should all be here.class ClassifierData:    def __init__(self, db_manager, logger):        self.db_manager = db_manager        self.bayes = None        self.message_db = None        self.dirty = False        self.logger = logger # currently the manager, but needed only for logging    def Load(self):        import time        start = time.clock()        bayes = message_db = None        # Exceptions must be caught by caller.        # file-not-found handled gracefully by storage.        
bayes = self.db_manager.open_bayes()        fname = self.db_manager.bayes_filename.encode("mbcs", "replace")        print "Loaded bayes database from '%s'" % (fname,)        message_db = self.db_manager.open_mdb()        fname = self.db_manager.mdb_filename.encode("mbcs", "replace")        print "Loaded message database from '%s'" % (fname,)        self.logger.LogDebug(0, "Bayes database initialized with "                   "%d spam and %d good messages" % (bayes.nspam, bayes.nham))        # Once, we checked that the message database was the same length        # as the training database here.  However, we now store information        # about messages that are classified but not trained in the message        # database, so the lengths will not be equal (unless all messages        # are trained).  That step doesn't really gain us anything, anyway,        # since it no longer would tell us useful information, so remove it.        self.bayes = bayes        self.message_db = message_db        self.dirty = False        self.logger.LogDebug(1, "Loaded databases in %gms" % ((time.clock()-start)*1000))    def InitNew(self):        if self.bayes is not None:            self.db_manager.close_bayes(self.bayes)        if self.message_db is not None:            self.db_manager.close_mdb(self.message_db)        self.bayes = self.db_manager.new_bayes()        self.message_db = self.db_manager.new_mdb()        self.dirty = True    def SavePostIncrementalTrain(self):        # Save the database after a training operation - only actually        # saves if we aren't using pickles.        
if self.db_manager.is_incremental():            if self.dirty:                self.Save()            else:                self.logger.LogDebug(1, "Bayes database is not dirty - not writing")        else:            print "Using a slow database - not saving after incremental train"    def Save(self):        import time        start = time.clock()        bayes = self.bayes        if self.logger.verbose:            print "Saving bayes database with %d spam and %d good messages" %\                   (bayes.nspam, bayes.nham)            print " ->", self.db_manager.bayes_filename        self.db_manager.store_bayes(self.bayes)        if self.logger.verbose:            print " ->", self.db_manager.mdb_filename        self.db_manager.store_mdb(self.message_db)        self.dirty = False        self.logger.LogDebug(1, "Saved databases in %gms" % ((time.clock()-start)*1000))    def Close(self):        if self.dirty and self.bayes:            print "Warning: ClassifierData closed while Bayes database dirty"        if self.db_manager:            self.db_manager.close_bayes(self.bayes)            self.db_manager.close_mdb(self.message_db)            self.db_manager = None        self.bayes = None        self.logger = None    def Adopt(self, other):        assert not other.dirty, "Adopting dirty classifier data!"        other.db_manager.close_bayes(other.bayes)        other.db_manager.close_mdb(other.message_db)        self.db_manager.close_bayes(self.bayes)        self.db_manager.close_mdb(self.message_db)        # Move the files        shutil.move(other.db_manager.bayes_filename, self.db_manager.bayes_filename)        shutil.move(other.db_manager.mdb_filename, self.db_manager.mdb_filename)        # and re-open.        self.Load()def GetStorageManagerClass():    # We used to enforce this so that all binary users used bsddb, and    # unless they modified the source, so would all source users.  
We    # would like more flexibility now, so we match what the rest of the    # applications do - this isn't exposed via the GUI, so Outlook users    # still get bsddb by default, and have to fiddle with a text file

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -