📄 manager.py
字号:
from __future__ import generatorsimport cPickleimport osimport sysimport errnoimport typesimport shutilimport tracebackimport operatorimport win32api, win32con, win32guiimport timer, threadimport win32com.clientimport win32com.client.gencacheimport pythoncomimport msgstoretry: True, Falseexcept NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0# Characters valid in a filename. Used to nuke bad chars from the profile# name (which we try and use as a filename).# We assume characters > 127 are OK as they may be unicodefilename_chars = ('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' '0123456789' """$%'-_@~ `!()^#&+,;=[]""")# Report a message to the user - should only be used for pretty serious errors# hence we also print a traceback.# Module level function so we can report errors creating the managerdef _GetParent(): try: return win32gui.GetActiveWindow() except win32gui.error: pass return 0def _DoMessage(message, title, flags): return win32gui.MessageBox(_GetParent(), message, title, flags)def ReportError(message, title = None): import traceback print "ERROR:", repr(message) if sys.exc_info()[0] is not None: traceback.print_exc() if title is None: title = "SpamBayes" _DoMessage(message, title, win32con.MB_ICONEXCLAMATION)def ReportInformation(message, title = None): if title is None: title = "SpamBayes" _DoMessage(message, title, win32con.MB_ICONINFORMATION)def AskQuestion(message, title = None): if title is None: title = "SpamBayes" return _DoMessage(message, title, win32con.MB_YESNO | \ win32con.MB_ICONQUESTION) == win32con.IDYES# Non-ascii characters in file or directory names only fully work in# Python 2.3.3+, but latin-1 "compatible" filenames should work in 2.3try: filesystem_encoding = sys.getfilesystemencoding()except AttributeError: filesystem_encoding = "mbcs"# Work out our "application directory", which is# the directory of our main .py/.dll/.exe file we# are running from.if hasattr(sys, "frozen"): assert sys.frozen == "dll", "outlook only supports inproc servers" this_filename = win32api.GetModuleFileName(sys.frozendllhandle)else: try: this_filename = os.path.abspath(__file__) except NameError: # no __file__ - means Py2.2 and __name__=='__main__' this_filename = os.path.abspath(sys.argv[0])# Ensure that a bsddb module is available if we are frozen.# See if we can use the new bsddb module. (The old one is unreliable# on Windows, so we don't use that)if hasattr(sys, "frozen"): try: import bsddb3 except ImportError: bsddb3 = None try: import bsddb except ImportError: bsddb = None else: # This name is not in the old (bad) one. if not hasattr(bsddb, "db"): bsddb = None assert bsddb or bsddb3, \ "Don't build binary versions without bsddb!"# This is a little bit of a hack <wink>. We are generally in a child# directory of the bayes code. To help installation, we handle the# fact that this may not be on sys.path. Note that doing these# imports is delayed, so that we can set the BAYESCUSTOMIZE envar# first (if we import anything from the core spambayes code before# setting that envar, our .ini file may have no effect).# However, we want *some* Spambayes code before the options are processed# so this is now 2 steps - get the "early" spambayes core stuff (which# must not import spambayes.Options) and sets up sys.path, and "later" core# stuff, which can include spambayes.Options, and assume sys.path in place.def import_early_core_spambayes_stuff(): global bayes_i18n try: from spambayes import OptionsClass except ImportError: parent = os.path.abspath(os.path.join(os.path.dirname(this_filename), "..")) sys.path.insert(0, parent) from spambayes import i18n bayes_i18n = i18ndef import_core_spambayes_stuff(ini_filenames): global bayes_classifier, bayes_tokenize, bayes_storage, bayes_options, \ bayes_message, bayes_stats if "spambayes.Options" in sys.modules: # The only thing we are worried about here is spambayes.Options # being imported before we have determined the INI files we need to # use. # The only way this can happen otherwise is when the addin is # de-selected then re-selected via the Outlook GUI - and when # running from source-code, it never appears in this list. # So this should never happen from source-code, and if it does, then # the developer has recently changed something that causes the early # import assert hasattr(sys, "frozen") # And we don't care (we could try and reload the engine options, # but these are very unlikely to have changed) return # ini_filenames may contain Unicode, but environ not unicode aware. # Convert if necessary. use_names = [] for name in ini_filenames: if isinstance(name, unicode): name = name.encode(filesystem_encoding) use_names.append(name) os.environ["BAYESCUSTOMIZE"] = os.pathsep.join(use_names) from spambayes import classifier from spambayes.tokenizer import tokenize from spambayes import storage from spambayes import message from spambayes import Stats bayes_classifier = classifier bayes_tokenize = tokenize bayes_storage = storage bayes_message = message bayes_stats = Stats assert "spambayes.Options" in sys.modules, \ "Expected 'spambayes.Options' to be loaded here" from spambayes.Options import options bayes_options = options# Function to "safely" save a pickle, only overwriting# the existing file after a successful write.def SavePickle(what, filename): temp_filename = filename + ".tmp" file = open(temp_filename,"wb") try: cPickle.dump(what, file, 1) finally: file.close() # now rename to the correct file. try: os.unlink(filename) except os.error: pass os.rename(temp_filename, filename)# Base class for our "storage manager" - we choose between the pickle# and DB versions at runtime. As our bayes uses spambayes.storage,# our base class can share common bayes loading code, and we use# spambayes.message, so the base class can share common message info# code, too.class BasicStorageManager: db_extension = None # for pychecker - overwritten by subclass def __init__(self, bayes_base_name, mdb_base_name): self.bayes_filename = bayes_base_name.encode(filesystem_encoding) + \ self.db_extension self.mdb_filename = mdb_base_name.encode(filesystem_encoding) + \ self.db_extension def new_bayes(self): # Just delete the file and do an "open" try: os.unlink(self.bayes_filename) except EnvironmentError, e: if e.errno != errno.ENOENT: raise return self.open_bayes() def store_bayes(self, bayes): bayes.store() def open_bayes(self): return bayes_storage.open_storage(self.bayes_filename, self.klass) def close_bayes(self, bayes): bayes.close() def open_mdb(self): # MessageInfo storage types may lag behind, so use pickle if the # matching type isn't available. if self.klass in bayes_message._storage_types.keys(): return bayes_message.open_storage(self.mdb_filename, self.klass) return bayes_message.open_storage(self.mdb_filename, "pickle") def store_mdb(self, mdb): mdb.store() def close_mdb(self, mdb): mdb.close()class PickleStorageManager(BasicStorageManager): db_extension = ".pck" klass = "pickle" def new_mdb(self): return {} def is_incremental(self): return False # False means we always save the entire DBclass DBStorageManager(BasicStorageManager): db_extension = ".db" klass = "dbm" def new_mdb(self): try: os.unlink(self.mdb_filename) except EnvironmentError, e: if e.errno != errno.ENOENT: raise return self.open_mdb() def is_incremental(self): return True # True means only changed records get actually writtenclass ZODBStorageManager(DBStorageManager): db_extension = ".fs" klass = "zodb"# Encapsulates our entire classification database# This allows a couple of different "databases" to be open at once# eg, a "temporary" one for training, etc.# The manager should contain no database state - it should all be here.class ClassifierData: def __init__(self, db_manager, logger): self.db_manager = db_manager self.bayes = None self.message_db = None self.dirty = False self.logger = logger # currently the manager, but needed only for logging def Load(self): import time start = time.clock() bayes = message_db = None # Exceptions must be caught by caller. # file-not-found handled gracefully by storage. bayes = self.db_manager.open_bayes() fname = self.db_manager.bayes_filename.encode("mbcs", "replace") print "Loaded bayes database from '%s'" % (fname,) message_db = self.db_manager.open_mdb() fname = self.db_manager.mdb_filename.encode("mbcs", "replace") print "Loaded message database from '%s'" % (fname,) self.logger.LogDebug(0, "Bayes database initialized with " "%d spam and %d good messages" % (bayes.nspam, bayes.nham)) # Once, we checked that the message database was the same length # as the training database here. However, we now store information # about messages that are classified but not trained in the message # database, so the lengths will not be equal (unless all messages # are trained). That step doesn't really gain us anything, anyway, # since it no longer would tell us useful information, so remove it. self.bayes = bayes self.message_db = message_db self.dirty = False self.logger.LogDebug(1, "Loaded databases in %gms" % ((time.clock()-start)*1000)) def InitNew(self): if self.bayes is not None: self.db_manager.close_bayes(self.bayes) if self.message_db is not None: self.db_manager.close_mdb(self.message_db) self.bayes = self.db_manager.new_bayes() self.message_db = self.db_manager.new_mdb() self.dirty = True def SavePostIncrementalTrain(self): # Save the database after a training operation - only actually # saves if we aren't using pickles. if self.db_manager.is_incremental(): if self.dirty: self.Save() else: self.logger.LogDebug(1, "Bayes database is not dirty - not writing") else: print "Using a slow database - not saving after incremental train" def Save(self): import time start = time.clock() bayes = self.bayes if self.logger.verbose: print "Saving bayes database with %d spam and %d good messages" %\ (bayes.nspam, bayes.nham) print " ->", self.db_manager.bayes_filename self.db_manager.store_bayes(self.bayes) if self.logger.verbose: print " ->", self.db_manager.mdb_filename self.db_manager.store_mdb(self.message_db) self.dirty = False self.logger.LogDebug(1, "Saved databases in %gms" % ((time.clock()-start)*1000)) def Close(self): if self.dirty and self.bayes: print "Warning: ClassifierData closed while Bayes database dirty" if self.db_manager: self.db_manager.close_bayes(self.bayes) self.db_manager.close_mdb(self.message_db) self.db_manager = None self.bayes = None self.logger = None def Adopt(self, other): assert not other.dirty, "Adopting dirty classifier data!" other.db_manager.close_bayes(other.bayes) other.db_manager.close_mdb(other.message_db) self.db_manager.close_bayes(self.bayes) self.db_manager.close_mdb(self.message_db) # Move the files shutil.move(other.db_manager.bayes_filename, self.db_manager.bayes_filename) shutil.move(other.db_manager.mdb_filename, self.db_manager.mdb_filename) # and re-open. self.Load()def GetStorageManagerClass(): # We used to enforce this so that all binary users used bsddb, and # unless they modified the source, so would all source users. We # would like more flexibility now, so we match what the rest of the # applications do - this isn't exposed via the GUI, so Outlook users # still get bsddb by default, and have to fiddle with a text file
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -