📄 storage.py
字号:
object.__setattr__(self, att, value) def create_storage(self): import ZODB from ZODB.FileStorage import FileStorage self.storage = FileStorage(self.db_filename, read_only=self.mode=='r') def load(self): '''Load state from database''' import ZODB if options["globals", "verbose"]: print >> sys.stderr, "Loading state from %s (%s) database" % \ (self.db_filename, self.db_name) # If we are not closed, then we need to close first before we # reload. if not self.closed: self.close() self.create_storage() self.DB = ZODB.DB(self.storage, cache_size=10000) self.conn = self.DB.open() root = self.conn.root() self.classifier = root.get(self.db_name) if self.classifier is None: # There is no classifier, so create one. if options["globals", "verbose"]: print >> sys.stderr, self.db_name, 'is a new ZODB' self.classifier = root[self.db_name] = self.ClassifierClass() else: if options["globals", "verbose"]: print >> sys.stderr, '%s is an existing ZODB, with %d ' \ 'ham and %d spam' % (self.db_name, self.nham, self.nspam) self.closed = False def store(self): '''Place state into persistent store''' try: import ZODB import ZODB.Transaction except ImportError: import transaction commit = transaction.commit abort = transaction.abort else: commit = ZODB.Transaction.get_transaction().commit abort = ZODB.Transaction.get_transaction().abort from ZODB.POSException import ConflictError from ZODB.POSException import TransactionFailedError assert self.closed == False, "Can't store a closed database" if options["globals", "verbose"]: print >> sys.stderr, 'Persisting', self.db_name, 'state in database' try: commit() except ConflictError: # We'll save it next time, or on close. It'll be lost if we # hard-crash, but that's unlikely, and not a particularly big # deal. if options["globals", "verbose"]: print >> sys.stderr, "Conflict on commit", self.db_name abort() except TransactionFailedError: # Saving isn't working. Try to abort, but chances are that # restarting is needed. print >> sys.stderr, "Storing failed. Need to restart.", \ self.db_name abort() def close(self): # Ensure that the db is saved before closing. Alternatively, we # could abort any waiting transaction. We need to do *something* # with it, though, or it will be still around after the db is # closed and cause problems. For now, saving seems to make sense # (and we can always add abort methods if they are ever needed). if self.mode != 'r': self.store() # Do the closing. self.DB.close() # We don't make any use of the 'undo' capabilities of the # FileStorage at the moment, so might as well pack the database # each time it is closed, to save as much disk space as possible. # Pack it up to where it was 'yesterday'. # XXX What is the 'referencesf' parameter for pack()? It doesn't # XXX seem to do anything according to the source. if self.mode != 'r' and hasattr(self.storage, "pack"): self.storage.pack(time.time()-60*60*24, None) self.storage.close() # Ensure that we cannot continue to use this classifier. delattr(self, "classifier") self.closed = True if options["globals", "verbose"]: print >> sys.stderr, 'Closed', self.db_name, 'database'class ZEOClassifier(ZODBClassifier): def __init__(self, data_source_name): source_info = data_source_name.split() self.host = "localhost" self.port = None db_name = "SpamBayes" for info in source_info: if info.startswith("host"): self.host = info[5:] elif info.startswith("port"): self.port = int(info[5:]) elif info.startswith("dbname"): db_name = info[7:] ZODBClassifier.__init__(self, db_name) def create_storage(self): from ZEO.ClientStorage import ClientStorage if self.port: addr = self.host, self.port else: addr = self.host self.storage = ClientStorage(addr)# Flags that the Trainer will recognise. These should be or'able integer# values (i.e. 1, 2, 4, 8, etc.).NO_TRAINING_FLAG = 1class Trainer: '''Associates a Classifier object and one or more Corpora, \ is an observer of the corpora''' def __init__(self, bayes, is_spam, updateprobs=NO_UPDATEPROBS): '''Constructor(Classifier, is_spam(True|False), updprobs(True|False)''' self.bayes = bayes self.is_spam = is_spam self.updateprobs = updateprobs def onAddMessage(self, message, flags=0): '''A message is being added to an observed corpus.''' if not (flags & NO_TRAINING_FLAG): self.train(message) def train(self, message): '''Train the database with the message''' if options["globals", "verbose"]: print >> sys.stderr, 'training with',message.key() self.bayes.learn(message.tokenize(), self.is_spam)# self.updateprobs) message.setId(message.key()) message.RememberTrained(self.is_spam) def onRemoveMessage(self, message, flags=0): '''A message is being removed from an observed corpus.''' # If a message is being expired from the corpus, we do # *NOT* want to untrain it, because that's not what's happening. # If this is the case, then flags will include NO_TRAINING_FLAG. # There are no other flags we currently use. if not (flags & NO_TRAINING_FLAG): self.untrain(message) def untrain(self, message): '''Untrain the database with the message''' if options["globals", "verbose"]: print >> sys.stderr, 'untraining with',message.key() self.bayes.unlearn(message.tokenize(), self.is_spam)# self.updateprobs) # can raise ValueError if database is fouled. If this is the case, # then retraining is the only recovery option. message.RememberTrained(None) def trainAll(self, corpus): '''Train all the messages in the corpus''' for msg in corpus: self.train(msg) def untrainAll(self, corpus): '''Untrain all the messages in the corpus''' for msg in corpus: self.untrain(msg)class SpamTrainer(Trainer): '''Trainer for spam''' def __init__(self, bayes, updateprobs=NO_UPDATEPROBS): '''Constructor''' Trainer.__init__(self, bayes, True, updateprobs)class HamTrainer(Trainer): '''Trainer for ham''' def __init__(self, bayes, updateprobs=NO_UPDATEPROBS): '''Constructor''' Trainer.__init__(self, bayes, False, updateprobs)class NoSuchClassifierError(Exception): def __init__(self, invalid_name): self.invalid_name = invalid_name def __str__(self): return repr(self.invalid_name)class MutuallyExclusiveError(Exception): def __str__(self): return "Only one type of database can be specified"# values are classifier class, True if it accepts a mode# arg, and True if the argument is a pathname_storage_types = {"dbm" : (DBDictClassifier, True, True), "pickle" : (PickledClassifier, False, True), "pgsql" : (PGClassifier, False, False), "mysql" : (mySQLClassifier, False, False), "cdb" : (CDBClassifier, False, True), "zodb" : (ZODBClassifier, True, True), "zeo" : (ZEOClassifier, False, False), }def open_storage(data_source_name, db_type="dbm", mode=None): """Return a storage object appropriate to the given parameters. By centralizing this code here, all the applications will behave the same given the same options. """ try: klass, supports_mode, unused = _storage_types[db_type] except KeyError: raise NoSuchClassifierError(db_type) try: if supports_mode and mode is not None: return klass(data_source_name, mode) else: return klass(data_source_name) except dbmstorage.error, e: if str(e) == "No dbm modules available!": # We expect this to hit a fair few people, so warn them nicely, # rather than just printing the trackback. print >> sys.stderr, "\nYou do not have a dbm module available " \ "to use. You need to either use a pickle (see the FAQ)" \ ", use Python 2.3 (or above), or install a dbm module " \ "such as bsddb (see http://sf.net/projects/pybsddb)." sys.exit() raise# The different database types that are available.# The key should be the command-line switch that is used to select this# type, and the value should be the name of the type (which# must be a valid key for the _storage_types dictionary)._storage_options = { "-p" : "pickle", "-d" : "dbm", }def database_type(opts, default_type=("Storage", "persistent_use_database"), default_name=("Storage", "persistent_storage_file")): """Return the name of the database and the type to use. The output of this function can be used as the db_type parameter for the open_storage function, for example: [standard getopts code] db_name, db_type = database_type(opts) storage = open_storage(db_name, db_type) The selection is made based on the options passed, or, if the appropriate options are not present, the options in the global options object. Currently supports: -p : pickle -d : dbm """ nm, typ = None, None for opt, arg in opts: if _storage_options.has_key(opt): if nm is None and typ is None: nm, typ = arg, _storage_options[opt] else: raise MutuallyExclusiveError() if nm is None and typ is None: typ = options[default_type] try: unused, unused, is_path = _storage_types[typ] except KeyError: raise NoSuchClassifierError(db_type) if is_path: nm = get_pathname_option(*default_name) else: nm = options[default_name] return nm, typdef convert(old_name=None, old_type=None, new_name=None, new_type=None): # The expected need is to convert the existing hammie.db dbm # database to a hammie.fs ZODB database. if old_name is None: old_name = "hammie.db" if old_type is None: old_type = "dbm" if new_name is None or new_type is None: auto_name, auto_type = database_type({}) if new_name is None: new_name = auto_name if new_type is None: new_type = auto_type old_bayes = open_storage(old_name, old_type, 'r') new_bayes = open_storage(new_name, new_type) words = old_bayes._wordinfokeys() try: new_bayes.nham = old_bayes.nham except AttributeError: new_bayes.nham = 0 try: new_bayes.nspam = old_bayes.nspam except AttributeError: new_bayes.nspam = 0 print >> sys.stderr, "Converting %s (%s database) to " \ "%s (%s database)." % (old_name, old_type, new_name, new_type) print >> sys.stderr, "Database has %s ham, %s spam, and %s words." % \ (new_bayes.nham, new_bayes.nspam, len(words)) for word in words: new_bayes._wordinfoset(word, old_bayes._wordinfoget(word)) old_bayes.close() print >> sys.stderr, "Storing database, please be patient..." new_bayes.store() print >> sys.stderr, "Conversion complete." new_bayes.close()def ensureDir(dirname): """Ensure that the given directory exists - in other words, if it does not exist, attempt to create it.""" try: os.mkdir(dirname) if options["globals", "verbose"]: print >>sys.stderr, "Creating directory", dirname except OSError, e: if e.errno != errno.EEXIST: raiseif __name__ == '__main__': print >> sys.stderr, __doc__
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -