⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 storage.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 3 页
字号:
            object.__setattr__(self, att, value)    def create_storage(self):        import ZODB        from ZODB.FileStorage import FileStorage        self.storage = FileStorage(self.db_filename,                                   read_only=self.mode=='r')    def load(self):        '''Load state from database'''        import ZODB        if options["globals", "verbose"]:            print >> sys.stderr, "Loading state from %s (%s) database" % \                  (self.db_filename, self.db_name)        # If we are not closed, then we need to close first before we        # reload.        if not self.closed:            self.close()        self.create_storage()        self.DB = ZODB.DB(self.storage, cache_size=10000)        self.conn = self.DB.open()        root = self.conn.root()        self.classifier = root.get(self.db_name)        if self.classifier is None:            # There is no classifier, so create one.            if options["globals", "verbose"]:                print >> sys.stderr, self.db_name, 'is a new ZODB'            self.classifier = root[self.db_name] = self.ClassifierClass()        else:            if options["globals", "verbose"]:                print >> sys.stderr, '%s is an existing ZODB, with %d ' \                      'ham and %d spam' % (self.db_name, self.nham,                                           self.nspam)        self.closed = False    def store(self):        '''Place state into persistent store'''        try:            import ZODB            import ZODB.Transaction        except ImportError:            import transaction            commit = transaction.commit            abort = transaction.abort        else:            commit = ZODB.Transaction.get_transaction().commit            abort = ZODB.Transaction.get_transaction().abort        from ZODB.POSException import ConflictError        from ZODB.POSException import TransactionFailedError        assert self.closed == False, "Can't store a closed database"        if options["globals", "verbose"]:            print >> sys.stderr, 'Persisting', self.db_name, 'state in database'        try:            commit()        except ConflictError:            # We'll save it next time, or on close.  It'll be lost if we            # hard-crash, but that's unlikely, and not a particularly big            # deal.            if options["globals", "verbose"]:                print >> sys.stderr, "Conflict on commit", self.db_name            abort()        except TransactionFailedError:            # Saving isn't working.  Try to abort, but chances are that            # restarting is needed.            print >> sys.stderr, "Storing failed.  Need to restart.", \                  self.db_name            abort()    def close(self):        # Ensure that the db is saved before closing.  Alternatively, we        # could abort any waiting transaction.  We need to do *something*        # with it, though, or it will be still around after the db is        # closed and cause problems.  For now, saving seems to make sense        # (and we can always add abort methods if they are ever needed).        if self.mode != 'r':            self.store()        # Do the closing.                self.DB.close()        # We don't make any use of the 'undo' capabilities of the        # FileStorage at the moment, so might as well pack the database        # each time it is closed, to save as much disk space as possible.        # Pack it up to where it was 'yesterday'.        # XXX What is the 'referencesf' parameter for pack()?  It doesn't        # XXX seem to do anything according to the source.        if self.mode != 'r' and hasattr(self.storage, "pack"):            self.storage.pack(time.time()-60*60*24, None)        self.storage.close()        # Ensure that we cannot continue to use this classifier.        delattr(self, "classifier")        self.closed = True        if options["globals", "verbose"]:            print >> sys.stderr, 'Closed', self.db_name, 'database'class ZEOClassifier(ZODBClassifier):    def __init__(self, data_source_name):        source_info = data_source_name.split()        self.host = "localhost"        self.port = None        db_name = "SpamBayes"        for info in source_info:            if info.startswith("host"):                self.host = info[5:]            elif info.startswith("port"):                self.port = int(info[5:])            elif info.startswith("dbname"):                db_name = info[7:]        ZODBClassifier.__init__(self, db_name)    def create_storage(self):        from ZEO.ClientStorage import ClientStorage        if self.port:            addr = self.host, self.port        else:            addr = self.host        self.storage = ClientStorage(addr)# Flags that the Trainer will recognise.  These should be or'able integer# values (i.e. 1, 2, 4, 8, etc.).NO_TRAINING_FLAG = 1class Trainer:    '''Associates a Classifier object and one or more Corpora, \    is an observer of the corpora'''    def __init__(self, bayes, is_spam, updateprobs=NO_UPDATEPROBS):        '''Constructor(Classifier, is_spam(True|False), updprobs(True|False)'''        self.bayes = bayes        self.is_spam = is_spam        self.updateprobs = updateprobs    def onAddMessage(self, message, flags=0):        '''A message is being added to an observed corpus.'''        if not (flags & NO_TRAINING_FLAG):            self.train(message)    def train(self, message):        '''Train the database with the message'''        if options["globals", "verbose"]:            print >> sys.stderr, 'training with',message.key()        self.bayes.learn(message.tokenize(), self.is_spam)#                         self.updateprobs)        message.setId(message.key())        message.RememberTrained(self.is_spam)    def onRemoveMessage(self, message, flags=0):        '''A message is being removed from an observed corpus.'''        # If a message is being expired from the corpus, we do        # *NOT* want to untrain it, because that's not what's happening.        # If this is the case, then flags will include NO_TRAINING_FLAG.        # There are no other flags we currently use.        if not (flags & NO_TRAINING_FLAG):            self.untrain(message)    def untrain(self, message):        '''Untrain the database with the message'''        if options["globals", "verbose"]:            print >> sys.stderr, 'untraining with',message.key()        self.bayes.unlearn(message.tokenize(), self.is_spam)#                           self.updateprobs)        # can raise ValueError if database is fouled.  If this is the case,        # then retraining is the only recovery option.        message.RememberTrained(None)    def trainAll(self, corpus):        '''Train all the messages in the corpus'''        for msg in corpus:            self.train(msg)    def untrainAll(self, corpus):        '''Untrain all the messages in the corpus'''        for msg in corpus:            self.untrain(msg)class SpamTrainer(Trainer):    '''Trainer for spam'''    def __init__(self, bayes, updateprobs=NO_UPDATEPROBS):        '''Constructor'''        Trainer.__init__(self, bayes, True, updateprobs)class HamTrainer(Trainer):    '''Trainer for ham'''    def __init__(self, bayes, updateprobs=NO_UPDATEPROBS):        '''Constructor'''        Trainer.__init__(self, bayes, False, updateprobs)class NoSuchClassifierError(Exception):    def __init__(self, invalid_name):        self.invalid_name = invalid_name    def __str__(self):        return repr(self.invalid_name)class MutuallyExclusiveError(Exception):    def __str__(self):        return "Only one type of database can be specified"# values are classifier class, True if it accepts a mode# arg, and True if the argument is a pathname_storage_types = {"dbm" : (DBDictClassifier, True, True),                  "pickle" : (PickledClassifier, False, True),                  "pgsql" : (PGClassifier, False, False),                  "mysql" : (mySQLClassifier, False, False),                  "cdb" : (CDBClassifier, False, True),                  "zodb" : (ZODBClassifier, True, True),                  "zeo" : (ZEOClassifier, False, False),                  }def open_storage(data_source_name, db_type="dbm", mode=None):    """Return a storage object appropriate to the given parameters.    By centralizing this code here, all the applications will behave    the same given the same options.    """    try:        klass, supports_mode, unused = _storage_types[db_type]    except KeyError:        raise NoSuchClassifierError(db_type)    try:        if supports_mode and mode is not None:            return klass(data_source_name, mode)        else:            return klass(data_source_name)    except dbmstorage.error, e:        if str(e) == "No dbm modules available!":            # We expect this to hit a fair few people, so warn them nicely,            # rather than just printing the trackback.            print >> sys.stderr, "\nYou do not have a dbm module available " \                  "to use.  You need to either use a pickle (see the FAQ)" \                  ", use Python 2.3 (or above), or install a dbm module " \                  "such as bsddb (see http://sf.net/projects/pybsddb)."            sys.exit()        raise# The different database types that are available.# The key should be the command-line switch that is used to select this# type, and the value should be the name of the type (which# must be a valid key for the _storage_types dictionary)._storage_options = { "-p" : "pickle",                     "-d" : "dbm",                     }def database_type(opts, default_type=("Storage", "persistent_use_database"),                  default_name=("Storage", "persistent_storage_file")):    """Return the name of the database and the type to use.  The output of    this function can be used as the db_type parameter for the open_storage    function, for example:        [standard getopts code]        db_name, db_type = database_type(opts)        storage = open_storage(db_name, db_type)    The selection is made based on the options passed, or, if the    appropriate options are not present, the options in the global    options object.    Currently supports:       -p  :  pickle       -d  :  dbm    """    nm, typ = None, None    for opt, arg in opts:        if _storage_options.has_key(opt):            if nm is None and typ is None:                nm, typ = arg, _storage_options[opt]            else:                raise MutuallyExclusiveError()    if nm is None and typ is None:        typ = options[default_type]        try:            unused, unused, is_path = _storage_types[typ]        except KeyError:            raise NoSuchClassifierError(db_type)        if is_path:            nm = get_pathname_option(*default_name)        else:            nm = options[default_name]    return nm, typdef convert(old_name=None, old_type=None, new_name=None, new_type=None):    # The expected need is to convert the existing hammie.db dbm    # database to a hammie.fs ZODB database.    if old_name is None:        old_name = "hammie.db"    if old_type is None:        old_type = "dbm"    if new_name is None or new_type is None:        auto_name, auto_type = database_type({})        if new_name is None:            new_name = auto_name        if new_type is None:            new_type = auto_type    old_bayes = open_storage(old_name, old_type, 'r')    new_bayes = open_storage(new_name, new_type)    words = old_bayes._wordinfokeys()    try:        new_bayes.nham = old_bayes.nham    except AttributeError:        new_bayes.nham = 0    try:        new_bayes.nspam = old_bayes.nspam    except AttributeError:        new_bayes.nspam = 0    print >> sys.stderr, "Converting %s (%s database) to " \          "%s (%s database)." % (old_name, old_type, new_name, new_type)    print >> sys.stderr, "Database has %s ham, %s spam, and %s words." % \          (new_bayes.nham, new_bayes.nspam, len(words))    for word in words:        new_bayes._wordinfoset(word, old_bayes._wordinfoget(word))    old_bayes.close()    print >> sys.stderr, "Storing database, please be patient..."    new_bayes.store()    print >> sys.stderr, "Conversion complete."    new_bayes.close()def ensureDir(dirname):    """Ensure that the given directory exists - in other words, if it    does not exist, attempt to create it."""    try:        os.mkdir(dirname)        if options["globals", "verbose"]:            print >>sys.stderr, "Creating directory", dirname    except OSError, e:        if e.errno != errno.EEXIST:            raiseif __name__ == '__main__':    print >> sys.stderr, __doc__

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -