📄 storage.py

📁 用python实现的邮件过滤器
💻 PY
📖 第 1 页 / 共 3 页
字号:
        '''Save state to the database'''        self._set_row(self.statekey, self.nspam, self.nham)    def cursor(self):        '''Return a new db cursor'''        raise NotImplementedError, "must be implemented in subclass"    def fetchall(self, c):        '''Return all rows as a dict'''        raise NotImplementedError, "must be implemented in subclass"    def commit(self, c):        '''Commit the current transaction - may commit at db or cursor'''        raise NotImplementedError, "must be implemented in subclass"    def create_bayes(self):        '''Create a new bayes table'''        c = self.cursor()        c.execute(self.table_definition)        self.commit(c)    def _get_row(self, word):        '''Return row matching word'''        try:            c = self.cursor()            c.execute("select * from bayes"                      "  where word=%s",                      (word,))        except Exception, e:            print >> sys.stderr, "error:", (e, word)            raise        rows = self.fetchall(c)        if rows:            return rows[0]        else:            return {}    def _set_row(self, word, nspam, nham):        c = self.cursor()        if self._has_key(word):            c.execute("update bayes"                      "  set nspam=%s,nham=%s"                      "  where word=%s",                      (nspam, nham, word))        else:            c.execute("insert into bayes"                      "  (nspam, nham, word)"                      "  values (%s, %s, %s)",                      (nspam, nham, word))        self.commit(c)    def _delete_row(self, word):        c = self.cursor()        c.execute("delete from bayes"                  "  where word=%s",                  (word,))        self.commit(c)    def _has_key(self, key):        c = self.cursor()        c.execute("select word from bayes"                  "  where word=%s",                  (key,))        return len(self.fetchall(c)) > 0    def _wordinfoget(self, word):        if isinstance(word, unicode):            word = word.encode("utf-8")        row = self._get_row(word)        if row:            item = self.WordInfoClass()            item.__setstate__((row["nspam"], row["nham"]))            return item        else:            return self.WordInfoClass()    def _wordinfoset(self, word, record):        if isinstance(word, unicode):            word = word.encode("utf-8")        self._set_row(word, record.spamcount, record.hamcount)    def _wordinfodel(self, word):        if isinstance(word, unicode):            word = word.encode("utf-8")        self._delete_row(word)    def _wordinfokeys(self):        c = self.cursor()        c.execute("select word from bayes")        rows = self.fetchall(c)        return [r[0] for r in rows]class PGClassifier(SQLClassifier):    '''Classifier object persisted in a Postgres database'''    def __init__(self, db_name):        self.table_definition = ("create table bayes ("                                 "  word bytea not null default '',"                                 "  nspam integer not null default 0,"                                 "  nham integer not null default 0,"                                 "  primary key(word)"                                 ")")        SQLClassifier.__init__(self, db_name)    def cursor(self):        return self.db.cursor()    def fetchall(self, c):        return c.dictfetchall()    def commit(self, c):        self.db.commit()    def load(self):        '''Load state from database'''        import psycopg        if options["globals", "verbose"]:            print >> sys.stderr, 'Loading state from',self.db_name,'database'        self.db = psycopg.connect(self.db_name)        c = self.cursor()        try:            c.execute("select count(*) from bayes")        except psycopg.ProgrammingError:            self.db.rollback()            self.create_bayes()        if self._has_key(self.statekey):            row = self._get_row(self.statekey)            self.nspam = row["nspam"]            self.nham = row["nham"]            if options["globals", "verbose"]:                print >> sys.stderr, ('%s is an existing database,'                                      ' with %d spam and %d ham') \                      % (self.db_name, self.nspam, self.nham)        else:            # new database            if options["globals", "verbose"]:                print >> sys.stderr, self.db_name,'is a new database'            self.nspam = 0            self.nham = 0class mySQLClassifier(SQLClassifier):    '''Classifier object persisted in a mySQL database    It is assumed that the database already exists, and that the mySQL    server is currently running.'''    def __init__(self, data_source_name):        self.table_definition = ("create table bayes ("                                 "  word varchar(255) not null default '',"                                 "  nspam integer not null default 0,"                                 "  nham integer not null default 0,"                                 "  primary key(word)"                                 ");")        self.host = "localhost"        self.username = "root"        self.password = ""        db_name = "spambayes"        source_info = data_source_name.split()        for info in source_info:            if info.startswith("host"):                self.host = info[5:]            elif info.startswith("user"):                self.username = info[5:]            elif info.startswith("pass"):                self.password = info[5:]            elif info.startswith("dbname"):                db_name = info[7:]        SQLClassifier.__init__(self, db_name)    def cursor(self):        return self.db.cursor()    def fetchall(self, c):        return c.fetchall()    def commit(self, c):        self.db.commit()    def load(self):        '''Load state from database'''        import MySQLdb        if options["globals", "verbose"]:            print >> sys.stderr, 'Loading state from',self.db_name,'database'        self.db = MySQLdb.connect(host=self.host, db=self.db_name,                                  user=self.username, passwd=self.password)        c = self.cursor()        try:            c.execute("select count(*) from bayes")        except MySQLdb.ProgrammingError:            try:                self.db.rollback()            except MySQLdb.NotSupportedError:                # Server doesn't support rollback, so just assume that                # we can keep going and create the db.  This should only                # happen once, anyway.                pass            self.create_bayes()        if self._has_key(self.statekey):            row = self._get_row(self.statekey)            self.nspam = int(row[1])            self.nham = int(row[2])            if options["globals", "verbose"]:                print >> sys.stderr, ('%s is an existing database,'                                      ' with %d spam and %d ham') \                      % (self.db_name, self.nspam, self.nham)        else:            # new database            if options["globals", "verbose"]:                print >> sys.stderr, self.db_name,'is a new database'            self.nspam = 0            self.nham = 0    def _wordinfoget(self, word):        if isinstance(word, unicode):            word = word.encode("utf-8")        row = self._get_row(word)        if row:            item = self.WordInfoClass()            item.__setstate__((row[1], row[2]))            return item        else:            return Noneclass CDBClassifier(classifier.Classifier):    """A classifier that uses a CDB database.    A CDB wordinfo database is quite small and fast but is slow to update.    It is appropriate if training is done rarely (e.g. monthly or weekly    using archived ham and spam).    """    def __init__(self, db_name):        classifier.Classifier.__init__(self)        self.db_name = db_name        self.statekey = STATE_KEY        self.load()    def _WordInfoFactory(self, counts):        # For whatever reason, WordInfo's cannot be created with        # constructor ham/spam counts, so we do the work here.        # Since we're doing the work, we accept the ham/spam count        # in the form of a comma-delimited string, as that's what        # we get.        ham, spam = counts.split(',')        wi = classifier.WordInfo()        wi.hamcount = int(ham)        wi.spamcount = int(spam)        return wi    # Stolen from sb_dbexpimp.py    # Heaven only knows what encoding non-ASCII stuff will be in    # Try a few common western encodings and punt if they all fail    def uunquote(self, s):        for encoding in ("utf-8", "cp1252", "iso-8859-1"):            try:                return unicode(s, encoding)            except UnicodeDecodeError:                pass        # punt        return s    def load(self):        if os.path.exists(self.db_name):            db = open(self.db_name, "rb")            data = dict(cdb.Cdb(db))            db.close()            self.nham, self.nspam = [int(i) for i in \                                     data[self.statekey].split(',')]            self.wordinfo = dict([(self.uunquote(k),                                   self._WordInfoFactory(v)) \                                  for k, v in data.iteritems() \                                      if k != self.statekey])            if options["globals", "verbose"]:                print >> sys.stderr, ('%s is an existing CDB,'                                      ' with %d ham and %d spam') \                                      % (self.db_name, self.nham,                                         self.nspam)        else:            if options["globals", "verbose"]:                print >> sys.stderr, self.db_name, 'is a new CDB'            self.wordinfo = {}            self.nham = 0            self.nspam = 0    def store(self):        items = [(self.statekey, "%d,%d" % (self.nham, self.nspam))]        for word, wi in self.wordinfo.iteritems():            if isinstance(word, types.UnicodeType):                word = word.encode("utf-8")            items.append((word, "%d,%d" % (wi.hamcount, wi.spamcount)))        db = open(self.db_name, "wb")        cdb.cdb_make(db, items)        db.close()    def close(self):        # We keep no resources open - nothing to do.        pass# If ZODB isn't available, then this class won't be useable, but we# still need to be able to import this module.  So we pretend that all# is ok.try:    from persistent import Persistentexcept ImportError:    try:        from ZODB import Persistent    except ImportError:        Persistent = objectclass _PersistentClassifier(classifier.Classifier, Persistent):    def __init__(self):        import ZODB        from BTrees.OOBTree import OOBTree        classifier.Classifier.__init__(self)        self.wordinfo = OOBTree()class ZODBClassifier(object):    # Allow subclasses to override classifier class.    ClassifierClass = _PersistentClassifier    def __init__(self, db_name, mode='c'):        self.db_filename = db_name        self.db_name = os.path.basename(db_name)        self.closed = True        self.mode = mode        self.load()    def __getattr__(self, att):        # We pretend that we are a classifier subclass.        if hasattr(self, "classifier") and hasattr(self.classifier, att):            return getattr(self.classifier, att)        raise AttributeError("ZODBClassifier object has no attribute '%s'"                             % (att,))    def __setattr__(self, att, value):        # For some attributes, we change the classifier instead.        if att in ("nham", "nspam") and hasattr(self, "classifier"):            setattr(self.classifier, att, value)        else:
💿 文件大小 1791 K
👤 上传用户 guigong
📂 所属分类数学计算
🏷️ 相关标签

#python #邮件过滤
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -