⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 class.naivebayesianstorage.php

📁 贝叶斯算法
💻 PHP
字号:
<?php
/*
  ***** BEGIN LICENSE BLOCK *****
   This file is part of PHP Naive Bayesian Filter.

   The Initial Developer of the Original Code is
   Loic d'Anterroches [loic_at_xhtml.net].
   Portions created by the Initial Developer are Copyright (C) 2003
   the Initial Developer. All Rights Reserved.

   Contributor(s):

   PHP Naive Bayesian Filter is free software; you can redistribute it
   and/or modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of
   the License, or (at your option) any later version.

   PHP Naive Bayesian Filter is distributed in the hope that it will
   be useful, but WITHOUT ANY WARRANTY; without even the implied
   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with PHP Naive Bayesian Filter; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

   Alternatively, the contents of this file may be used under the terms of
   the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
   in which case the provisions of the LGPL are applicable instead
   of those above.

  ***** END LICENSE BLOCK *****
*/

/** Access to the storage of the data for the filter.

To avoid any dependency on a particular database, this class handles all
access to the data storage. You can provide your own class as long as
all the methods are available. The current one relies on a MySQL database.

methods:
    - array getCategories()
    - bool  wordExists(string $word)
    - array getWord(string $word, string $categoryid)

*/
class NaiveBayesianStorage
{
    /** Database connection wrapper; a Connection object (class.mysql.php)
        created by the constructor. Every query of this class goes through it.
    */
    var $con = null;

    /** open the connection used by all the storage methods.

        Declared before the legacy same-name constructor on purpose: PHP 8 no
        longer treats a method named after the class as a constructor, and
        PHP 5 raises an E_STRICT notice when __construct() is declared after it.

        @param string user
        @param string password
        @param string server
        @param string database name
    */
    function __construct($user, $pwd, $server, $dbname)
    {
        include_once dirname(__FILE__).'/class.mysql.php';
        $this->con = new Connection($user, $pwd, $server, $dbname);
    }

    /** legacy (PHP 4 style) constructor, kept for backward compatibility.

        Still usable by code calling it explicitly, e.g. from a subclass with
        parent::NaiveBayesianStorage(...).

        @return bool always true
        @param string user
        @param string password
        @param string server
        @param string database name
    */
    function NaiveBayesianStorage($user, $pwd, $server, $dbname)
    {
        $this->__construct($user, $pwd, $server, $dbname);
        return true;
    }

    /** get the list of categories with basic data.

        @return array key = category ids, values = array(keys = 'probability', 'word_count')
    */
    function getCategories()
    {
        $categories = array();
        $rs = $this->con->select('SELECT * FROM nb_categories');
        while (!$rs->EOF()) {
            $categories[$rs->f('category_id')] = array(
                'probability' => $rs->f('probability'),
                'word_count'  => $rs->f('word_count')
            );
            $rs->moveNext();
        }
        return $categories;
    }

    /** see if the word is an already learnt word (in any category).

        @return bool
        @param string word
    */
    function wordExists($word)
    {
        $rs = $this->con->select("SELECT * FROM nb_wordfreqs WHERE word='".$this->con->escapeStr($word)."'");
        return !$rs->isEmpty();
    }

    /** get details of a word in a category.

        The count is 0 when the word has no row for this category, else it is
        the value of the 'count' column as returned by the recordset.

        @return array ('count' => count)
        @param  string word
        @param  string category id
    */
    function getWord($word, $category_id)
    {
        $details = array();
        $rs = $this->con->select("SELECT * FROM nb_wordfreqs WHERE
                                    word='".$this->con->escapeStr($word)."' AND
                                    category_id='".$this->con->escapeStr($category_id)."'");
        if ($rs->isEmpty()) {
            $details['count'] = 0;
        } else {
            $details['count'] = $rs->f('count');
        }
        return $details;
    }

    /** update a word in a category.
    If the word is new in this category it is added, else the given count is
    added to the stored count.

        @return bool success (result of the connection's execute())
        @param string word
        @param int    count
        @param string category id
    */
    function updateWord($word, $count, $category_id)
    {
        $oldword = $this->getWord($word, $category_id);
        if (0 == $oldword['count']) {
            // The (int) cast already makes the value safe, no escaping needed.
            return $this->con->execute("INSERT INTO nb_wordfreqs (word, category_id, count) VALUES
                                ('".$this->con->escapeStr($word)."',
                                 '".$this->con->escapeStr($category_id)."',
                                 '".(int)$count."')");
        }
        // MySQL has no "+=" operator in UPDATE ... SET: the increment has to
        // be spelled out as "count = count + N".
        return $this->con->execute("UPDATE nb_wordfreqs SET count = count + ".(int)$count."
                                    WHERE category_id = '".$this->con->escapeStr($category_id)."'
                                    AND word = '".$this->con->escapeStr($word)."'");
    }

    /** remove a word from a category.
    The stored count is decreased by the given count; the row is deleted when
    the stored count would drop to zero or below.

        @return bool success (result of the connection's execute())
        @param string word
        @param int    count
        @param string category id
    */
    function removeWord($word, $count, $category_id)
    {
        $oldword = $this->getWord($word, $category_id);
        if (0 != $oldword['count'] && 0 >= ($oldword['count'] - $count)) {
            return $this->con->execute("DELETE FROM nb_wordfreqs WHERE
                                word='".$this->con->escapeStr($word)."' AND
                                category_id='".$this->con->escapeStr($category_id)."'");
        }
        // MySQL has no "-=" operator in UPDATE ... SET: the decrement has to
        // be spelled out as "count = count - N".
        return $this->con->execute("UPDATE nb_wordfreqs SET count = count - ".(int)$count."
                                    WHERE category_id = '".$this->con->escapeStr($category_id)."'
                                    AND word = '".$this->con->escapeStr($word)."'");
    }

    /** update the probabilities of the categories and word count.
    This function must be run after a set of training.

    The probability of a category is its number of words divided by the
    number of words of all the categories.

        @return bool success (always true)
    */
    function updateProbabilities()
    {
        // first get the word count of each category and the grand total
        $rs = $this->con->select("SELECT category_id, SUM(count) AS total FROM nb_wordfreqs WHERE 1 GROUP BY category_id");
        $total_words = 0;
        while (!$rs->EOF()) {
            $total_words += $rs->f('total');
            $rs->moveNext();
        }

        // Reset every category: a category with no word left in nb_wordfreqs
        // is absent from the recordset and would else keep its stale values.
        $this->con->execute("UPDATE nb_categories SET word_count=0, probability=0 WHERE 1");
        if ($total_words == 0) {
            return true;
        }

        $rs->moveStart();
        while (!$rs->EOF()) {
            // number_format() with an explicit '.' keeps the SQL literal valid
            // whatever the locale (before PHP 8 a float to string cast
            // follows LC_NUMERIC and may produce a decimal comma).
            $proba = number_format($rs->f('total') / $total_words, 14, '.', '');
            $this->con->execute("UPDATE nb_categories SET word_count=".(int)$rs->f('total').",
                                        probability=".$proba."
                                        WHERE category_id = '".$this->con->escapeStr($rs->f('category_id'))."'");
            $rs->moveNext();
        }
        return true;
    }

    /** save a reference in the database.

        @return bool success (result of the connection's execute())
        @param  string reference id, must be unique
        @param  string category id
        @param  string content of the reference
    */
    function saveReference($doc_id, $category_id, $content)
    {
        return $this->con->execute("INSERT INTO nb_references (id, category_id, content) VALUES
                                ('".$this->con->escapeStr($doc_id)."',
                                 '".$this->con->escapeStr($category_id)."',
                                 '".$this->con->escapeStr($content)."')");
    }

    /** get a reference from the database.

        @return array  reference (category_id => ..., content => ..., id => ...),
                       empty array when the reference is not found
        @param  string id
    */
    function getReference($doc_id)
    {
        $ref = array();
        $rs = $this->con->select("SELECT * FROM nb_references WHERE id='".$this->con->escapeStr($doc_id)."'");
        if ($rs->isEmpty()) {
            return $ref;
        }
        $ref['category_id'] = $rs->f('category_id');
        $ref['content'] = $rs->f('content');
        $ref['id'] = $rs->f('id');
        return $ref;
    }

    /** remove a reference from the database.

        @return bool success (result of the connection's execute())
        @param  string reference id
    */
    function removeReference($doc_id)
    {
        return $this->con->execute("DELETE FROM nb_references WHERE id='".$this->con->escapeStr($doc_id)."'");
    }
}

?>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -