📄 index.php
字号:
<?php
/*
***** BEGIN LICENSE BLOCK *****
This file is part of PHP Naive Bayesian Filter.
The Initial Developer of the Original Code is
Loic d'Anterroches [loic_at_xhtml.net].
Portions created by the Initial Developer are Copyright (C) 2003
the Initial Developer.
Contributor(s): J Wynia - English Translation
PHP Naive Bayesian Filter is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
PHP Naive Bayesian Filter is distributed in the hope that it will
be useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PHP Naive Bayesian Filter; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
***** END LICENSE BLOCK *****
*/
/*
This is a small example to give you an idea of how to use the class
You can create the necessary tables with the database definition file mysql.sql.
This file shouldn't be used on a live web server. It's just a proof of concept and example
to present the use of the filter. It doesn't have proper error handling or security...
*/
/* BEGIN CONFIGURATION */
// Database credentials. They are passed to the NaiveBayesianStorage object
// below and to the Connection objects created elsewhere on this page.
// NOTE(review): root with an empty password is only acceptable on a local
// sandbox; as stated above, this page is a demo and not meant for a live server.
$login = 'root';
$pass = '';
$db = 'bayes';
$server = 'localhost';
/* END CONFIGURATION */
// The page cannot work without these classes, so use require_once: a missing
// file stops execution right away with a clear error instead of a warning
// followed by a "class not found" failure a few lines further down.
require_once 'class.naivebayesian.php';
require_once 'class.naivebayesianstorage.php';
require_once 'class.mysql.php';
// $nbs and $nb are page-level globals: the action handlers pull $nb in with
// `global`, and $nbs is used later on the page to list the categories.
$nbs = new NaiveBayesianStorage($login, $pass, $server, $db);
$nb = new NaiveBayesian($nbs);
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>PHP Naive Bayesian Filter</title>
<style>
.success { font-weight: 600; color: #00CC00; }
.error { font-weight: 600; color: #CC0000; }
</style>
</head>
<body>
<h1>PHP Naive Bayesian Filter</h1>
<?php
// Dispatch the requested action to its handler. A plain page load carries no
// 'action' parameter, so fall back to an empty string instead of reading an
// undefined array key. Unknown or missing actions run no handler and the page
// simply renders its forms.
switch (isset($_REQUEST['action']) ? $_REQUEST['action'] : '') {
    case 'addcat':
        addcat();
        break;
    case 'remcat':
        remcat();
        break;
    case 'train':
        train();
        break;
    case 'untrain':
        untrain();
        break;
    case 'cat':
        cat();
        break;
}
/**
 * Handles the "addcat" action: creates a category from the 'cat' request
 * parameter.
 *
 * The name has HTML tags, surrounding whitespace and spaces removed. If
 * nothing is left, an error paragraph is printed; otherwise a row is inserted
 * into nb_categories and a success paragraph is printed.
 *
 * @return void
 */
function addcat()
{
    // $_REQUEST is a superglobal and needs no `global` declaration.
    global $login, $pass, $server, $db;

    // A missing or non-string parameter is treated as an empty name.
    $cat = (isset($_REQUEST['cat']) && is_string($_REQUEST['cat'])) ? $_REQUEST['cat'] : '';
    $cat = trim(strip_tags($cat));
    // strtr() with an empty replacement string leaves the input untouched;
    // str_replace() is what actually removes the spaces.
    $cat = str_replace(' ', '', $cat);

    if (strlen($cat) == 0) {
        echo '<p class="error"><strong>Error:</strong> You must provide a category name.</p>';
        return;
    }

    $con = new Connection($login, $pass, $server, $db);
    $con->execute("INSERT INTO nb_categories (category_id) VALUES ('".$con->escapeStr($cat)."')");
    echo "<p class='success'>The category has been just added.</p>";
}
/**
 * Handles the "remcat" action: deletes the category named by the 'cat'
 * request parameter.
 *
 * The name has HTML tags, surrounding whitespace and spaces removed. If
 * nothing is left, an error paragraph is printed. Otherwise the matching rows
 * are deleted from nb_categories, nb_references and nb_wordfreqs,
 * $nb->updateProbabilities() is called and a success paragraph is printed.
 *
 * @return void
 */
function remcat()
{
    // $_REQUEST is a superglobal and needs no `global` declaration.
    global $login, $pass, $server, $db, $nb;

    // A missing or non-string parameter is treated as an empty name.
    $cat = (isset($_REQUEST['cat']) && is_string($_REQUEST['cat'])) ? $_REQUEST['cat'] : '';
    $cat = trim(strip_tags($cat));
    // strtr() with an empty replacement string leaves the input untouched;
    // str_replace() is what actually removes the spaces.
    $cat = str_replace(' ', '', $cat);

    if (strlen($cat) == 0) {
        echo '<p class="error"><strong>Error:</strong> You must provide a category name.</p>';
        return;
    }

    $con = new Connection($login, $pass, $server, $db);
    // Escape once and reuse the value in all three statements.
    $escaped = $con->escapeStr($cat);
    $con->execute("DELETE FROM nb_categories WHERE category_id='".$escaped."'");
    $con->execute("DELETE FROM nb_references WHERE category_id='".$escaped."'");
    $con->execute("DELETE FROM nb_wordfreqs WHERE category_id='".$escaped."'");
    $nb->updateProbabilities();
    echo "<p class='success'>The category has been just removed.</p>";
}
/**
 * Handles the "train" action: feeds one document to the filter.
 *
 * Reads three request parameters:
 *  - 'docid':    document identifier; tags, surrounding whitespace and spaces removed
 *  - 'cat':      category identifier; cleaned the same way
 *  - 'document': the document text; only trimmed
 * Each one must be non-empty after cleaning, otherwise an error paragraph is
 * printed and nothing else happens. On success $nb->train() is called and,
 * if it returns a truthy value, $nb->updateProbabilities() as well.
 *
 * @return void
 */
function train()
{
    // $_REQUEST is a superglobal and needs no `global` declaration.
    global $nb;

    // Missing or non-string parameters are treated as empty strings.
    $docid = (isset($_REQUEST['docid']) && is_string($_REQUEST['docid'])) ? $_REQUEST['docid'] : '';
    // strtr() with an empty replacement string leaves the input untouched;
    // str_replace() is what actually removes the spaces.
    $docid = str_replace(' ', '', trim(strip_tags($docid)));
    if (strlen($docid) == 0) {
        echo '<p class="error"><strong>Error:</strong> You must provide an indentifier for the document..</p>';
        return;
    }

    $cat = (isset($_REQUEST['cat']) && is_string($_REQUEST['cat'])) ? $_REQUEST['cat'] : '';
    $cat = str_replace(' ', '', trim(strip_tags($cat)));
    if (strlen($cat) == 0) {
        echo '<p class="error"><strong>Error:</strong> You must give an identifier for the category.</p>';
        return;
    }

    $doc = (isset($_REQUEST['document']) && is_string($_REQUEST['document'])) ? $_REQUEST['document'] : '';
    $doc = trim($doc);
    if (strlen($doc) == 0) {
        echo '<p class="error"><strong>Error:</strong> You must provide a document.</p>';
        return;
    }

    if ($nb->train($docid, $cat, $doc)) {
        $nb->updateProbabilities();
        echo "<p class='success'>The filter has been trained.</p>";
    } else {
        echo "<p class='error'>Error: Error training the filter.</p>";
    }
}
/**
 * Handles the "untrain" action: removes a document from the filter.
 *
 * Reads the 'docid' request parameter, with tags, surrounding whitespace and
 * spaces removed. If nothing is left, an error paragraph is printed.
 * Otherwise $nb->untrain() is called and, if it returns a truthy value,
 * $nb->updateProbabilities() as well.
 *
 * @return void
 */
function untrain()
{
    // $_REQUEST is a superglobal and needs no `global` declaration.
    global $nb;

    // A missing or non-string parameter is treated as an empty identifier.
    $docid = (isset($_REQUEST['docid']) && is_string($_REQUEST['docid'])) ? $_REQUEST['docid'] : '';
    // strtr() with an empty replacement string leaves the input untouched;
    // str_replace() is what actually removes the spaces.
    $docid = str_replace(' ', '', trim(strip_tags($docid)));
    if (strlen($docid) == 0) {
        echo '<p class="error"><strong>Error:</strong> You must provide an identifier for the document.</p>';
        return;
    }

    // The previous call also passed $cat and $doc, which are never defined in
    // this function (they evaluated to null and raised a warning). Only the
    // document id is available here, so only that is passed.
    // NOTE(review): confirm that NaiveBayesian::untrain() needs just the document id.
    if ($nb->untrain($docid)) {
        $nb->updateProbabilities();
        echo "<p class='success'>The filter has been untrained.</p>";
    } else {
        echo "<p class='error'>Error: Problem untraining the filter</p>";
    }
}
/**
 * Handles the "cat" action: scores a document against the categories.
 *
 * Reads the 'document' request parameter (trimmed). If it is empty, an error
 * paragraph is printed. Otherwise the result of $nb->categorize() is printed
 * as an HTML table with one row per category => score pair.
 *
 * @return void
 */
function cat()
{
    // $_REQUEST is a superglobal and needs no `global` declaration.
    global $nb;

    // A missing or non-string parameter is treated as an empty document.
    $doc = (isset($_REQUEST['document']) && is_string($_REQUEST['document'])) ? $_REQUEST['document'] : '';
    $doc = trim($doc);
    if (strlen($doc) == 0) {
        echo '<p class="error"><strong>Error:</strong> You must supply a document.</p>';
        return;
    }

    $scores = $nb->categorize($doc);
    echo "<table><caption>Scores</caption>\n";
    echo "<tr><th>Category</th><th>Score</th></tr>\n";
    // foreach replaces list()/each(): each() was removed in PHP 8.0.
    foreach ($scores as $cat => $score) {
        // Category names come from user input, so escape them before printing.
        // The encoding matches the charset declared in the page's meta tag.
        $catHtml = htmlspecialchars((string) $cat, ENT_QUOTES, 'ISO-8859-1');
        $scoreHtml = htmlspecialchars((string) $score, ENT_QUOTES, 'ISO-8859-1');
        echo "<tr><td>$catHtml</td><td>$scoreHtml</td></tr>\n";
    }
    echo "</table>";
}
?>
<?php
// Load the category list once for the rest of the page. The <select> menus
// further down iterate over it and use the array keys as category names.
$cats = $nbs->getCategories();
?>
<p>This file is an English translation of the sample that comes with the package. The translation of this file, the readme, etc. was done by J Wynia at <a href="http://www.phpgeek.com">PHPGeek.com</a>, where this English version of the package is maintained. This file is only a sample to show you how to use the API. It provides a fairly easy way to set up your initial categories as well as to start training and to test out classifications. However, to get the most out of this package, you should integrate it into your own script that feeds it documents for training and classification. As the most common use for Bayesian classification right now is spam filtering, that's the context that's used for the examples.</p>
<h2>Overview</h2>
<p>You need to have at least two categories to be able to make a comparison, for example <strong>spam</strong> and <strong>nonspam</strong>. The identifiers for these categories must not contain spaces and must consist only of letters and numbers.</p> <p>Once your categories are created, you can train your filter. Take a series of spam emails, choose your <strong>spam</strong> category and train the filter (tell it that these documents are "spam"). You should also take some emails which are not spam, choose your <strong>nonspam</strong> category and train the filter (tell it that these documents are "nonspam").</p> <p>Now you can take any email at random and try to see whether it is spam or nonspam. For that, use the categorization function. The higher the score is, the greater the chance that your message belongs to that category. There is an automatic standard score that gives 0 or 1 if you have only 2 categories. If you have questions, post them on <a href="http://www.xhtml.net/">xhtml.net</a>.</p>
<h2>Add a Category</h2>
<form action='index.php' method='POST'>
<fieldset>
<input type='hidden' name='action' value='addcat'/>
Category Name : <input type='text' name='cat' value='' />
<input type='submit' name='Submit' value='Add Category' />
</fieldset>
</form>
<h2>Train the Filter</h2>
<form action='index.php' method='POST'>
<fieldset>
<input type='hidden' name='action' value='train'/>
Document Identifier : <input type='text' name='docid' value='' /> (it must be unique)<br />
Document Category:
<select name='cat'>
<?php
// One <option> per category for the "Train the Filter" form.
// foreach replaces reset()/list()/each(): each() was removed in PHP 8.0.
foreach ($cats as $key => $val) {
    // Category names come from user input and may contain quotes, which would
    // break out of the single-quoted attribute; escape them. The encoding
    // matches the charset declared in the page's meta tag.
    $optionValue = htmlspecialchars((string) $key, ENT_QUOTES, 'ISO-8859-1');
    echo "<option value='$optionValue'>$optionValue</option>\n";
}
?>
</select>
<br />
Paste the document text :<br />
<textarea name="document" cols='50' rows='20'></textarea><br />
<input type='submit' name='Submit' value='Train Filter' />
</fieldset>
</form>
<h2>Determine category for a document</h2>
<form action='index.php' method='POST'>
<fieldset>
<input type='hidden' name='action' value='cat'/>
Paste the document text :<br />
<textarea name="document" cols='50' rows='20'></textarea><br />
<input type='submit' name='Submit' value='Determine Category' />
</fieldset>
</form>
<h2>Remove a category</h2>
<form action='index.php' method='POST'>
<fieldset>
<input type='hidden' name='action' value='remcat'/>
Category to remove :
<select name='cat'>
<?php
// One <option> per category for the "Remove a category" form.
// foreach replaces reset()/list()/each(): each() was removed in PHP 8.0.
foreach ($cats as $key => $val) {
    // Category names come from user input and may contain quotes, which would
    // break out of the single-quoted attribute; escape them. The encoding
    // matches the charset declared in the page's meta tag.
    $optionValue = htmlspecialchars((string) $key, ENT_QUOTES, 'ISO-8859-1');
    echo "<option value='$optionValue'>$optionValue</option>\n";
}
?>
</select>
<input type='submit' name='Submit' value='Remove Category' />
</fieldset>
</form>
<h2>Remove a Document</h2>
<form action='index.php' method='POST'>
<fieldset>
<input type='hidden' name='action' value='untrain'/>
Document to remove :
<select name='docid'>
<?php
// One <option> per row of nb_references for the "Remove a Document" form,
// labelled "<document id> - <category id>".
$con = new Connection($login, $pass, $server, $db);
$rs = $con->select("SELECT * FROM nb_references");
while (!$rs->EOF()) {
    // Both values originate from user input (see the training form), so
    // escape them before placing them in an attribute and in element text.
    // The encoding matches the charset declared in the page's meta tag.
    $docIdHtml = htmlspecialchars((string) $rs->f('id'), ENT_QUOTES, 'ISO-8859-1');
    $docCatHtml = htmlspecialchars((string) $rs->f('category_id'), ENT_QUOTES, 'ISO-8859-1');
    echo "<option value='".$docIdHtml."'>".$docIdHtml." - ".$docCatHtml."</option>\n";
    $rs->moveNext();
}
?>
</select>
<input type='submit' name='Submit' value='Remove Document' />
</fieldset>
</form>
<pre>
This file is part of PHP Naive Bayesian Filter.
The Initial Developer of the Original Code is
Loic d'Anterroches [loic xhtml.net].
Portions created by the Initial Developer are Copyright (C) 2003
the Initial Developer.
Contributor(s): J Wynia - English Translation
PHP Naive Bayesian Filter is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
PHP Naive Bayesian Filter is distributed in the hope that it will
be useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PHP Naive Bayesian Filter; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
</pre>
</body>
</html>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -