📄 bayesianclassifiertest.java
字号:
/*
* ====================================================================
*
* The Apache Software License, Version 1.1
*
* Copyright (c) 2003 Nick Lothian. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* developers of Classifier4J (http://classifier4j.sf.net/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The name "Classifier4J" must not be used to endorse or promote
* products derived from this software without prior written
* permission. For written permission, please contact
* http://sourceforge.net/users/nicklothian/.
*
* 5. Products derived from this software may not be called
* "Classifier4J", nor may "Classifier4J" appear in their names
* without prior written permission. For written permission, please
* contact http://sourceforge.net/users/nicklothian/.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*/
package net.sf.classifier4J.bayesian;
import junit.framework.TestCase;
import junit.textui.TestRunner;
import net.sf.classifier4J.DefaultStopWordsProvider;
import net.sf.classifier4J.DefaultTokenizer;
import net.sf.classifier4J.ICategorisedClassifier;
import net.sf.classifier4J.IClassifier;
import net.sf.classifier4J.IStopWordProvider;
import net.sf.classifier4J.ITokenizer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Unit tests for {@link BayesianClassifier}: classification against a
 * pre-populated words data source, teaching, accessor wiring, case
 * sensitivity handling and the overall probability calculation.
 *
 * @author Nick Lothian
 * @author Peter Leschev
 */
public class BayesianClassifierTest extends TestCase {

    /** Shared logger; JUnit creates one test instance per test method, so keep it static. */
    private static final Log log = LogFactory.getLog(BayesianClassifierTest.class);

    /**
     * Creates the test case.
     *
     * @param name the name of the test method to run
     */
    public BayesianClassifierTest(String name) {
        super(name);
    }

    /**
     * Asserts that classification yields {@link IClassifier#NEUTRAL_PROBABILITY}
     * while the data source holds no word probabilities, and roughly 0.96 once
     * the probabilities below have been stored.
     */
    public void testClassify() throws Exception {
        SimpleWordsDataSource wds = new SimpleWordsDataSource();
        BayesianClassifier classifier = new BayesianClassifier(wds);

        String[] sentence = { "This", "is", "a", "sentence", "about", "java" };

        // Nothing stored in the data source yet.
        assertEquals(IClassifier.NEUTRAL_PROBABILITY,
                classifier.classify(ICategorisedClassifier.DEFAULT_CATEGORY, sentence), 0d);

        wds.setWordProbability(new WordProbability("This", 0.5d));
        wds.setWordProbability(new WordProbability("is", 0.5d));
        wds.setWordProbability(new WordProbability("a", 0.5d));
        wds.setWordProbability(new WordProbability("sentence", 0.2d));
        wds.setWordProbability(new WordProbability("about", 0.5d));
        wds.setWordProbability(new WordProbability("java", 0.99d));

        assertEquals(0.96d,
                classifier.classify(ICategorisedClassifier.DEFAULT_CATEGORY, sentence), 0.009d);
    }

    /**
     * Teaches the classifier three matching and two non-matching token lists
     * and asserts that {@code isMatch} reports each of them the way it was taught.
     */
    public void testTeaching() throws Exception {
        BayesianClassifier classifier = new BayesianClassifier();

        String[] sentence1 = {"The", "menu", "tag", "library", "manages", "the",
            "complex", "process", "of", "creating", "menus", "in",
            "JavaScript", "The", "menu", "tag", "itself", "is",
            "an", "abstract", "class", "that", "extends", "the",
            "TagSupport", "class", "and", "overrides", "the",
            "doStartTag", "and", "doEndTag", "methods.", "The",
            "getMenu", "method,", "which", "is", "a", "template",
            "method", "and", "should", "be", "overridden", "in",
            "the", "subclasses,", "provides", "JavaScript", "to",
            "add", "menu", "items", "in", "the", "menu",
            "structure", "created", "in", "the", "doStartTag",
            "method", "Subclasses", "of", "the", "menu", "tag",
            "override", "the", "getMenu", "method,", "which",
            "uses", "menu", "builders", "to", "render", "menu",
            "data", "from", "the", "data", "source"};

        String[] sentence2 = {"I", "witness", "a", "more", "subtle",
            "demonstration", "of", "real", "time", "physics",
            "simulation", "at", "the", "tiny", "Palo", "Alto",
            "office", "of", "Havok", "a", "competing", "physics",
            "engine", "shop", "On", "the", "screen", "a",
            "computer", "generated", "sailboat", "floats", "in",
            "a", "stone", "lined", "pool", "of", "water", "The",
            "company's", "genial", "Irish", "born", "cofounder",
            "Hugh", "Reynolds", "shows", "me", "how", "to",
            "push", "the", "boat", "with", "a", "mouse", "When",
            "I", "nudge", "it", "air", "fills", "the", "sail",
            "causing", "the", "ship", "to", "tilt", "leeward",
            "Ripples", "in", "the", "water", "deflect", "off",
            "the", "stones", "intersecting", "with", "one",
            "another", "I", "urge", "the", "boat", "onward",
            "and", "it", "glides", "effortlessly", "into", "the",
            "wall", "Reynolds", "tosses", "in", "a", "handful",
            "of", "virtual", "coins", "they", "spin", "through",
            "the", "air,", "splash", "into", "the", "water,",
            "and", "sink"};

        String[] sentence3 = {"The", "New", "Input", "Output", "NIO", "libraries",
            "introduced", "in", "Java", "2", "Platform",
            "Standard", "Edition", "J2SE", "1.4", "address",
            "this", "problem", "NIO", "uses", "a", "buffer",
            "oriented", "model", "That", "is", "NIO", "deals",
            "with", "data", "primarily", "in", "large", "blocks",
            "This", "eliminates", "the", "overhead", "caused",
            "by", "the", "stream", "model", "and", "even", "makes",
            "use", "of", "OS", "level", "facilities", "where",
            "possible", "to", "maximize", "throughput"};

        String[] sentence4 = {"As", "governments", "scramble", "to", "contain",
            "SARS", "the", "World", "Health", "Organisation",
            "said", "it", "was", "extending", "the", "scope", "of",
            "its", "April", "2", "travel", "alert", "to",
            "include", "Beijing", "and", "the", "northern",
            "Chinese", "province", "of", "Shanxi", "together",
            "with", "Toronto", "the", "epicentre", "of", "the",
            "SARS", "outbreak", "in", "Canada"};

        String[] sentence5 = {"That", "was", "our", "worst", "problem", "I",
            "tried", "to", "see", "it", "the", "XP", "way", "Well",
            "what", "we", "can", "do", "is", "implement",
            "something", "I", "can't", "give", "any", "guarantees",
            "as", "to", "how", "much", "of", "it", "will", "be",
            "implemented", "in", "a", "month", "I", "won't",
            "even", "hazard", "a", "guess", "as", "to", "how",
            "long", "it", "would", "take", "to", "implement", "as",
            "a", "whole", "I", "can't", "draw", "UML", "diagrams",
            "for", "it", "or", "write", "technical", "specs",
            "that", "would", "take", "time", "from", "coding",
            "it", "which", "we", "can't", "afford", "Oh", "and",
            "I", "have", "two", "kids", "I", "can't", "do", "much",
            "OverTime", "But", "I", "should", "be", "able", "to",
            "do", "something", "simple", "that", "will", "have",
            "very", "few", "bugs", "and", "show", "a", "working",
            "program", "early", "and", "often"};

        // Odd-numbered sentences are taught as matches, even-numbered as non-matches.
        classifier.teachMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence1);
        classifier.teachNonMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence2);
        classifier.teachMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence3);
        classifier.teachNonMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence4);
        classifier.teachMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence5);

        assertTrue(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence1));
        assertFalse(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence2));
        assertTrue(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence3));
        assertFalse(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence4));
        assertTrue(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence5));
    }

    /** Asserts that the data source passed to the constructor is returned by the getter. */
    public void testGetWordsDataSource() throws Exception {
        SimpleWordsDataSource wds = new SimpleWordsDataSource();
        BayesianClassifier classifier = new BayesianClassifier(wds);
        assertEquals(wds, classifier.getWordsDataSource());
    }

    /** Asserts that the tokenizer passed to the constructor is returned by the getter. */
    public void testGetTokenizer() throws Exception {
        SimpleWordsDataSource wds = new SimpleWordsDataSource();
        ITokenizer tokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
        BayesianClassifier classifier = new BayesianClassifier(wds, tokenizer);
        assertEquals(tokenizer, classifier.getTokenizer());
    }

    /** Asserts that the stop word provider passed to the constructor is returned by the getter. */
    public void testGetStopWordProvider() throws Exception {
        SimpleWordsDataSource wds = new SimpleWordsDataSource();
        ITokenizer tokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
        IStopWordProvider stopWordProvider = new DefaultStopWordsProvider();
        BayesianClassifier classifier = new BayesianClassifier(wds, tokenizer, stopWordProvider);
        assertEquals(stopWordProvider, classifier.getStopWordProvider());
    }

    /** Asserts the case-insensitive default and that {@code setCaseSensitive(true)} is reflected by the getter. */
    public void testCaseSensitive() throws Exception {
        BayesianClassifier classifier = new BayesianClassifier();
        assertFalse(classifier.isCaseSensitive()); // case insensitive by default
        classifier.setCaseSensitive(true);
        assertTrue(classifier.isCaseSensitive());
    }

    /**
     * Asserts that {@code transformWord} rejects {@code null} with an
     * {@link IllegalArgumentException}, lower-cases its input while the
     * classifier is case insensitive, and returns the input unchanged once
     * case sensitivity is switched on.
     */
    public void testTransformWord() throws Exception {
        BayesianClassifier classifier = new BayesianClassifier();
        assertFalse(classifier.isCaseSensitive());

        String word = null;
        try {
            classifier.transformWord(word);
            fail("No exception thrown when null passed");
        } catch (IllegalArgumentException expected) {
            // do nothing - this should be thrown
        }

        word = "myWord";
        String lowerCased = word.toLowerCase();
        assertEquals(lowerCased, classifier.transformWord(word));

        classifier.setCaseSensitive(true);
        // Compare by value: a reference comparison (assertNotSame) against the
        // freshly created lower-cased String could never fail, so it verified nothing.
        assertFalse(lowerCased.equals(classifier.transformWord(word)));
        assertEquals(word, classifier.transformWord(word));
    }

    /**
     * Asserts that, for three words sharing probability {@code p}, the overall
     * probability equals {@code p^3 / (p^3 + (1 - p)^3)} within a small margin.
     */
    public void testCalculateOverallProbability() throws Exception {
        double prob = 0.3d;
        WordProbability wp1 = new WordProbability("myWord1", prob);
        WordProbability wp2 = new WordProbability("myWord2", prob);
        WordProbability wp3 = new WordProbability("myWord3", prob);
        WordProbability[] wps = {wp1, wp2, wp3};

        double errorMargin = 0.0001d;

        double xy = (prob * prob * prob);                   // product of the probabilities
        double z = (1 - prob) * (1 - prob) * (1 - prob);    // product of the complements
        double result = xy / (xy + z);

        BayesianClassifier classifier = new BayesianClassifier();
        assertEquals(result, classifier.calculateOverallProbability(wps), errorMargin);
    }

    /**
     * Runs this test class with the JUnit text UI runner.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        TestRunner.run(BayesianClassifierTest.class);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -