
📄 NaiveBayesSimpleSparse.java

📁 wekaUT is a collection of semi-supervised learning classifiers built on Weka, developed at the University of Texas at Austin.
💻 Java
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    NaiveBayesSimpleSparse.java
 *    Copyright (C) 1999 Eibe Frank
 *    Adapted for SparseInstance's by Mikhail Bilenko 2002
 *
 */

package weka.classifiers.sparse;

import weka.classifiers.Classifier;
import weka.classifiers.DistributionClassifier;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayesSimple;

import java.io.*;
import java.util.*;

import weka.core.*;

/**
 * Class for building and using a simple Naive Bayes classifier that is
 * adapted for Sparse Instances, assuming attribute values are counts of the
 * presence of a descriptive token (e.g. frequency of a word in text categorization)
 * and assuming a multinomial model for generation of examples/documents.
 * See:
 *   T. Mitchell, Machine Learning, McGraw Hill, 1997, sections 6.9 & 6.10
 *   and/or
 *   Andrew McCallum and Kamal Nigam, "A Comparison of Event Models for Naive Bayes Text
 *     Classification", Papers from the AAAI-98 Workshop on Text Categorization, 1998,
 *     pp. 41--48
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz), Mikhail Bilenko (mbilenko@cs.utexas.edu),
 *         Ray Mooney (mooney@cs.utexas.edu)
 */
public class NaiveBayesSimpleSparse extends DistributionClassifier
    implements OptionHandler, WeightedInstancesHandler {

    /** The prior probabilities of the classes. */
    protected double[] m_priors;

    /** Conditional probabilities of each attribute given each class */
    protected double[][] m_condProbs;

    /** The instances used for training. */
    protected Instances m_instances;

    /** The number of classes */
    protected int m_numClasses;

    /** Attribute index for class attribute */
    protected int m_classIndex;

    /** The total number of features */
    protected int m_numAttributes;

    /** m parameter for Laplace m-estimate, corresponding to size of pseudo-sample */
    protected double m_m = 1.0;

    /** A debug flag */
    protected boolean m_debug = false;

    /**
     * Generates the classifier.
     *
     * @param instances set of instances serving as training data
     * @exception Exception if the classifier has not been generated successfully
     */
    public void buildClassifier(Instances instances) throws Exception {
        if (instances.checkForStringAttributes()) {
            throw new UnsupportedAttributeTypeException("Sparse Instances are optimized for non-string attributes!");
        }
        if (instances.checkForNominalAttributes()) {
            throw new UnsupportedAttributeTypeException("Sparse Instances are optimized for non-nominal attributes!");
        }
        if (instances.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException("Sparse Naive Bayes: Class is numeric!");
        }
        if (m_debug) {
            System.out.println("Training on " + instances.numInstances() + " instances");
        }

        m_instances = instances;
        m_classIndex = instances.classIndex();
        m_numClasses = instances.numClasses();
        m_numAttributes = instances.numAttributes();
        int numTrainingInstances = 0;

        // Reserve space
        m_priors = new double[m_numClasses];
        m_condProbs = new double[m_numAttributes][m_numClasses];
        double[] totalCounts = new double[m_numClasses];    // stores total count of all tokens in each category

        // Compute counts and sums
        Enumeration enumInsts = instances.enumerateInstances();
        while (enumInsts.hasMoreElements()) {
            SparseInstance instance = (SparseInstance) enumInsts.nextElement();
            int classIdx = (int) instance.classValue();
            if (!instance.classIsMissing()) {
                for (int i = 0; i < instance.numValues(); i++) {
                    int attrIdx = instance.index(i);
                    if (attrIdx == m_classIndex) continue;
                    double value = instance.valueSparse(i);
                    if (Instance.isMissingValue(value))
                        throw new NoSupportForMissingValuesException("Sparse instance should not have missing value");
                    // Get the array of counts per value per class
                    double incr = value * instance.weight();
                    m_condProbs[attrIdx][classIdx] += incr;
                    totalCounts[classIdx] += incr;
                }
                m_priors[classIdx] += instance.weight();
                numTrainingInstances += instance.weight();
            }
        }

        // Compute log probabilities for each attribute
        for (int i = 0; i < m_numAttributes; i++) {
            if (i == m_classIndex) continue;
            double[] countArray = m_condProbs[i];
            for (int j = 0; j < m_numClasses; j++) {
                // Laplace smoothing
                countArray[j] = Math.log((countArray[j] + (m_m / m_numAttributes)) / (totalCounts[j] + m_m));
            }
        }

        // Calculate priors
        for (int i = 0; i < m_numClasses; i++) {
            m_priors[i] = Math.log((m_priors[i] + (m_m / m_numClasses)) / (numTrainingInstances + m_m));
        }

        if (m_debug) {
            System.out.print("Priors: [");
            for (int i = 0; i < m_priors.length; i++)
                System.out.print(m_priors[i] + "(" + Math.exp(m_priors[i]) + ") ");
            System.out.println("]");
        }
    }

    /**
     * Calculates the unnormalized class membership log-probabilities for the given test instance.
     *
     * @param _instance the instance to be classified - must be a SparseInstance
     * @return predicted class probability distribution
     * @exception Exception if distribution can't be computed or if the instance is not a SparseInstance
     */
    public double[] unNormalizedDistributionForInstance(Instance _instance) throws Exception {
        if (!(_instance instanceof SparseInstance)) {
            throw new Exception("NaiveBayesSimpleSparse works only with SparseInstance's!");
        }
        SparseInstance instance = (SparseInstance) _instance;

        double[] probs = (double[]) m_priors.clone();
        for (int i = 0; i < instance.numValues(); i++) {
            int attrIdx = instance.index(i);
            if (attrIdx == m_classIndex) continue;
            double value = instance.valueSparse(i);
            double[] condProb = m_condProbs[attrIdx];
            for (int j = 0; j < m_numClasses; j++) {
                probs[j] += value * condProb[j];
            }
        }
        if (m_debug) {
            System.out.print("Computed class probabilities:\n[ ");
            for (int i = 0; i < probs.length; i++)
                System.out.print(probs[i] + "(" + Math.exp(probs[i]) + ") ");
            System.out.println("]");
        }
        return probs;
    }

    /**
     * Calculates the class membership probabilities for the given test instance.
     *
     * @param instance the instance to be classified
     * @return predicted class probability distribution
     * @exception Exception if distribution can't be computed
     */
    public double[] distributionForInstance(Instance instance) throws Exception {
        double[] logProbs = unNormalizedDistributionForInstance(instance);
        NaiveBayesSimple.normalizeLogs(logProbs);
        return logProbs;
    }

    /** Get Laplace m parameter that controls amount of smoothing */
    public double getM() {
        return m_m;
    }

    /** Set Laplace m parameter that controls amount of smoothing */
    public void setM(double m) {
        m_m = m;
    }

    public String mTipText() {
        return "set amount of smoothing (m in m-estimate)";
    }

    public String globalInfo() {
        return "NaiveBayes for sparse instances (e.g. text) using a multinomial model";
    }

    /**
     * Returns a description of the classifier.
     *
     * @return a description of the classifier as a string.
     */
    public String toString() {
        if (m_instances == null) {
            return "Sparse Naive Bayes: No model built yet.";
        }
        try {
            StringBuffer text = new StringBuffer("Sparse Naive Bayes:\n");
            text.append("Prior class probabilities:\n");
            for (int i = 0; i < m_priors.length; i++) {
                text.append(Utils.doubleToString(m_priors[i], 10, 8) + "(" + Utils.doubleToString(Math.exp(m_priors[i]), 10, 8) + ")\t");
            }

            // Only print out probabilities for each attribute in debug mode
            if (m_debug) {
                // Go through all attributes, printing out their conditional probabilities for all classes
                for (int i = 0; i < m_numAttributes; i++) {
                    if (i == m_classIndex) continue;
                    double[] condProb = m_condProbs[i];
                    Attribute attribute = m_instances.attribute(i);
                    text.append("Attribute " + attribute.name() + ": ");
                    text.append("[ ");
                    for (int k = 0; k < m_numClasses; k++) {
                        text.append(Utils.doubleToString(condProb[k], 10, 8) + "\t");
                    }
                    text.append(" ]\n");
                    text.append("\n");
                }
            }
            return text.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return new String("Can't print Sparse Naive Bayes classifier: " + e);
        }
    }

    /**
     * Parses a given list of options. Valid options are:<p>
     *
     * -M num <br>
     * Set amount of Laplace m-estimate smoothing (size of pseudo-sample)
     *
     * @param options the list of options as an array of strings
     * @exception Exception if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {
        String mString = Utils.getOption('M', options);
        if (mString.length() != 0) {
            setM(Double.parseDouble(mString));
        }
    }

    /**
     * Gets the current settings of NaiveBayesSimpleSparse.
     *
     * @return an array of strings suitable for passing to setOptions()
     */
    public String[] getOptions() {
        String[] options = new String[2];
        options[0] = "-M";
        options[1] = "" + getM();
        return options;
    }

    /**
     * Returns an enumeration describing the available options.
     *
     * @return an enumeration of all the available options.
     */
    public Enumeration listOptions() {
        Vector newVector = new Vector(1);
        newVector.addElement(new Option(
                                        "\tM: Controls amount of Laplace smoothing " +
                                        "\t(Default = 1)",
                                        "M", 1, "-M <value>"));
        return newVector.elements();
    }

    /**
     * Main method for testing this class.
     *
     * @param argv the options
     */
    public static void main(String[] argv) {
        Classifier scheme;

        try {
            scheme = new NaiveBayesSimpleSparse();
            System.out.println(Evaluation.evaluateModel(scheme, argv));
        } catch (Exception e) {
            System.err.println(e.getMessage());
        }
    }
}
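A note on the estimates computed in buildClassifier above: for token attribute w and class c, the stored conditional log-probability is the m-estimate log((count(w,c) + m/V) / (N_c + m)), where count(w,c) is the weighted number of occurrences of w in class-c training instances, N_c is the total token count for class c, V is the number of attributes, and m is the pseudo-sample size set by -M (default 1.0). Class priors are smoothed the same way, with m/numClasses in place of m/V. At prediction time, unNormalizedDistributionForInstance adds, for each class, the log prior plus each token count times its log conditional probability, and distributionForInstance turns these log scores into a normalized probability distribution.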

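For reference, here is a minimal sketch of how the classifier might be used programmatically. The file name sparse_train.arff and the demo class are hypothetical; the sketch assumes the data is stored in Weka's sparse ARFF format (so instances are read in as SparseInstance objects), that all feature attributes are numeric token counts, that the last attribute is the nominal class, and that the wekaUT classes are on the classpath.

import java.io.FileReader;

import weka.core.Instances;
import weka.classifiers.sparse.NaiveBayesSimpleSparse;

public class NaiveBayesSimpleSparseDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical sparse-format ARFF file: numeric token-count attributes
        // plus a nominal class attribute in the last position.
        Instances train = new Instances(new FileReader("sparse_train.arff"));
        train.setClassIndex(train.numAttributes() - 1);

        NaiveBayesSimpleSparse nb = new NaiveBayesSimpleSparse();
        nb.setM(1.0);                 // size of the Laplace pseudo-sample (the -M option)
        nb.buildClassifier(train);

        // Class membership probabilities for the first training instance.
        // This works because sparse ARFF data is read in as SparseInstance objects.
        double[] dist = nb.distributionForInstance(train.instance(0));
        for (int j = 0; j < dist.length; j++) {
            System.out.println(train.classAttribute().value(j) + ": " + dist[j]);
        }
    }
}

The classifier can also be driven through Weka's standard evaluation harness, which the main method above delegates to, with something like: java weka.classifiers.sparse.NaiveBayesSimpleSparse -t sparse_train.arff -M 1.0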