⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 semisupem.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */package weka.classifiers.bayes;import weka.classifiers.*;import weka.classifiers.sparse.*;import java.io.*;import java.util.*;import weka.core.*;/** * Semi supervised learner that uses EM initialized with labeled data and then * runs EM iterations on the unlabeled data to improve the model. * * See: Kamal Nigam, Andrew McCallum, Sebastian Thrun and Tom * Mitchell. Text Classification from Labeled and Unlabeled Documents * using EM. Machine Learning, 39(2/3). pp. 103-134. 2000. * * Assumes use of a base classifier that is a SoftClassifer that * accepts training data with a soft class distribution rather than * a hard assignment, i.e. SoftClassifiedInstances. 
Sample soft * classifiers are NaiveBayesSimpleSoft and NaiveBayesSimpleSparseSoft * * @author Ray Mooney  (mooney@cs.utexas.edu)*/public class SemiSupEM extends DistributionClassifier implements SemiSupClassifier, OptionHandler{    /** Original set of unlabeled Instances */    protected Instances m_UnlabeledData;    /** Soft labeled version of unlabeled data */    protected SoftClassifiedInstances m_UnlabeledInstances;    /** Hard Labeled data */    protected Instances m_LabeledInstances;    /** Complete set of labeled and unlabeled instances for EM */    protected SoftClassifiedInstances m_AllInstances;    /** Base classifier that supports soft classified instances */    protected SoftClassifier m_Classifier = new NaiveBayesSimpleSoft();    /** Weight of unlabeled examples during EM training versus labeled examples (see Nigam et al.)*/    protected double m_Lambda = 1.0;    /** random numbers and seed */    protected Random m_Random;    protected int m_rseed;        /** maximum iterations to perform */    protected int m_max_iterations;    /** Create soft labeled Seed for unseen classes */    protected boolean m_seedUnseenClasses;    /** Verbose? */    protected boolean m_verbose;    protected static double m_minLogLikelihoodIncr = 1e-6;    /** The minimum values for numeric attributes. */    protected double [] m_MinArray;        /** The maximum values for numeric attributes. */    protected double [] m_MaxArray;    /**     * Returns a string describing this clusterer     * @return a description of the evaluator suitable for     * displaying in the explorer/experimenter gui     */    public String globalInfo() {	return "Classifier trained using both labeled and unlabeled data using EM";    }    /**     * Returns an enumeration describing the available options.. <p>     *     * Valid options are:<p>     *     * -V <br>     * Verbose. <p>     *     * -I <max iterations> <br>     * Terminate after this many iterations if EM has not converged. 
<p>     *     * -S <seed> <br>     * Specify random number seed. <p>     *     * -M <num> <br>     *  Set the minimum allowable standard deviation for normal density      * calculation. <p>     *     * @return an enumeration of all the available options.     *     **/    public Enumeration listOptions () {	Vector newVector = new Vector(7);	newVector.addElement(new Option(					"\tFull name of classifier to boost.\n"					+"\teg: weka.classifiers.bayes.NaiveBayes",					"W", 1, "-W <class name>"));	newVector.addElement(new Option("\tLambda weight for unlabeled data.\n(default 1)", "L"					, 1, "-L <num>"));	newVector.addElement(new Option("\tmax iterations.\n(default 100)", "I"					, 1, "-I <num>"));	newVector.addElement(new Option("\trandom number seed.\n(default 1)"					, "S", 1, "-S <num>"));	newVector.addElement(new Option("\tverbose.", "V", 0, "-V"));	newVector.addElement(new Option("\tSeed unseen classes.", "U", 0, "-U"));	if ((m_Classifier != null) &&	    (m_Classifier instanceof OptionHandler)) {	    newVector.addElement(new Option(					    "",					    "", 0, "\nOptions specific to classifier "					    + m_Classifier.getClass().getName() + ":"));	    Enumeration enum = ((OptionHandler)m_Classifier).listOptions();	    while (enum.hasMoreElements()) {		newVector.addElement(enum.nextElement());	    }	}	return  newVector.elements();    }    /**     * Parses a given list of options.     
* @param options the list of options as an array of strings     * @exception Exception if an option is not supported     *     **/    public void setOptions (String[] options)	throws Exception    {	resetOptions();	String classifierName = Utils.getOption('W', options);	if (classifierName.length() == 0) {	    throw new Exception("A classifier must be specified with"				+ " the -W option.");	}	setClassifier((SoftClassifier)Classifier.forName(classifierName,							 Utils.partitionOptions(options)));	setDebug(Utils.getFlag('V', options));	setSeedUnseenClasses(Utils.getFlag('U', options));	String optionString = Utils.getOption('I', options);	if (optionString.length() != 0) {	    setMaxIterations(Integer.parseInt(optionString));	}		optionString = Utils.getOption('S', options);	if (optionString.length() != 0) {	    setSeed(Integer.parseInt(optionString));	}	optionString = Utils.getOption('L', options);	if (optionString.length() != 0) {	    setLambda(Double.parseDouble(optionString));	}    }    /**     * Reset to default options     */    protected void resetOptions () {	m_max_iterations = 100;	m_rseed = 100;	m_verbose = false;	m_seedUnseenClasses = false;	m_Classifier = new NaiveBayesSimpleSoft();	m_Lambda = 1.0;    }    /**     * Returns the tip text for this property     * @return tip text for this property suitable for     * displaying in the explorer/experimenter gui     */    public String seedTipText() {	return "random number seed";    }    /**     * Set the random number seed     *     * @param s the seed     */    public void setSeed (int s) {	m_rseed = s;    }    /**     * Get the random number seed     *     * @return the seed     */    public int getSeed () {	return  m_rseed;    }    /**     * Returns the tip text for this property     * @return tip text for this property suitable for     * displaying in the explorer/experimenter gui     */    public String maxIterationsTipText() {	return "maximum number of EM iterations";    }    /**     * Set the maximum 
number of iterations to perform     *     * @param i the number of iterations     * @exception Exception if i is less than 1     */    public void setMaxIterations (int i)	throws Exception    {	if (i < 1) {	    throw  new Exception("Maximum number of iterations must be > 0!");	}	m_max_iterations = i;    }    /**     * Get the maximum number of iterations     *     * @return the number of iterations     */    public int getMaxIterations () {	return  m_max_iterations;    }    /**     * Set debug mode - verbose output     *     * @param v true for verbose output     */    public void setDebug (boolean v) {	m_verbose = v;    }    /**     * Get debug mode     *     * @return true if debug mode is set     */    public boolean getDebug () {	return  m_verbose;    }    public void setSeedUnseenClasses (boolean v) {	m_seedUnseenClasses = v;    }    public boolean getSeedUnseenClasses () {	return m_seedUnseenClasses;    }    public String seedUnseenClassesTipText() {	return "create soft seeds for unseen classes using farthest-first";    }    public void setLambda (double v) {	m_Lambda = v;    }    public double getLambda () {	return m_Lambda;    }    public String lambdaTipText() {	return "set weight of unlabeled examples vs. labeled";    }    /**     * Set the classifier for boosting.      *     * @param newClassifier the Classifier to use.     */    public void setClassifier(SoftClassifier newClassifier) {	m_Classifier = newClassifier;    }    /**     * Get the classifier used as the classifier     *     * @return the classifier used as the classifier     */    public SoftClassifier getClassifier() {	return m_Classifier;    }    public String classifierTipText() {	return "Base SoftClassifier to use for underlying probabilistic classification";    }    /**     * Gets the current settings of EM.     
*     * @return an array of strings suitable for passing to setOptions()     */    public String[] getOptions () {	String [] classifierOptions = new String [0];	if ((m_Classifier != null) && 	    (m_Classifier instanceof OptionHandler)) {	    classifierOptions = ((OptionHandler)m_Classifier).getOptions();	}		String [] options = new String [classifierOptions.length + 10];	int current = 0;	if (m_verbose) {	    options[current++] = "-V";	}	if (m_seedUnseenClasses) {	    options[current++] = "-U";	}	options[current++] = "-I";	options[current++] = "" + m_max_iterations;	options[current++] = "-S";	options[current++] = "" + m_rseed;	options[current++] = "-L";	options[current++] = "" + m_Lambda;	if (getClassifier() != null) {	    options[current++] = "-W";	    options[current++] = getClassifier().getClass().getName();	}	options[current++] = "--";		System.arraycopy(classifierOptions, 0, options, current, 			 classifierOptions.length);	current += classifierOptions.length;		while (current < options.length) {	    options[current++] = "";	}	return  options;    }    /**      * Provide unlabeled data to the classifier.     * @unlabeled the unlabeled Instances     */    public void setUnlabeled(Instances unlabeled){	m_UnlabeledData = unlabeled;    }    /** Simple constructor, must set options using command line or GUI */    public SemiSupEM() {	resetOptions();    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -