📄 .#semisupclusterersplitevaluator.java.1.11

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 11
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* *    SemiSupClustererSplitEvaluator.java *    Copyright (C) 2002 Sugato Basu * */package weka.experiment;import java.io.*;import java.util.*;import weka.core.*;import weka.clusterers.*;/** * A SplitEvaluator that produces results for a semi-supervised clustering scheme * on a nominal class attribute. * * -W clustername <br> * Specify the full class name of the clusterer to evaluate. <p> * * -C class index <br> * The index of the class for which statistics are to * be output. (default 1) <p> * * @author Sugato Basu */public class SemiSupClustererSplitEvaluator implements SplitEvaluator,   OptionHandler {    /** The semi-supervised clusterer used for evaluation */  protected Clusterer m_Clusterer = new MPCKMeans();  /** Holds the statistics for the most recent application of the clusterer */  protected String m_result = null;  /** The clusterer options (if any) */  protected String m_ClustererOptions = "";  /** The clusterer version */  protected String m_ClustererVersion = "";  /** The length of a key */  private static final int KEY_SIZE = 3;  /** The length of a result */  private static final int RESULT_SIZE = 14;  /** Class index for information retrieval statistics (default 0) */  private int m_IRclass = 0;  /**   * No args constructor.   
*/  public SemiSupClustererSplitEvaluator() {    updateOptions();  }  /** Does nothing, since cluster evaluation does not allow additional measures */  public void setAdditionalMeasures(String [] additionalMeasures){}    /**   * Returns a string describing this split evaluator   * @return a description of the split evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return " A SplitEvaluator that produces results for a semi-supervised "      + "clustering scheme on a nominal class attribute.";  }  /**   * Returns an enumeration describing the available options..   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector newVector = new Vector(2);    newVector.addElement(new Option(	     "\tThe full class name of the clusterer.\n"	      +"\teg: weka.clusterers.SimpleKMeans", 	     "W", 1, 	     "-W <class name>"));    newVector.addElement(new Option(	     "\tThe index of the class for which IR statistics\n" +	     "\tare to be output. (default 1)",	     "C", 1, 	     "-C <index>"));    if ((m_Clusterer != null) &&	(m_Clusterer instanceof OptionHandler)) {      newVector.addElement(new Option(	     "",	     "", 0, "\nOptions specific to clusterer "	     + m_Clusterer.getClass().getName() + ":"));      Enumeration enum = ((OptionHandler)m_Clusterer).listOptions();      while (enum.hasMoreElements()) {	newVector.addElement(enum.nextElement());      }    }    return newVector.elements();  }  /**   * Parses a given list of options. Valid options are:<p>   *   * -W classname <br>   * Specify the full class name of the clusterer to evaluate. <p>   *   * -C class index <br>   * The index of the class for which IR statistics are to   * be output. (default 1) <p>   *   * All option after -- will be passed to the clusterer.   
*   * @param options the list of options as an array of strings   * @exception Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {        String cName = Utils.getOption('W', options);    if (cName.length() == 0) {      throw new Exception("A clusterer must be specified with"			  + " the -W option.");    }    // Do it first without options, so if an exception is thrown during    // the option setting, listOptions will contain options for the actual    // Clusterer.    setClusterer(Clusterer.forName(cName, null));    if (getClusterer() instanceof OptionHandler) {      ((OptionHandler) getClusterer())	.setOptions(Utils.partitionOptions(options));      updateOptions();    }    String indexName = Utils.getOption('C', options);    if (indexName.length() != 0) {      m_IRclass = (new Integer(indexName)).intValue() - 1;    } else {      m_IRclass = 0;    }  }  /**   * Gets the current settings of the Clusterer.   *   * @return an array of strings suitable for passing to setOptions   */  public String [] getOptions() {    String [] clustererOptions = new String [0];    if ((m_Clusterer != null) && 	(m_Clusterer instanceof OptionHandler)) {      clustererOptions = ((OptionHandler)m_Clusterer).getOptions();    }        String [] options = new String [clustererOptions.length + 5];    int current = 0;    if (getClusterer() != null) {      options[current++] = "-W";      options[current++] = getClusterer().getClass().getName();    }    options[current++] = "-C";     options[current++] = "" + (m_IRclass + 1);    options[current++] = "--";    System.arraycopy(clustererOptions, 0, options, current, 		     clustererOptions.length);    current += clustererOptions.length;    while (current < options.length) {      options[current++] = "";    }    return options;  }  /**   * Gets the data types of each of the key columns produced for a single run.   * The number of key fields must be constant   * for a given SplitEvaluator.   
*
   * @return an array containing objects of the type of each key column. The
   * objects should be Strings, or Doubles.
   */
  public Object [] getKeyTypes() {
    // Every key column is a String, represented by an empty-string exemplar.
    Object [] types = new Object[KEY_SIZE];
    Arrays.fill(types, "");
    return types;
  }

  /**
   * Gets the column names of the key produced for a single run. The number
   * of key fields must stay constant for a given SplitEvaluator.
   *
   * @return an array containing the name of each key column
   */
  public String [] getKeyNames() {
    return new String[] {"Scheme", "Scheme_options", "Scheme_version_ID"};
  }

  /**
   * Gets the key describing the current SplitEvaluator: the class name of
   * the clusterer, followed by the stored option and version strings.
   * The number of key fields must stay constant for a given SplitEvaluator.
   *
   * @return an array of objects containing the key.
   */
  public Object [] getKey() {
    String schemeName = m_Clusterer.getClass().getName();
    return new Object[] {schemeName, m_ClustererOptions, m_ClustererVersion};
  }

  /**
   * Gets the data types of each of the result columns produced for a
   * single run. The number of result fields must be constant
   * for a given SplitEvaluator.
   *
   * @return an array containing objects of the type of each result column.
   * The objects should be Strings, or Doubles.
*/  public Object [] getResultTypes() {    int overall_length = RESULT_SIZE;    Object [] resultTypes = new Object[overall_length];    Double doub = new Double(0);    int current = 0;        // Unsupervised stats: 3    resultTypes[current++] = doub;    resultTypes[current++] = doub;    resultTypes[current++] = doub;    // Supervised stats: 3    resultTypes[current++] = doub;    resultTypes[current++] = doub;    resultTypes[current++] = doub;    // Training data stats: 2      resultTypes[current++] = doub;    resultTypes[current++] = doub;    // IR stats: 3    resultTypes[current++] = doub;    resultTypes[current++] = doub;    resultTypes[current++] = doub;    // Timing stats: 2    resultTypes[current++] = doub;    resultTypes[current++] = doub;    // Clusterer defined extras: 1    resultTypes[current++] = "";    if (current != overall_length) {      throw new Error("ResultTypes didn't fit RESULT_SIZE");    }    return resultTypes;  }  /**   * Gets the names of each of the result columns produced for a single run.   * The number of result fields must be constant for a given SplitEvaluator.   
*   * @return an array containing the name of each result column   */  public String [] getResultNames() {    int overall_length = RESULT_SIZE;    String [] resultNames = new String[overall_length];    int current = 0;    // Unsupervised stats: 3    resultNames[current++] = "Purity";    resultNames[current++] = "Entropy";    resultNames[current++] = "Objective_function";    // Supervised stats: 3    resultNames[current++] = "KL_divergence";    resultNames[current++] = "Mutual_information";    resultNames[current++] = "Supervised_dispersion";    // Training data stats: 2      resultNames[current++] = "SameClassPairs";    resultNames[current++] = "DiffClassPairs";    // IR stats: 3    resultNames[current++] = "Pairwise_ir_precision";    resultNames[current++] = "Pairwise_ir_recall";    resultNames[current++] = "Pairwise_f_measure";    // Timing stats: 2    resultNames[current++] = "Time_training";    resultNames[current++] = "Time_testing";    // Clusterer defined extras: 1    resultNames[current++] = "Summary";    if (current != overall_length) {      throw new Error("ResultNames didn't fit RESULT_SIZE");    }    return resultNames;  }  /** Dummy function, exists just for compatibility with SplitEvaluator interface   */  public Object [] getResult(Instances unlabeledTrain, Instances test) {    try {      return getResult(null, unlabeledTrain, test, test.numClasses(), -1); // labeled set is null    }    catch (Exception e) {      e.printStackTrace();    }    return null;  }  /**   * Gets the results for the supplied train and test datasets.   *   * @param labeledTrainPairs the constraint pairs having labels on them   * @param labeledTrain the labeled training Instances.   * @param unlabeledData the unlabeled training (+ test for transductive) Instances.   * @param test the testing Instances.   * @param startingIndexOfTest from where test data starts in unlabeledData, useful if clustering is transductive   * @return the results stored in an array. 
The objects stored in   * the array may be Strings, Doubles, or null (for the missing value).   * @exception Exception if a problem occurs while getting the results   */  public Object [] getResult(ArrayList labeledTrainPairs, Instances labeledTrain, Instances unlabeledData, Instances test, Instances unlabeledTest, HashMap homologHash) throws Exception{    if (m_Clusterer == null) {      throw new WekaException("No clusterer has been specified");    }    if (!(m_Clusterer instanceof SemiSupClusterer)) {      throw new WekaException("Clusterer should implement SemiSupClusterer interface!!\n"); // KLUGE (we could not make m_Clusterer of type SemiSupClusterer, since SemiSupClusterer is an interface and not an abstract class ... so we have to make the check here)    }    int overall_length = RESULT_SIZE;    Object [] result = new Object[overall_length];    long trainTimeStart = System.currentTimeMillis();    if (m_Clusterer instanceof PCKMeans) {      ((PCKMeans)m_Clusterer).buildClusterer(labeledTrainPairs, unlabeledData, labeledTrain, labeledTrain.numInstances()); // KLUGE: have to generalize later    } else if (m_Clusterer instanceof MPCKMeans) {      // Couldn't bypass it any more!!! Adding test to labeledTrain ...      if (test.instance(0) instanceof SparseInstance) {	for (int i=0; i<test.numInstances(); i++) {	  labeledTrain.add(new SparseInstance(test.instance(i)));	}      } else {	for (int i=0; i<test.numInstances(); i++) {	  labeledTrain.add(new Instance(test.instance(i)));
12 下一页
💿 文件大小 12323 K
👤 上传用户 ilovexzhu
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#university #supervised #learning #wekaUT
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -