📄 .#semisupclustererevaluation.java.1.9

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 9
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    SemiSupClustererEvaluation.java
 *    Copyright (C) 2002 Sugato Basu, Misha Bilenko
 *
 */

package  weka.clusterers;

import  java.util.*;
import  java.io.*;
import  weka.core.*;
import  weka.filters.Filter;
import  weka.filters.unsupervised.attribute.Remove;

/**
 * Class for evaluating clustering models - extends ClusterEvaluation.java<p>
 * Implements different clustering evaluation metrics
 *
 * @author   Sugato Basu, Misha Bilenko
 */
public class SemiSupClustererEvaluation extends ClusterEvaluation {

  /** Purity of the clustering */
  protected double m_Purity;

  /** Entropy of the clustering */
  protected double m_Entropy;

  /** Objective function of the clustering */
  protected double m_Objective;

  /** MI metric of the clustering */
  protected double m_MIMetric;

  /** KL Divergence of the clustering */
  protected double m_KLDivergence;

  /** The number of underlying classes */
  protected int m_NumClasses;

  /** The number of produced clusters */
  protected int m_NumClusters;

  /** All labeled training instances */
  protected Instances m_LabeledTrain;

  /** All unlabeled training instances */
  protected Instances m_UnlabeledTrain;

  /** All test instances */
  protected Instances m_Test;

  /** Training pairs */
  protected ArrayList m_labeledTrainPairs;

  /** The weight of all incorrectly categorized test instances. */
  protected double m_WeightTestIncorrect;

  /** The weight of all correctly categorized test instances. */
  protected double m_WeightTestCorrect;

  /** The weight of all uncategorized test instances. */
  protected double m_WeightTestUnclassified;

  /** The weight of test instances that had a class assigned to them. */
  protected double m_WeightTestWithClass;

  /** Array for storing the confusion matrix; allocated as [cluster][class]. */
  protected double [][] m_ConfusionMatrix;

  /** The names of the classes. */
  protected String [] m_ClassNames;

  /** Is the class nominal or numeric? */
  protected boolean m_ClassIsNominal;

  /** If the class is not nominal, we do not need the confusion matrix but do pairs counts directly */
  protected int m_totalPairs;
  protected int m_goodPairs;
  protected int m_trueGoodPairs;

  /** The total cost of predictions (includes instance weights) */
  protected double m_TotalCost;

  /**
   * Returns the summary of this evaluation.
   *
   * @return the string produced by the superclass's <code>toString()</code>
   */
  public String toSummaryString() {
    return super.toString();
  }

  /**
   * Returns a string describing this evaluator
   * @return a description of the evaluator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return " A clusterer evaluator that evaluates results of running a "
      + "semi-supervised clustering algorithm.";
  }

  /**
   * Creates an evaluator. When the class attribute of <code>test</code> is
   * nominal, the confusion matrix and the class-name table are allocated.
   *
   * @param test instances whose class attribute determines the evaluation mode
   * @param numClasses the number of underlying classes
   * @param numClusters the number of produced clusters
   */
  public SemiSupClustererEvaluation (Instances test, int numClasses, int numClusters) {
    m_NumClasses = numClasses;
    m_NumClusters = numClusters;
    m_ClassIsNominal = test.classAttribute().isNominal();
    if (m_ClassIsNominal) {
      m_ConfusionMatrix = new double [m_NumClusters][m_NumClasses];
      m_ClassNames = new String [m_NumClasses];
      for (int i = 0; i < m_NumClasses; i++) {
        m_ClassNames[i] = test.classAttribute().value(i);
      }
    }
  }

  /**
   * Creates an evaluator as the three-argument constructor does and
   * additionally remembers the labeled training pairs.
   *
   * @param labeledTrainPairs the training pairs to store
   * @param test instances whose class attribute determines the evaluation mode
   * @param numClasses the number of underlying classes
   * @param numClusters the number of produced clusters
   */
  public SemiSupClustererEvaluation (ArrayList labeledTrainPairs, Instances test, int numClasses, int numClusters) {
    this (test, numClasses, numClusters);
    m_labeledTrainPairs = labeledTrainPairs;
  }

  /**
   * Evaluates the semi-sup clusterer on a given set of test instances.<p>
   *
   * With a nominal class every instance of <code>unlabeledTest</code> is
   * passed to {@link #evaluateModelOnce} together with the class value of the
   * instance at the same index in <code>testInstances</code>.<p>
   *
   * Otherwise the class attribute is read as a string holding a
   * "_"-separated list of class names. Two instances form a "true good pair"
   * when their lists have a name in common; instances with an empty list are
   * ignored. The instances are then clustered and every pair that ends up in
   * the same cluster is counted in <code>m_totalPairs</code>, and in
   * <code>m_goodPairs</code> if the two instances share a class.
   *
   * @param clusterer semi-supervised clusterer
   * @param testInstances set of test instances for evaluation (with class labels)
   * @param unlabeledTest the instances handed to the clusterer; instance i is
   * paired with instance i of <code>testInstances</code>
   * @exception Exception if model could not be evaluated successfully
   */
  public void evaluateModel (Clusterer clusterer, Instances testInstances, Instances unlabeledTest) throws Exception {
    // Both branches index testInstances by positions of unlabeledTest, so fail
    // up front with a clear message instead of an index error half-way through.
    if (unlabeledTest.numInstances() > testInstances.numInstances()) {
      throw new IllegalArgumentException("unlabeledTest has " + unlabeledTest.numInstances()
                                         + " instances but testInstances only has "
                                         + testInstances.numInstances());
    }

    if (m_ClassIsNominal) {
      m_Test = testInstances;
      m_Objective = ((SemiSupClusterer) clusterer).objectiveFunction(); // Assuming transductive clustering here ... will need to generalize in future
      System.out.println("Evaluating cluster results ...");
      for (int i = 0; i < unlabeledTest.numInstances(); i++) {
        evaluateModelOnce(clusterer, unlabeledTest.instance(i), (int) (testInstances.instance(i)).classValue());
      }
    } else { // string-based class attributes
      int numInstances = testInstances.numInstances();
      Attribute classAttr = testInstances.classAttribute();
      int [][] sharedClass = new int[numInstances][numInstances];
      HashSet dontCareSet = new HashSet();
      final int HAVE_SHARED_CLASS = 0;
      final int NO_SHARED_CLASS = 1;
      final int DONT_CARE = 2;

      m_totalPairs = 0;
      m_goodPairs = 0;
      // calculate the number of true pairs
      m_trueGoodPairs = 0;
      HashSet [] classSets = new HashSet[numInstances];
      for (int i = 0; i < numInstances; i++) {
        System.out.println("Classattr: " + classAttr);
        String classList = testInstances.instance(i).stringValue(classAttr);
        if (classList.length() != 0) { // skip unassigned instances
          // parse the list of classes into a hashset
          HashSet classSet = new HashSet();
          StringTokenizer tokenizer = new StringTokenizer(classList, "_");
          while (tokenizer.hasMoreTokens()) {
            classSet.add(tokenizer.nextToken());
          }
          classSets[i] = classSet;

          for (int j = 0; j < i; j++) {
            if (classSets[j] != null) { // skip unassigned instances
              HashSet prevSet = (HashSet) classSets[j];
              Iterator iterator = prevSet.iterator();
              boolean shareClass = false;
              // go through previously assigned instance's classes and see if current class list contains any
              while (iterator.hasNext() && !shareClass) {
                String classString = (String) iterator.next();
                if (classSet.contains(classString)) {
                  shareClass = true;
                }
              }
              if (shareClass) {
                m_trueGoodPairs++;
                sharedClass[i][j] = sharedClass[j][i] = HAVE_SHARED_CLASS;
              } else {
                sharedClass[i][j] = sharedClass[j][i] = NO_SHARED_CLASS;
              }
            }
          }
        } else { // all pairs with this instance are don't care
          dontCareSet.add(new Integer(i));
          for (int j = 0; j < numInstances; j++) {
            sharedClass[i][j] = sharedClass[j][i] = DONT_CARE;
          }
        }
      }

      // now cluster and evaluate precision
      // The lists are indexed by cluster, so there must be room for every
      // cluster index; sizing by m_NumClasses alone overflowed whenever there
      // were more clusters than classes. The old length is kept as a minimum.
      ArrayList[] clusterMembers = new ArrayList[Math.max(m_NumClusters, m_NumClasses)];
      for (int i = 0; i < clusterMembers.length; i++) {
        clusterMembers[i] = new ArrayList();
      }

      for (int i = 0; i < unlabeledTest.numInstances(); i++) {
        Integer instanceIdx = new Integer(i);
        if (!dontCareSet.contains(instanceIdx)) {
          int clusterIdx = clusterer.clusterInstance(unlabeledTest.instance(i));
          ArrayList members = clusterMembers[clusterIdx];
          // go through all instances previously assigned to the same cluster and check whether they have common classes
          for (int j = 0; j < members.size(); j++) {
            int sameClusterInstanceIdx = ((Integer) members.get(j)).intValue();
            // The pair is (current instance i, earlier cluster member); j is
            // only a position in the member list and must not index sharedClass.
            if (sharedClass[i][sameClusterInstanceIdx] == HAVE_SHARED_CLASS) {
              m_goodPairs++;
            }
            m_totalPairs++;
          }
          members.add(instanceIdx);
        }
      }
    }
  }

  /**
   * Evaluates the semi-sup clusterer on a given test instance. Does nothing
   * unless the class is nominal.
   *
   * @param clusterer semi-supervised clusterer
   * @param testWithoutLabel test instance that is handed to the clusterer
   * @param classValue the class value to record the prediction against
   * @exception Exception if model could not be evaluated successfully
   */
  public void evaluateModelOnce (Clusterer clusterer, Instance testWithoutLabel, int classValue) throws Exception {
    double [] pred;
    if (m_ClassIsNominal) {
      if (clusterer instanceof DistributionClusterer) {
        pred = ((DistributionClusterer) clusterer).distributionForInstance(testWithoutLabel);
      }
      else {
        pred = makeDistribution(clusterer.clusterInstance(testWithoutLabel));
      }
      updateStatsForClusterer(pred, classValue);
    }
  }

  /**
   * Convert a single prediction into a probability distribution
   * with all zero probabilities except the predicted value which
   * has probability 1.0;
   *
   * The array is long enough to hold every cluster index; it is never shorter
   * than <code>m_NumClasses</code>, the length used before, so it only grows
   * in the case that previously overflowed (more clusters than classes).
   *
   * @param predictedCluster the index of the predicted cluster
   * @return the probability distribution
   */
  protected double [] makeDistribution(int predictedCluster) {
    double [] result = new double [Math.max(m_NumClusters, m_NumClasses)];
    if (m_ClassIsNominal) {
      result[predictedCluster] = 1.0;
    }
    else {
      result[0] = predictedCluster;
    }
    return result;
  }

  /**
   * Updates all the statistics about a clusterer performance for 
   * the current test instance.
   *
   * @param distrib the probabilities assigned to each class
   * @param test the test instance
12 下一页
💿 文件大小 12323 K
👤 上传用户 ilovexzhu
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#university #supervised #learning #wekaUT
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -