📄 clusterevaluation.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
      clusterer.buildClusterer(train);      test = data.testCV(numFolds, i);            for (int j = 0; j < test.numInstances(); j++) {	try {	  foldAv += ((DensityBasedClusterer)clusterer).	    logDensityForInstance(test.instance(j));	  //	  sumOW += test.instance(j).weight();	  //	double temp = Utils.sum(tempDist);	} catch (Exception ex) {	  // unclustered instances	}      }    }       //    return foldAv / sumOW;    return foldAv / data.numInstances();  }  /**   * Performs a cross-validation    * for a DensityBasedClusterer clusterer on a set of instances.   *   * @param clustererString a string naming the class of the clusterer   * @param data the data on which the cross-validation is to be    * performed    * @param numFolds the number of folds for the cross-validation   * @param options the options to the clusterer   * @param random a random number generator   * @return a string containing the cross validated log likelihood   * @throws Exception if a clusterer could not be generated    */  public static String crossValidateModel (String clustererString, 					   Instances data, 					   int numFolds, 					   String[] options,					   Random random)    throws Exception {    Clusterer clusterer = null;    String[] savedOptions = null;    double CvAv = 0.0;    StringBuffer CvString = new StringBuffer();    if (options != null) {      savedOptions = new String[options.length];    }    data = new Instances(data);    // create clusterer    try {      clusterer = (Clusterer)Class.forName(clustererString).newInstance();    }    catch (Exception e) {      throw  new Exception("Can't find class with name " 			   + clustererString + '.');    }    if (!(clusterer instanceof DensityBasedClusterer)) {      throw  new Exception(clustererString 			   + " must be a distrinbution " 			   + "clusterer.");    }    // Save options    if (options != null) {      System.arraycopy(options, 0, savedOptions, 0, options.length);    }    // Parse options    if (clusterer instanceof OptionHandler) {      try {	((OptionHandler)clusterer).setOptions(savedOptions);	Utils.checkForRemainingOptions(savedOptions);      }      catch (Exception e) {	throw  new Exception("Can't parse given options in " 			     + "cross-validation!");      }    }    CvAv = crossValidateModel((DensityBasedClusterer)clusterer, data, numFolds, random);    CvString.append("\n" + numFolds 		    + " fold CV Log Likelihood: " 		    + Utils.doubleToString(CvAv, 6, 4) 		    + "\n");    return  CvString.toString();  }  // ===============  // Private methods  // ===============  /**   * Print the cluster statistics for either the training   * or the testing data.   *   * @param clusterer the clusterer to use for generating statistics.   * @param fileName the file to load   * @return a string containing cluster statistics.   * @throws Exception if statistics can't be generated.   */  private static String printClusterStats (Clusterer clusterer, 					   String fileName)    throws Exception {    StringBuffer text = new StringBuffer();    int i = 0;    int cnum;    double loglk = 0.0;    int cc = clusterer.numberOfClusters();    double[] instanceStats = new double[cc];    int unclusteredInstances = 0;    if (fileName.length() != 0) {      DataSource source = new DataSource(fileName);      Instances structure = source.getStructure();      Instance inst;      while (source.hasMoreElements(structure)) {	inst = source.nextElement(structure);	try {	  cnum = clusterer.clusterInstance(inst);	  if (clusterer instanceof DensityBasedClusterer) {	    loglk += ((DensityBasedClusterer)clusterer).	      logDensityForInstance(inst);	    //	    temp = Utils.sum(dist);	  }	  instanceStats[cnum]++;	}	catch (Exception e) {	  unclusteredInstances++;	}	i++;      }      /*      // count the actual number of used clusters      int count = 0;      for (i = 0; i < cc; i++) {	if (instanceStats[i] > 0) {	  count++;	}      }      if (count > 0) {	double[] tempStats = new double [count];	count=0;	for (i=0;i<cc;i++) {	  if (instanceStats[i] > 0) {	    tempStats[count++] = instanceStats[i];	}	}	instanceStats = tempStats;	cc = instanceStats.length;	} */      int clustFieldWidth = (int)((Math.log(cc)/Math.log(10))+1);      int numInstFieldWidth = (int)((Math.log(i)/Math.log(10))+1);      double sum = Utils.sum(instanceStats);      loglk /= sum;      text.append("Clustered Instances\n");      for (i = 0; i < cc; i++) {	if (instanceStats[i] > 0) {	  text.append(Utils.doubleToString((double)i, 					   clustFieldWidth, 0) 		      + "      " 		      + Utils.doubleToString(instanceStats[i], 					     numInstFieldWidth, 0) 		      + " (" 		    + Utils.doubleToString((instanceStats[i]/sum*100.0)					   , 3, 0) + "%)\n");	}      }      if (unclusteredInstances > 0) {	text.append("\nUnclustered Instances : "+unclusteredInstances);      }      if (clusterer instanceof DensityBasedClusterer) {	text.append("\n\nLog likelihood: " 		    + Utils.doubleToString(loglk, 1, 5) 		    + "\n");      }    }    return text.toString();  }  /**   * Print the cluster assignments for either the training   * or the testing data.   *   * @param clusterer the clusterer to use for cluster assignments   * @param trainFileName the train file   * @param testFileName an optional test file   * @param attributesToOutput the attributes to print   * @return a string containing the instance indexes and cluster assigns.   * @throws Exception if cluster assignments can't be printed   */  private static String printClusterings (Clusterer clusterer, String trainFileName,					  String testFileName, Range attributesToOutput)    throws Exception {    StringBuffer text = new StringBuffer();    int i = 0;    int cnum;    DataSource source = null;    Instance inst;    Instances structure;        if (testFileName.length() != 0)      source = new DataSource(testFileName);    else      source = new DataSource(trainFileName);        structure = source.getStructure();    while (source.hasMoreElements(structure)) {      inst = source.nextElement(structure);      try {	cnum = clusterer.clusterInstance(inst);		text.append(i + " " + cnum + " "	    + attributeValuesString(inst, attributesToOutput) + "\n");      }      catch (Exception e) {	/*	  throw  new Exception('\n' + "Unable to cluster instance\n" 	 + e.getMessage()); */	text.append(i + " Unclustered "	    + attributeValuesString(inst, attributesToOutput) + "\n");      }      i++;    }        return text.toString();  }  /**   * Builds a string listing the attribute values in a specified range of indices,   * separated by commas and enclosed in brackets.   *   * @param instance the instance to print the values from   * @param attRange the range of the attributes to list   * @return a string listing values of the attributes in the range   */  private static String attributeValuesString(Instance instance, Range attRange) {    StringBuffer text = new StringBuffer();    if (attRange != null) {      boolean firstOutput = true;      attRange.setUpper(instance.numAttributes() - 1);      for (int i=0; i<instance.numAttributes(); i++)	if (attRange.isInRange(i)) {	  if (firstOutput) text.append("(");	  else text.append(",");	  text.append(instance.toString(i));	  firstOutput = false;	}      if (!firstOutput) text.append(")");    }    return text.toString();  }  /**   * Make up the help string giving all the command line options   *   * @param clusterer the clusterer to include options for   * @return a string detailing the valid command line options   */  private static String makeOptionString (Clusterer clusterer) {    StringBuffer optionsText = new StringBuffer("");    // General options    optionsText.append("\n\nGeneral options:\n\n");    optionsText.append("-t <name of training file>\n");    optionsText.append("\tSets training file.\n");    optionsText.append("-T <name of test file>\n");    optionsText.append("\tSets test file.\n");    optionsText.append("-l <name of input file>\n");    optionsText.append("\tSets model input file.\n");    optionsText.append("-d <name of output file>\n");    optionsText.append("\tSets model output file.\n");    optionsText.append("-p <attribute range>\n");    optionsText.append("\tOutput predictions. Predictions are for " 		       + "training file" 		       + "\n\tif only training file is specified," 		       + "\n\totherwise predictions are for the test file."		       + "\n\tThe range specifies attribute values to be output"		       + "\n\twith the predictions. Use '-p 0' for none.\n");    optionsText.append("-x <number of folds>\n");    optionsText.append("\tOnly Distribution Clusterers can be cross validated.\n");    optionsText.append("-s <random number seed>\n");    optionsText.append("\tSets the seed for randomizing the data in cross-validation\n");    optionsText.append("-c <class index>\n");    optionsText.append("\tSet class attribute. If supplied, class is ignored");    optionsText.append("\n\tduring clustering but is used in a classes to");    optionsText.append("\n\tclusters evaluation.\n");    if (clusterer instanceof Drawable) {      optionsText.append("-g <name of graph file>\n");      optionsText.append("\tOutputs the graph representation of the clusterer to the file.\n");    }    // Get scheme-specific options    if (clusterer instanceof OptionHandler) {      optionsText.append("\nOptions specific to " 			 + clusterer.getClass().getName() + ":\n\n");      Enumeration enu = ((OptionHandler)clusterer).listOptions();      while (enu.hasMoreElements()) {	Option option = (Option)enu.nextElement();	optionsText.append(option.synopsis() + '\n');	optionsText.append(option.description() + "\n");      }    }    return  optionsText.toString();  }  /**   * Tests whether the current evaluation object is equal to another   * evaluation object   *   * @param obj the object to compare against   * @return true if the two objects are equal   */  public boolean equals(Object obj) {    if ((obj == null) || !(obj.getClass().equals(this.getClass())))      return false;        ClusterEvaluation cmp = (ClusterEvaluation) obj;        if ((m_classToCluster != null) != (cmp.m_classToCluster != null)) return false;    if (m_classToCluster != null) {      for (int i = 0; i < m_classToCluster.length; i++) {        if (m_classToCluster[i] != cmp.m_classToCluster[i])  	return false;      }    }        if ((m_clusterAssignments != null) != (cmp.m_clusterAssignments != null)) return false;    if (m_clusterAssignments != null) {      for (int i = 0; i < m_clusterAssignments.length; i++) {        if (m_clusterAssignments[i] != cmp.m_clusterAssignments[i])  	return false;      }    }    if (Double.isNaN(m_logL) != Double.isNaN(cmp.m_logL)) return false;    if (!Double.isNaN(m_logL)) {      if (m_logL != cmp.m_logL) return false;    }        if (m_numClusters != cmp.m_numClusters) return false;        // TODO: better comparison? via members?    String clusteringResults1 = m_clusteringResults.toString().replaceAll("Elapsed time.*", "");    String clusteringResults2 = cmp.m_clusteringResults.toString().replaceAll("Elapsed time.*", "");    if (!clusteringResults1.equals(clusteringResults2)) return false;        return true;  }  /**   * Main method for testing this class.   *   * @param args the options   */  public static void main (String[] args) {    try {      if (args.length == 0) {	throw  new Exception("The first argument must be the name of a " 			     + "clusterer");      }      String ClustererString = args[0];      args[0] = "";      Clusterer newClusterer = Clusterer.forName(ClustererString, null);      System.out.println(evaluateClusterer(newClusterer, args));    }    catch (Exception e) {      System.out.println(e.getMessage());    }  }}
上一页 1 23
💿 文件大小 124 K
👤 上传用户 wuseyue
📂 所属分类数学计算
🏷️ 相关标签

#数据挖掘 #聚类 #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -