📄 clusterevaluation.java
字号:
clusterer.buildClusterer(train); test = data.testCV(numFolds, i); for (int j = 0; j < test.numInstances(); j++) { try { foldAv += ((DensityBasedClusterer)clusterer). logDensityForInstance(test.instance(j)); // sumOW += test.instance(j).weight(); // double temp = Utils.sum(tempDist); } catch (Exception ex) { // unclustered instances } } } // return foldAv / sumOW; return foldAv / data.numInstances(); } /** * Performs a cross-validation * for a DensityBasedClusterer clusterer on a set of instances. * * @param clustererString a string naming the class of the clusterer * @param data the data on which the cross-validation is to be * performed * @param numFolds the number of folds for the cross-validation * @param options the options to the clusterer * @param random a random number generator * @return a string containing the cross validated log likelihood * @throws Exception if a clusterer could not be generated */ public static String crossValidateModel (String clustererString, Instances data, int numFolds, String[] options, Random random) throws Exception { Clusterer clusterer = null; String[] savedOptions = null; double CvAv = 0.0; StringBuffer CvString = new StringBuffer(); if (options != null) { savedOptions = new String[options.length]; } data = new Instances(data); // create clusterer try { clusterer = (Clusterer)Class.forName(clustererString).newInstance(); } catch (Exception e) { throw new Exception("Can't find class with name " + clustererString + '.'); } if (!(clusterer instanceof DensityBasedClusterer)) { throw new Exception(clustererString + " must be a distrinbution " + "clusterer."); } // Save options if (options != null) { System.arraycopy(options, 0, savedOptions, 0, options.length); } // Parse options if (clusterer instanceof OptionHandler) { try { ((OptionHandler)clusterer).setOptions(savedOptions); Utils.checkForRemainingOptions(savedOptions); } catch (Exception e) { throw new Exception("Can't parse given options in " + "cross-validation!"); } } CvAv = crossValidateModel((DensityBasedClusterer)clusterer, data, numFolds, random); CvString.append("\n" + numFolds + " fold CV Log Likelihood: " + Utils.doubleToString(CvAv, 6, 4) + "\n"); return CvString.toString(); } // =============== // Private methods // =============== /** * Print the cluster statistics for either the training * or the testing data. * * @param clusterer the clusterer to use for generating statistics. * @param fileName the file to load * @return a string containing cluster statistics. * @throws Exception if statistics can't be generated. */ private static String printClusterStats (Clusterer clusterer, String fileName) throws Exception { StringBuffer text = new StringBuffer(); int i = 0; int cnum; double loglk = 0.0; int cc = clusterer.numberOfClusters(); double[] instanceStats = new double[cc]; int unclusteredInstances = 0; if (fileName.length() != 0) { DataSource source = new DataSource(fileName); Instances structure = source.getStructure(); Instance inst; while (source.hasMoreElements(structure)) { inst = source.nextElement(structure); try { cnum = clusterer.clusterInstance(inst); if (clusterer instanceof DensityBasedClusterer) { loglk += ((DensityBasedClusterer)clusterer). logDensityForInstance(inst); // temp = Utils.sum(dist); } instanceStats[cnum]++; } catch (Exception e) { unclusteredInstances++; } i++; } /* // count the actual number of used clusters int count = 0; for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { count++; } } if (count > 0) { double[] tempStats = new double [count]; count=0; for (i=0;i<cc;i++) { if (instanceStats[i] > 0) { tempStats[count++] = instanceStats[i]; } } instanceStats = tempStats; cc = instanceStats.length; } */ int clustFieldWidth = (int)((Math.log(cc)/Math.log(10))+1); int numInstFieldWidth = (int)((Math.log(i)/Math.log(10))+1); double sum = Utils.sum(instanceStats); loglk /= sum; text.append("Clustered Instances\n"); for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { text.append(Utils.doubleToString((double)i, clustFieldWidth, 0) + " " + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0) + " (" + Utils.doubleToString((instanceStats[i]/sum*100.0) , 3, 0) + "%)\n"); } } if (unclusteredInstances > 0) { text.append("\nUnclustered Instances : "+unclusteredInstances); } if (clusterer instanceof DensityBasedClusterer) { text.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n"); } } return text.toString(); } /** * Print the cluster assignments for either the training * or the testing data. * * @param clusterer the clusterer to use for cluster assignments * @param trainFileName the train file * @param testFileName an optional test file * @param attributesToOutput the attributes to print * @return a string containing the instance indexes and cluster assigns. * @throws Exception if cluster assignments can't be printed */ private static String printClusterings (Clusterer clusterer, String trainFileName, String testFileName, Range attributesToOutput) throws Exception { StringBuffer text = new StringBuffer(); int i = 0; int cnum; DataSource source = null; Instance inst; Instances structure; if (testFileName.length() != 0) source = new DataSource(testFileName); else source = new DataSource(trainFileName); structure = source.getStructure(); while (source.hasMoreElements(structure)) { inst = source.nextElement(structure); try { cnum = clusterer.clusterInstance(inst); text.append(i + " " + cnum + " " + attributeValuesString(inst, attributesToOutput) + "\n"); } catch (Exception e) { /* throw new Exception('\n' + "Unable to cluster instance\n" + e.getMessage()); */ text.append(i + " Unclustered " + attributeValuesString(inst, attributesToOutput) + "\n"); } i++; } return text.toString(); } /** * Builds a string listing the attribute values in a specified range of indices, * separated by commas and enclosed in brackets. * * @param instance the instance to print the values from * @param attRange the range of the attributes to list * @return a string listing values of the attributes in the range */ private static String attributeValuesString(Instance instance, Range attRange) { StringBuffer text = new StringBuffer(); if (attRange != null) { boolean firstOutput = true; attRange.setUpper(instance.numAttributes() - 1); for (int i=0; i<instance.numAttributes(); i++) if (attRange.isInRange(i)) { if (firstOutput) text.append("("); else text.append(","); text.append(instance.toString(i)); firstOutput = false; } if (!firstOutput) text.append(")"); } return text.toString(); } /** * Make up the help string giving all the command line options * * @param clusterer the clusterer to include options for * @return a string detailing the valid command line options */ private static String makeOptionString (Clusterer clusterer) { StringBuffer optionsText = new StringBuffer(""); // General options optionsText.append("\n\nGeneral options:\n\n"); optionsText.append("-t <name of training file>\n"); optionsText.append("\tSets training file.\n"); optionsText.append("-T <name of test file>\n"); optionsText.append("\tSets test file.\n"); optionsText.append("-l <name of input file>\n"); optionsText.append("\tSets model input file.\n"); optionsText.append("-d <name of output file>\n"); optionsText.append("\tSets model output file.\n"); optionsText.append("-p <attribute range>\n"); optionsText.append("\tOutput predictions. Predictions are for " + "training file" + "\n\tif only training file is specified," + "\n\totherwise predictions are for the test file." + "\n\tThe range specifies attribute values to be output" + "\n\twith the predictions. Use '-p 0' for none.\n"); optionsText.append("-x <number of folds>\n"); optionsText.append("\tOnly Distribution Clusterers can be cross validated.\n"); optionsText.append("-s <random number seed>\n"); optionsText.append("\tSets the seed for randomizing the data in cross-validation\n"); optionsText.append("-c <class index>\n"); optionsText.append("\tSet class attribute. If supplied, class is ignored"); optionsText.append("\n\tduring clustering but is used in a classes to"); optionsText.append("\n\tclusters evaluation.\n"); if (clusterer instanceof Drawable) { optionsText.append("-g <name of graph file>\n"); optionsText.append("\tOutputs the graph representation of the clusterer to the file.\n"); } // Get scheme-specific options if (clusterer instanceof OptionHandler) { optionsText.append("\nOptions specific to " + clusterer.getClass().getName() + ":\n\n"); Enumeration enu = ((OptionHandler)clusterer).listOptions(); while (enu.hasMoreElements()) { Option option = (Option)enu.nextElement(); optionsText.append(option.synopsis() + '\n'); optionsText.append(option.description() + "\n"); } } return optionsText.toString(); } /** * Tests whether the current evaluation object is equal to another * evaluation object * * @param obj the object to compare against * @return true if the two objects are equal */ public boolean equals(Object obj) { if ((obj == null) || !(obj.getClass().equals(this.getClass()))) return false; ClusterEvaluation cmp = (ClusterEvaluation) obj; if ((m_classToCluster != null) != (cmp.m_classToCluster != null)) return false; if (m_classToCluster != null) { for (int i = 0; i < m_classToCluster.length; i++) { if (m_classToCluster[i] != cmp.m_classToCluster[i]) return false; } } if ((m_clusterAssignments != null) != (cmp.m_clusterAssignments != null)) return false; if (m_clusterAssignments != null) { for (int i = 0; i < m_clusterAssignments.length; i++) { if (m_clusterAssignments[i] != cmp.m_clusterAssignments[i]) return false; } } if (Double.isNaN(m_logL) != Double.isNaN(cmp.m_logL)) return false; if (!Double.isNaN(m_logL)) { if (m_logL != cmp.m_logL) return false; } if (m_numClusters != cmp.m_numClusters) return false; // TODO: better comparison? via members? String clusteringResults1 = m_clusteringResults.toString().replaceAll("Elapsed time.*", ""); String clusteringResults2 = cmp.m_clusteringResults.toString().replaceAll("Elapsed time.*", ""); if (!clusteringResults1.equals(clusteringResults2)) return false; return true; } /** * Main method for testing this class. * * @param args the options */ public static void main (String[] args) { try { if (args.length == 0) { throw new Exception("The first argument must be the name of a " + "clusterer"); } String ClustererString = args[0]; args[0] = ""; Clusterer newClusterer = Clusterer.forName(ClustererString, null); System.out.println(evaluateClusterer(newClusterer, args)); } catch (Exception e) { System.out.println(e.getMessage()); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -