📄 clusterevaluation.java
字号:
double CvAv = 0.0; double[] tempDist; StringBuffer CvString = new StringBuffer(); if (options != null) { savedOptions = new String[options.length]; } data = new Instances(data); for (int i = 0; i < numFolds; i++) { // create clusterer try { clusterer = (Clusterer)Class.forName(clustererString).newInstance(); } catch (Exception e) { throw new Exception("Can't find class with name " + clustererString + '.'); } if (!(clusterer instanceof DistributionClusterer)) { throw new Exception(clustererString + " must be a distrinbution " + "clusterer."); } // Save options if (options != null) { System.arraycopy(options, 0, savedOptions, 0, options.length); } // Parse options if (clusterer instanceof OptionHandler) { try { ((OptionHandler)clusterer).setOptions(savedOptions); Utils.checkForRemainingOptions(savedOptions); } catch (Exception e) { throw new Exception("Can't parse given options in " + "cross-validation!"); } } // Build and test classifier train = data.trainCV(numFolds, i); clusterer.buildClusterer(train); test = data.testCV(numFolds, i); foldAv = 0.0; for (int j = 0; j < test.numInstances(); j++) { try { double temp = ((DistributionClusterer)clusterer). densityForInstance(test.instance(j)); // double temp = Utils.sum(tempDist); if (temp > 0) { foldAv += Math.log(temp); } } catch (Exception ex) { // unclustered instances } } CvAv += (foldAv/test.numInstances()); } CvAv /= numFolds; CvString.append("\n" + numFolds + " fold CV Log Likelihood: " + Utils.doubleToString(CvAv, 6, 4) + "\n"); return CvString.toString(); } // =============== // Private methods // =============== /** * Print the cluster statistics for either the training * or the testing data. * * @param clusterer the clusterer to use for generating statistics. * @return a string containing cluster statistics. * @exception if statistics can't be generated. */ private static String printClusterStats (Clusterer clusterer, String fileName) throws Exception { StringBuffer text = new StringBuffer(); int i = 0; int cnum; double loglk = 0.0; double[] dist; double temp; int cc = clusterer.numberOfClusters(); double[] instanceStats = new double[cc]; int unclusteredInstances = 0; if (fileName.length() != 0) { BufferedReader inStream = null; try { inStream = new BufferedReader(new FileReader(fileName)); } catch (Exception e) { throw new Exception("Can't open file " + e.getMessage() + '.'); } Instances inst = new Instances(inStream, 1); while (inst.readInstance(inStream)) { try { cnum = clusterer.clusterInstance(inst.instance(0)); if (clusterer instanceof DistributionClusterer) { temp = ((DistributionClusterer)clusterer). densityForInstance(inst.instance(0)); // temp = Utils.sum(dist); if (temp > 0) { loglk += Math.log(temp); } } instanceStats[cnum]++; } catch (Exception e) { unclusteredInstances++; } inst.delete(0); i++; } /* // count the actual number of used clusters int count = 0; for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { count++; } } if (count > 0) { double [] tempStats = new double [count]; count=0; for (i=0;i<cc;i++) { if (instanceStats[i] > 0) { tempStats[count++] = instanceStats[i]; } } instanceStats = tempStats; cc = instanceStats.length; } */ int clustFieldWidth = (int)((Math.log(cc)/Math.log(10))+1); int numInstFieldWidth = (int)((Math.log(i)/Math.log(10))+1); double sum = Utils.sum(instanceStats); loglk /= sum; text.append("Clustered Instances\n"); for (i = 0; i < cc; i++) { if (instanceStats[i] > 0) { text.append(Utils.doubleToString((double)i, clustFieldWidth, 0) + " " + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0) + " (" + Utils.doubleToString((instanceStats[i]/sum*100.0) , 3, 0) + "%)\n"); } } if (unclusteredInstances > 0) { text.append("\nUnclustered Instances : "+unclusteredInstances); } if (clusterer instanceof DistributionClusterer) { text.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n"); } } return text.toString(); } /** * Print the cluster assignments for either the training * or the testing data. * * @param clusterer the clusterer to use for cluster assignments * @return a string containing the instance indexes and cluster assigns. * @exception if cluster assignments can't be printed */ private static String printClusterings (Clusterer clusterer, Instances train, String testFileName, Range attributesToOutput) throws Exception { StringBuffer text = new StringBuffer(); int i = 0; int cnum; if (testFileName.length() != 0) { BufferedReader testStream = null; try { testStream = new BufferedReader(new FileReader(testFileName)); } catch (Exception e) { throw new Exception("Can't open file " + e.getMessage() + '.'); } Instances test = new Instances(testStream, 1); while (test.readInstance(testStream)) { try { cnum = clusterer.clusterInstance(test.instance(0)); text.append(i + " " + cnum + " " + attributeValuesString(test.instance(0), attributesToOutput) + "\n"); } catch (Exception e) { /* throw new Exception('\n' + "Unable to cluster instance\n" + e.getMessage()); */ text.append(i + " Unclustered " + attributeValuesString(test.instance(0), attributesToOutput) + "\n"); } test.delete(0); i++; } } else// output for training data { for (i = 0; i < train.numInstances(); i++) { try { cnum = clusterer.clusterInstance(train.instance(i)); text.append(i + " " + cnum + " " + attributeValuesString(train.instance(i), attributesToOutput) + "\n"); } catch (Exception e) { /* throw new Exception('\n' + "Unable to cluster instance\n" + e.getMessage()); */ text.append(i + " Unclustered " + attributeValuesString(train.instance(i), attributesToOutput) + "\n"); } } } return text.toString(); } /** * Builds a string listing the attribute values in a specified range of indices, * separated by commas and enclosed in brackets. * * @param instance the instance to print the values from * @param attributes the range of the attributes to list * @return a string listing values of the attributes in the range */ private static String attributeValuesString(Instance instance, Range attRange) { StringBuffer text = new StringBuffer(); if (attRange != null) { boolean firstOutput = true; attRange.setUpper(instance.numAttributes() - 1); for (int i=0; i<instance.numAttributes(); i++) if (attRange.isInRange(i)) { if (firstOutput) text.append("("); else text.append(","); text.append(instance.toString(i)); firstOutput = false; } if (!firstOutput) text.append(")"); } return text.toString(); } /** * Make up the help string giving all the command line options * * @param clusterer the clusterer to include options for * @return a string detailing the valid command line options */ private static String makeOptionString (Clusterer clusterer) { StringBuffer optionsText = new StringBuffer(""); // General options optionsText.append("\n\nGeneral options:\n\n"); optionsText.append("-t <name of training file>\n"); optionsText.append("\tSets training file.\n"); optionsText.append("-T <name of test file>\n"); optionsText.append("-l <name of input file>\n"); optionsText.append("\tSets model input file.\n"); optionsText.append("-d <name of output file>\n"); optionsText.append("\tSets model output file.\n"); optionsText.append("-p <attribute range>\n"); optionsText.append("\tOutput predictions. Predictions are for " + "training file" + "\n\tif only training file is specified," + "\n\totherwise predictions are for the test file." + "\n\tThe range specifies attribute values to be output" + "\n\twith the predictions. Use '-p 0' for none.\n"); optionsText.append("-x <number of folds>\n"); optionsText.append("\tOnly Distribution Clusterers can be cross " + "validated.\n"); optionsText.append("-s <random number seed>\n"); optionsText.append("-c <class index>\n"); optionsText.append("\tSet class attribute. If supplied, class is ignored"); optionsText.append("\n\tduring clustering but is used in a classes to"); optionsText.append("\n\tclusters evaluation.\n"); // Get scheme-specific options if (clusterer instanceof OptionHandler) { optionsText.append("\nOptions specific to " + clusterer.getClass().getName() + ":\n\n"); Enumeration enum = ((OptionHandler)clusterer).listOptions(); while (enum.hasMoreElements()) { Option option = (Option)enum.nextElement(); optionsText.append(option.synopsis() + '\n'); optionsText.append(option.description() + "\n"); } } return optionsText.toString(); } /** * Main method for testing this class. * * @param args the options */ public static void main (String[] args) { try { if (args.length == 0) { throw new Exception("The first argument must be the name of a " + "clusterer"); } String ClustererString = args[0]; args[0] = ""; Clusterer newClusterer = Clusterer.forName(ClustererString, null); System.out.println(evaluateClusterer(newClusterer, args)); } catch (Exception e) { System.out.println(e.getMessage()); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -