📄 clusterevaluation.java
字号:
for (int j = 0; j < test.numInstances(); j++) {
try {
double temp = ((DistributionClusterer)clusterer).
densityForInstance(test.instance(j));
// double temp = Utils.sum(tempDist);
if (temp > 0) {
foldAv += Math.log(temp);
}
} catch (Exception ex) {
// unclustered instances
}
}
CvAv += (foldAv/test.numInstances());
}
CvAv /= numFolds;
CvString.append("\n" + numFolds
+ " fold CV Log Likelihood: "
+ Utils.doubleToString(CvAv, 6, 4)
+ "\n");
return CvString.toString();
}
// ===============
// Private methods
// ===============
/**
 * Print the cluster statistics for the instances stored in a file.
 * <p>
 * Instances are read from the file one at a time; each one is clustered
 * at index 0 and then deleted again before the next one is read. The
 * report lists, for every non-empty cluster, the number and percentage of
 * instances assigned to it, followed by the number of instances for which
 * the clusterer threw an exception and, for a
 * <code>DistributionClusterer</code>, the log likelihood averaged over the
 * clustered instances.
 *
 * @param clusterer the clusterer to use for generating statistics.
 * @param fileName the name of the file holding the instances; if it is
 * empty no statistics are generated and an empty string is returned.
 * @return a string containing cluster statistics.
 * @exception Exception if the file can't be opened or statistics can't
 * be generated.
 */
private static String printClusterStats (Clusterer clusterer,
                                         String fileName)
  throws Exception
{
  StringBuffer text = new StringBuffer();
  int numProcessed = 0;
  int cnum;
  double loglk = 0.0;
  double temp;
  int cc = clusterer.numberOfClusters();
  double[] instanceStats = new double[cc];
  int unclusteredInstances = 0;

  if (fileName.length() != 0) {
    BufferedReader inStream = null;
    try {
      inStream = new BufferedReader(new FileReader(fileName));
    }
    catch (Exception e) {
      // Same message as before, but keep the underlying failure as the cause.
      throw new Exception("Can't open file " + e.getMessage() + '.', e);
    }

    try {
      // NOTE(review): presumably this constructor reads only the header and
      // the second argument is an initial capacity -- confirm in Instances.
      Instances inst = new Instances(inStream, 1);
      while (inst.readInstance(inStream)) {
        try {
          cnum = clusterer.clusterInstance(inst.instance(0));
          if (clusterer instanceof DistributionClusterer) {
            temp = ((DistributionClusterer)clusterer).
              densityForInstance(inst.instance(0));
            // Non-positive densities are skipped: their log is undefined.
            if (temp > 0) {
              loglk += Math.log(temp);
            }
          }
          instanceStats[cnum]++;
        }
        catch (Exception e) {
          // The clusterer could not handle this instance; count it separately.
          unclusteredInstances++;
        }
        // Drop the instance just processed so the next one lands at index 0.
        inst.delete(0);
        numProcessed++;
      }
    }
    finally {
      // Always release the file handle, even if reading or clustering failed.
      try {
        inStream.close();
      }
      catch (Exception ignored) {
        // A failure to close a stream that was only read cannot affect the
        // statistics and must not mask an exception thrown above.
      }
    }

    // Column widths are the decimal digit counts of the largest values.
    // Counting characters is exact, whereas Math.log(n)/Math.log(10) falls
    // just short of an integer for some powers of ten (e.g. 1000).
    int clustFieldWidth = Integer.toString(cc).length();
    int numInstFieldWidth = Integer.toString(numProcessed).length();

    double sum = Utils.sum(instanceStats);
    // Average over the clustered instances (NaN if none were clustered).
    loglk /= sum;

    text.append("Clustered Instances\n");
    for (int c = 0; c < cc; c++) {
      if (instanceStats[c] > 0) {
        text.append(Utils.doubleToString((double)c,
                                         clustFieldWidth, 0)
                    + " "
                    + Utils.doubleToString(instanceStats[c],
                                           numInstFieldWidth, 0)
                    + " ("
                    + Utils.doubleToString((instanceStats[c]/sum*100.0)
                                           , 3, 0) + "%)\n");
      }
    }

    if (unclusteredInstances > 0) {
      text.append("\nUnclustered Instances : "+unclusteredInstances);
    }

    if (clusterer instanceof DistributionClusterer) {
      text.append("\n\nLog likelihood: "
                  + Utils.doubleToString(loglk, 1, 5)
                  + "\n");
    }
  }

  return text.toString();
}
/**
 * Print the cluster statistics for an in-memory set of instances.
 * <p>
 * Every instance is passed to the clusterer in order. The report lists,
 * for every non-empty cluster, the number and percentage of instances
 * assigned to it, followed by the number of instances for which the
 * clusterer threw an exception and, for a
 * <code>DistributionClusterer</code>, the log likelihood averaged over the
 * clustered instances. The supplied data set is only read, never modified.
 *
 * @param clusterer the clusterer to use for generating statistics.
 * @param data the instances to generate statistics for.
 * @return a string containing cluster statistics.
 * @exception Exception if statistics can't be generated.
 */
private static String printClusterStats (Clusterer clusterer, Instances data)
  throws Exception {
  StringBuffer text = new StringBuffer();
  int cnum;
  double loglk = 0.0;
  double temp;
  int cc = clusterer.numberOfClusters();
  double[] instanceStats = new double[cc];
  int unclusteredInstances = 0;

  // Walk the data by index. Deleting the head instance on every pass would
  // empty the caller's data set as a side effect of printing statistics.
  int numProcessed = data.numInstances();
  for (int j = 0; j < numProcessed; j++) {
    try {
      cnum = clusterer.clusterInstance(data.instance(j));
      if (clusterer instanceof DistributionClusterer) {
        temp = ((DistributionClusterer)clusterer).
          densityForInstance(data.instance(j));
        // Non-positive densities are skipped: their log is undefined.
        if (temp > 0) {
          loglk += Math.log(temp);
        }
      }
      instanceStats[cnum]++;
    }
    catch (Exception e) {
      // The clusterer could not handle this instance; count it separately.
      unclusteredInstances++;
    }
  }

  // Column widths are the decimal digit counts of the largest values.
  // Counting characters is exact, whereas Math.log(n)/Math.log(10) falls
  // just short of an integer for some powers of ten (e.g. 1000).
  int clustFieldWidth = Integer.toString(cc).length();
  int numInstFieldWidth = Integer.toString(numProcessed).length();

  double sum = Utils.sum(instanceStats);
  // Average over the clustered instances (NaN if none were clustered).
  loglk /= sum;

  text.append("Clustered Instances\n");
  for (int c = 0; c < cc; c++) {
    if (instanceStats[c] > 0) {
      text.append(Utils.doubleToString((double)c,
                                       clustFieldWidth, 0)
                  + " "
                  + Utils.doubleToString(instanceStats[c],
                                         numInstFieldWidth, 0)
                  + " ("
                  + Utils.doubleToString((instanceStats[c]/sum*100.0)
                                         , 3, 0) + "%)\n");
    }
  }

  if (unclusteredInstances > 0) {
    text.append("\nUnclustered Instances : "+unclusteredInstances);
  }

  if (clusterer instanceof DistributionClusterer) {
    text.append("\n\nLog likelihood: "
                + Utils.doubleToString(loglk, 1, 5)
                + "\n");
  }

  return text.toString();
}
/**
 * Print the cluster assignments for either the training
 * or the testing data.
 * <p>
 * When <code>testFileName</code> is non-empty the instances are read from
 * that file one at a time; otherwise the instances in <code>train</code>
 * are used. One line is written per instance: its running index, the
 * cluster number returned by the clusterer (or the word "Unclustered" if
 * the clusterer threw an exception for it), and the selected attribute
 * values.
 *
 * @param clusterer the clusterer to use for cluster assignments
 * @param train the training instances; only used when no test file is given
 * @param testFileName the name of the test file, or an empty string to
 * output assignments for the training data
 * @param attributesToOutput the range of attributes whose values are
 * printed with each assignment
 * @return a string containing the instance indexes and cluster assigns.
 * @exception Exception if the test file can't be opened or cluster
 * assignments can't be printed
 */
private static String printClusterings (Clusterer clusterer, Instances train,
                                        String testFileName, Range attributesToOutput)
  throws Exception
{
  StringBuffer text = new StringBuffer();
  int i = 0;
  int cnum;

  if (testFileName.length() != 0) {
    BufferedReader testStream = null;
    try {
      testStream = new BufferedReader(new FileReader(testFileName));
    }
    catch (Exception e) {
      // Same message as before, but keep the underlying failure as the cause.
      throw new Exception("Can't open file " + e.getMessage() + '.', e);
    }

    try {
      // NOTE(review): presumably this constructor reads only the header and
      // the second argument is an initial capacity -- confirm in Instances.
      Instances test = new Instances(testStream, 1);
      while (test.readInstance(testStream)) {
        try {
          cnum = clusterer.clusterInstance(test.instance(0));
          text.append(i + " " + cnum + " "
                      + attributeValuesString(test.instance(0), attributesToOutput) + "\n");
        }
        catch (Exception e) {
          // Best effort: report the instance as unclustered and carry on.
          text.append(i + " Unclustered "
                      + attributeValuesString(test.instance(0), attributesToOutput) + "\n");
        }
        // Drop the instance just processed so the next one lands at index 0.
        test.delete(0);
        i++;
      }
    }
    finally {
      // Always release the file handle, even if reading or clustering failed.
      try {
        testStream.close();
      }
      catch (Exception ignored) {
        // A failure to close a stream that was only read cannot affect the
        // output and must not mask an exception thrown above.
      }
    }
  }
  else// output for training data
  {
    for (i = 0; i < train.numInstances(); i++) {
      try {
        cnum = clusterer.clusterInstance(train.instance(i));
        text.append(i + " " + cnum + " "
                    + attributeValuesString(train.instance(i), attributesToOutput)
                    + "\n");
      }
      catch (Exception e) {
        // Best effort: report the instance as unclustered and carry on.
        text.append(i + " Unclustered "
                    + attributeValuesString(train.instance(i), attributesToOutput)
                    + "\n");
      }
    }
  }

  return text.toString();
}
/**
 * Builds a string listing the values of the attributes that fall inside a
 * given range of indices, separated by commas and enclosed in parentheses.
 * <p>
 * The upper limit of the range is set to the index of the instance's last
 * attribute before it is queried. If the range is <code>null</code>, or no
 * attribute index lies in the range, the result is the empty string.
 *
 * @param instance the instance to print the values from
 * @param attRange the range of the attributes to list; may be
 * <code>null</code>
 * @return a string listing values of the attributes in the range
 */
private static String attributeValuesString(Instance instance, Range attRange) {
  // Nothing was asked for, so nothing is listed.
  if (attRange == null) {
    return "";
  }

  StringBuffer listing = new StringBuffer();
  attRange.setUpper(instance.numAttributes() - 1);

  int index = 0;
  while (index < instance.numAttributes()) {
    if (attRange.isInRange(index)) {
      // An empty buffer means this is the first value: open the list.
      // Every later value is preceded by a separator instead.
      listing.append(listing.length() == 0 ? "(" : ",");
      listing.append(instance.toString(index));
    }
    index++;
  }

  // Close the list only if it was ever opened.
  if (listing.length() > 0) {
    listing.append(")");
  }
  return listing.toString();
}
/**
* Make up the help string giving all the command line options
*
* @param clusterer the clusterer to include options for
* @return a string detailing the valid command line options
*/
private static String makeOptionString (Clusterer clusterer) {
StringBuffer optionsText = new StringBuffer("");
// General options
optionsText.append("\n\nGeneral options:\n\n");
optionsText.append("-t <name of training file>\n");
optionsText.append("\tSets training file.\n");
optionsText.append("-T <name of test file>\n");
optionsText.append("-l <name of input file>\n");
optionsText.append("\tSets model input file.\n");
optionsText.append("-d <name of output file>\n");
optionsText.append("\tSets model output file.\n");
optionsText.append("-p <attribute range>\n");
optionsText.append("\tOutput predictions. Predictions are for "
+ "training file"
+ "\n\tif only training file is specified,"
+ "\n\totherwise predictions are for the test file."
+ "\n\tThe range specifies attribute values to be output"
+ "\n\twith the predictions. Use '-p 0' for none.\n");
optionsText.append("-x <number of folds>\n");
optionsText.append("\tOnly Distribution Clusterers can be cross "
+ "validated.\n");
optionsText.append("-s <random number seed>\n");
optionsText.append("-c <class index>\n");
optionsText.append("\tSet class attribute. If supplied, class is ignored");
optionsText.append("\n\tduring clustering but is used in a classes to");
optionsText.append("\n\tclusters evaluation.\n");
// Get scheme-specific options
if (clusterer instanceof OptionHandler) {
optionsText.append("\nOptions specific to "
+ clusterer.getClass().getName() + ":\n\n");
Enumeration enum = ((OptionHandler)clusterer).listOptions();
while (enum.hasMoreElements()) {
Option option = (Option)enum.nextElement();
optionsText.append(option.synopsis() + '\n');
optionsText.append(option.description() + "\n");
}
}
return optionsText.toString();
}
/**
 * Command line entry point.
 * <p>
 * The first argument names the clusterer to instantiate; it is blanked
 * out in the array before the whole array is handed on to
 * <code>evaluateClusterer</code>, whose result is printed to standard
 * output. The message of any exception raised along the way is passed to
 * the logger instead of being propagated.
 *
 * @param args the options; the first one must be the name of a clusterer
 */
public static void main (String[] args) {
  try {
    if (args.length == 0) {
      throw new Exception("The first argument must be the name of a clusterer");
    }

    // Take the scheme name out of the array so that it is not seen as an
    // option by the evaluation.
    String schemeName = args[0];
    args[0] = "";

    Clusterer scheme = Clusterer.forName(schemeName, null);
    System.out.println(evaluateClusterer(scheme, args));
  }
  catch (Exception e) {
    log.error(e.getMessage());
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -