📄 clusterevaluation.java
字号:
throw new Exception("Can only do class based evaluation on the "
+"training data");
}
if (objectInputFileName.length() != 0) {
throw new Exception("Can't load a clusterer and do class based "
+"evaluation");
}
}
if (theClass != -1) {
if (theClass < 1
|| theClass > train.numAttributes()) {
throw new Exception("Class is out of range!");
}
if (!train.attribute(theClass-1).isNominal()) {
throw new Exception("Class must be nominal!");
}
train.setClassIndex(theClass-1);
}
if (objectInputFileName.length() != 0) {
objectInputStream = new ObjectInputStream(new FileInputStream(objectInputFileName));
}
if (objectOutputFileName.length() != 0) {
objectOutputStream = new
ObjectOutputStream(new FileOutputStream(objectOutputFileName));
}
}
catch (Exception e) {
throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
}
// Save options
if (options != null) {
savedOptions = new String[options.length];
System.arraycopy(options, 0, savedOptions, 0, options.length);
}
if (objectInputFileName.length() != 0) {
Utils.checkForRemainingOptions(options);
}
// Set options for clusterer
if (clusterer instanceof OptionHandler) {
((OptionHandler)clusterer).setOptions(options);
}
Utils.checkForRemainingOptions(options);
if (objectInputFileName.length() != 0) {
// Load the clusterer from file
clusterer = (Clusterer)objectInputStream.readObject();
objectInputStream.close();
}
else {
// Build the clusterer if no object file provided
if (theClass == -1) {
clusterer.buildClusterer(train);
} else {
AttributeFilter removeClass = new AttributeFilter();
removeClass.setAttributeIndices(""+theClass);
removeClass.setInvertSelection(false);
removeClass.setInputFormat(train);
Instances clusterTrain = Filter.useFilter(train, removeClass);
clusterer.buildClusterer(clusterTrain);
ClusterEvaluation ce = new ClusterEvaluation();
ce.setClusterer(clusterer);
ce.evaluateClusterer(train);
return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
}
}
/* Output cluster predictions only (for the test data if specified,
otherwise for the training data */
if (printClusterAssignments) {
return printClusterings(clusterer, train, testFileName, attributesToOutput);
}
text.append(clusterer.toString());
text.append("\n\n=== Clustering stats for training data ===\n\n"
+ printClusterStats(clusterer, data));
if (testFileName.length() != 0) {
text.append("\n\n=== Clustering stats for testing data ===\n\n"
+ printClusterStats(clusterer, testFileName));
}
if ((clusterer instanceof DistributionClusterer) &&
(doXval == true) &&
(testFileName.length() == 0) &&
(objectInputFileName.length() == 0)) {
// cross validate the log likelihood on the training data
random = new Random(seed);
random.setSeed(seed);
train.randomize(random);
text.append(crossValidateModel(clusterer.getClass().getName()
, train, folds, savedOptions));
}
// Save the clusterer if an object output file is provided
if (objectOutputFileName.length() != 0) {
objectOutputStream.writeObject(clusterer);
objectOutputStream.flush();
objectOutputStream.close();
}
return text.toString();
}
/**
 * Evaluates a clusterer on a percentage split of the supplied data.
 *
 * The first <code>percentage</code> percent of the instances in
 * <code>data</code> (in their existing order) form the training set; the
 * remaining instances form the test set. No training file is read by this
 * method. The following options are consumed from <code>options</code>
 * before the rest are handed to the clusterer:
 * <ul>
 * <li>"-h": request help; an exception carrying the option summary is
 * thrown.</li>
 * <li>"-l file": load a serialized clusterer from the given file instead of
 * building one (only permitted together with "-p").</li>
 * <li>"-d file": save the clusterer to the given file after evaluation.</li>
 * <li>"-p range": output cluster assignments only, listing the given range
 * of attributes ("0" for none).</li>
 * <li>"-T file": test file name passed on when "-p" is given; its presence
 * also disables cross-validation and class based evaluation.</li>
 * <li>"-x folds" / "-s seed": cross-validate on the training split
 * (distribution clusterers only, and only without "-T" and "-l").</li>
 * <li>"-c index": class based evaluation using the given 1-based attribute
 * index ("first" and "last" are accepted); only the training results are
 * returned in this mode.</li>
 * </ul>
 *
 * @param clusterer machine learning clusterer
 * @param options the array of string containing the options
 * @param data the instances to split into training and test sets
 * @param percentage the percentage (0-100) of instances used for training
 * @exception Exception if model could not be evaluated successfully
 * @return a string describing the results
 */
public static String evaluateClustererInstancePercentage (Clusterer clusterer,
                                                          String[] options,
                                                          Instances data,
                                                          int percentage)
  throws Exception
{
  int seed = 1, folds = 10;
  boolean doXval = false;
  Instances train = null;
  Instances test = null;
  String testFileName, seedString, foldsString, objectInputFileName,
    objectOutputFileName, attributeRangeString;
  String[] savedOptions = null;
  boolean printClusterAssignments = false;
  Range attributesToOutput = null;
  ObjectInputStream objectInputStream = null;
  ObjectOutputStream objectOutputStream = null;
  StringBuffer text = new StringBuffer();
  int theClass = -1; // class based evaluation of clustering

  try {
    if (percentage < 0 || percentage > 100) {
      throw new Exception("Percentage must be between 0 and 100 (was "
                          + percentage + ").");
    }

    // Split the data; the product is computed as a long so that large
    // data sets cannot overflow int arithmetic.
    int total = data.numInstances();
    int sizeOfTrainFile = (int) ((long) total * percentage / 100);
    int sizeOfTestFile = total - sizeOfTrainFile;
    train = new Instances(data, 0, sizeOfTrainFile);
    test = new Instances(data, sizeOfTrainFile, sizeOfTestFile);

    if (Utils.getFlag('h', options)) {
      throw new Exception("Help requested.");
    }

    // Get basic options (options the same for all clusterers)
    objectInputFileName = Utils.getOption('l', options);
    objectOutputFileName = Utils.getOption('d', options);
    testFileName = Utils.getOption('T', options);

    // Check -p option
    try {
      attributeRangeString = Utils.getOption('p', options);
    }
    catch (Exception e) {
      throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
                          "It now expects a parameter specifying a range of attributes " +
                          "to list with the predictions. Use '-p 0' for none.", e);
    }
    if (attributeRangeString.length() != 0) {
      printClusterAssignments = true;
      if (!attributeRangeString.equals("0")) {
        attributesToOutput = new Range(attributeRangeString);
      }
    }

    if ((objectInputFileName.length() != 0) && !printClusterAssignments) {
      throw new Exception("Can't use both train and model file "
                          + "unless -p specified.");
    }

    seedString = Utils.getOption('s', options);
    if (seedString.length() != 0) {
      seed = Integer.parseInt(seedString);
    }

    foldsString = Utils.getOption('x', options);
    if (foldsString.length() != 0) {
      folds = Integer.parseInt(foldsString);
      doXval = true;
    }
  }
  catch (Exception e) {
    // Append the option summary so the caller can show usage information.
    throw new Exception('\n' + e.getMessage()
                        + makeOptionString(clusterer), e);
  }

  try {
    String classString = Utils.getOption('c', options);
    if (classString.length() != 0) {
      if (classString.equals("last")) {
        theClass = train.numAttributes();
      } else if (classString.equals("first")) {
        theClass = 1;
      } else {
        theClass = Integer.parseInt(classString);
      }
      if (doXval || testFileName.length() != 0) {
        throw new Exception("Can only do class based evaluation on the "
                            + "training data");
      }
      if (objectInputFileName.length() != 0) {
        throw new Exception("Can't load a clusterer and do class based "
                            + "evaluation");
      }
    }

    if (theClass != -1) {
      // theClass is 1-based; attribute indices are 0-based.
      if (theClass < 1 || theClass > train.numAttributes()) {
        throw new Exception("Class is out of range!");
      }
      if (!train.attribute(theClass - 1).isNominal()) {
        throw new Exception("Class must be nominal!");
      }
      train.setClassIndex(theClass - 1);
    }

    // Both files are opened up front so that an unusable file name is
    // reported before any clusterer is built.
    if (objectInputFileName.length() != 0) {
      objectInputStream =
        new ObjectInputStream(new FileInputStream(objectInputFileName));
    }
    if (objectOutputFileName.length() != 0) {
      objectOutputStream =
        new ObjectOutputStream(new FileOutputStream(objectOutputFileName));
    }
  }
  catch (Exception e) {
    // The input stream may already be open if opening the output failed.
    if (objectInputStream != null) {
      try {
        objectInputStream.close();
      } catch (Exception ignored) {
        // nothing useful can be done; the original failure is reported
      }
    }
    throw new Exception("ClusterEvaluation: " + e.getMessage() + '.', e);
  }

  try {
    // Save options
    if (options != null) {
      savedOptions = new String[options.length];
      System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    // A loaded clusterer takes no further options, so none may remain.
    if (objectInputFileName.length() != 0) {
      Utils.checkForRemainingOptions(options);
    }

    // Set options for clusterer
    if (clusterer instanceof OptionHandler) {
      ((OptionHandler)clusterer).setOptions(options);
    }
    Utils.checkForRemainingOptions(options);

    if (objectInputFileName.length() != 0) {
      // Load the clusterer from file.
      // NOTE(review): this is Java-native deserialization of a
      // user-supplied file; only load model files from trusted sources.
      clusterer = (Clusterer)objectInputStream.readObject();
      objectInputStream.close();
      objectInputStream = null;
    }
    else if (theClass == -1) {
      // Build the clusterer if no object file provided
      clusterer.buildClusterer(train);
    }
    else {
      // Class based evaluation: build on the training split after passing
      // it through a filter configured with the class attribute index,
      // then evaluate against the unfiltered training split.
      AttributeFilter removeClass = new AttributeFilter();
      removeClass.setAttributeIndices("" + theClass);
      removeClass.setInvertSelection(false);
      removeClass.setInputFormat(train);
      Instances clusterTrain = Filter.useFilter(train, removeClass);
      clusterer.buildClusterer(clusterTrain);

      ClusterEvaluation ce = new ClusterEvaluation();
      ce.setClusterer(clusterer);
      ce.evaluateClusterer(train);
      return "\n\n=== Clustering stats for training data ===\n\n"
        + ce.clusterResultsToString();
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data) */
    if (printClusterAssignments) {
      return printClusterings(clusterer, train, testFileName,
                              attributesToOutput);
    }

    text.append(clusterer.toString());
    // Report on the training split only (not the full data set, which
    // also contains the held-out test instances).
    text.append("\n\n=== Clustering stats for training data ===\n\n"
                + printClusterStats(clusterer, train));
    text.append("\n\n=== Clustering stats for testing data ===\n\n"
                + printClusterStats(clusterer, test));

    if ((clusterer instanceof DistributionClusterer) &&
        doXval &&
        (testFileName.length() == 0) &&
        (objectInputFileName.length() == 0)) {
      // cross validate the log likelihood on the training data
      Random random = new Random(seed);
      train.randomize(random);
      text.append(crossValidateModel(clusterer.getClass().getName(),
                                     train, folds, savedOptions));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      objectOutputStream.writeObject(clusterer);
      objectOutputStream.flush();
      objectOutputStream.close();
      objectOutputStream = null;
    }

    return text.toString();
  }
  finally {
    // Release any stream still open because of an early return or an
    // exception. Failures while closing are ignored so that they cannot
    // mask the result or the exception already on its way out.
    if (objectInputStream != null) {
      try {
        objectInputStream.close();
      } catch (Exception ignored) {
        // best effort
      }
    }
    if (objectOutputStream != null) {
      try {
        objectOutputStream.close();
      } catch (Exception ignored) {
        // best effort
      }
    }
  }
}
/**
* Performs a cross-validation
* for a distribution clusterer on a set of instances.
*
* @param clustererString a string naming the class of the clusterer
* @param data the data on which the cross-validation is to be
* performed
* @param numFolds the number of folds for the cross-validation
* @param options the options to the clusterer
* @return a string containing the cross validated log likelihood
* @exception Exception if a clusterer could not be generated
*/
public static String crossValidateModel (String clustererString,
Instances data,
int numFolds,
String[] options)
throws Exception
{
Clusterer clusterer = null;
Instances train, test;
String[] savedOptions = null;
double foldAv;
double CvAv = 0.0;
double[] tempDist;
StringBuffer CvString = new StringBuffer();
if (options != null) {
savedOptions = new String[options.length];
}
data = new Instances(data);
for (int i = 0; i < numFolds; i++) {
// create clusterer
try {
clusterer = (Clusterer)Class.forName(clustererString).newInstance();
}
catch (Exception e) {
throw new Exception("Can't find class with name "
+ clustererString + '.');
}
if (!(clusterer instanceof DistributionClusterer)) {
throw new Exception(clustererString
+ " must be a distrinbution "
+ "clusterer.");
}
// Save options
if (options != null) {
System.arraycopy(options, 0, savedOptions, 0, options.length);
}
// Parse options
if (clusterer instanceof OptionHandler) {
try {
((OptionHandler)clusterer).setOptions(savedOptions);
Utils.checkForRemainingOptions(savedOptions);
}
catch (Exception e) {
throw new Exception("Can't parse given options in "
+ "cross-validation!");
}
}
// Build and test classifier
train = data.trainCV(numFolds, i);
clusterer.buildClusterer(train);
test = data.testCV(numFolds, i);
foldAv = 0.0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -