📄 checkclassifier.java
字号:
evaluation = new Evaluation(train);
} catch (Exception ex) {
throw new Error("Error setting up for tests: " + ex.getMessage());
}
try {
Instances trainCopy = new Instances(train);
Instances testCopy = new Instances(test);
classifier.buildClassifier(trainCopy);
compareDatasets(train, trainCopy);
built = true;
testWRTZeroR(classifier, evaluation, trainCopy, testCopy);
compareDatasets(test, testCopy);
println("yes");
result[0] = true;
} catch (Exception ex) {
println("no");
result[0] = false;
if (m_Debug) {
println("\n=== Full Report ===");
print("Problem during");
if (built) {
print(" testing");
} else {
print(" training");
}
println(": " + ex.getMessage() + "\n");
println("Here are the datasets:\n");
println("=== Train Dataset ===\n"
+ train.toString() + "\n");
println("=== Test Dataset ===\n"
+ test.toString() + "\n\n");
}
}
return result;
}
/**
 * Runs a test on the datasets with the given characteristics.
 *
 * A training and a test set are generated with fixed seeds, optionally
 * injected with missing values, and a fresh copy of the classifier is built
 * on the training data and compared against ZeroR on the test data.
 *
 * @param nominalPredictor if true, nominal predictor attributes are generated
 * (two are requested and the first one is deleted again)
 * @param numericPredictor if true, one numeric predictor attribute is generated
 * @param stringPredictor if true, one string predictor attribute is generated
 * @param numericClass if true, the class attribute is numeric
 * @param missingLevel the level of missing values passed to addMissing;
 * nothing is added unless it is positive, and the test data is capped at 50
 * @param predictorMissing if true, predictor values may be set to missing
 * @param classMissing if true, class values may be set to missing
 * @param numTrain the number of training instances to generate
 * @param numTest the number of test instances to generate
 * @param numClasses the number of classes (if nominal class)
 * @param accepts strings (expected in lower case, since they are matched
 * against the lower-cased exception message) that make a failure acceptable
 * @return index 0 is true if the test was passed, index 1 is true if test
 * was acceptable
 * @throws Error if the datasets, classifier copy or evaluation object
 * could not be set up
 */
protected boolean[] runBasicTest(boolean nominalPredictor,
                                 boolean numericPredictor,
                                 boolean stringPredictor,
                                 boolean numericClass,
                                 int missingLevel,
                                 boolean predictorMissing,
                                 boolean classMissing,
                                 int numTrain,
                                 int numTest,
                                 int numClasses,
                                 FastVector accepts) {

  boolean[] result = new boolean[2];
  Instances train = null;
  Instances test = null;
  Classifier classifier = null;
  Evaluation evaluation = null;
  boolean built = false;

  try {
    train = makeTestDataset(42, numTrain,
                            nominalPredictor ? 2 : 0,
                            numericPredictor ? 1 : 0,
                            stringPredictor ? 1 : 0,
                            numClasses,
                            numericClass);
    test = makeTestDataset(24, numTest,
                           nominalPredictor ? 2 : 0,
                           numericPredictor ? 1 : 0,
                           stringPredictor ? 1 : 0,
                           numClasses,
                           numericClass);
    if (nominalPredictor) {
      train.deleteAttributeAt(0);
      test.deleteAttributeAt(0);
    }
    if (missingLevel > 0) {
      addMissing(train, missingLevel, predictorMissing, classMissing);
      addMissing(test, Math.min(missingLevel, 50), predictorMissing,
                 classMissing);
    }
    classifier = Classifier.makeCopies(getClassifier(), 1)[0];
    evaluation = new Evaluation(train);
  } catch (Exception ex) {
    ex.printStackTrace();
    throw new Error("Error setting up for tests: " + ex.getMessage());
  }

  try {
    classifier.buildClassifier(train);
    built = true;
    if (!testWRTZeroR(classifier, evaluation, train, test)[0]) {
      result[1] = true;
      // handled by the catch block below, which downgrades it to a warning
      throw new Exception("Scheme performs worse than ZeroR");
    }
    println("yes");
    result[0] = true;
  } catch (Exception ex) {
    boolean acceptable = false;
    // getMessage() may legitimately be null (e.g. a bare
    // NullPointerException); guard against it so that the failure is
    // reported instead of crashing inside this handler.
    String rawMsg = ex.getMessage();
    String report = (rawMsg == null) ? ex.toString() : rawMsg;
    String msg = (rawMsg == null) ? "" : rawMsg.toLowerCase();

    if (msg.indexOf("worse than zeror") >= 0) {
      println("warning: performs worse than ZeroR");
      result[0] = true;
      result[1] = true;
    } else {
      for (int i = 0; i < accepts.size(); i++) {
        if (msg.indexOf((String) accepts.elementAt(i)) >= 0) {
          acceptable = true;
        }
      }
      println("no" + (acceptable ? " (OK error message)" : ""));
      result[1] = acceptable;
    }

    if (m_Debug) {
      println("\n=== Full Report ===");
      print("Problem during");
      if (built) {
        print(" testing");
      } else {
        print(" training");
      }
      println(": " + report + "\n");
      if (!acceptable) {
        if (accepts.size() > 0) {
          print("Error message doesn't mention ");
          for (int i = 0; i < accepts.size(); i++) {
            if (i != 0) {
              print(" or ");
            }
            print('"' + (String) accepts.elementAt(i) + '"');
          }
        }
        println("here are the datasets:\n");
        println("=== Train Dataset ===\n"
                + train.toString() + "\n");
        println("=== Test Dataset ===\n"
                + test.toString() + "\n\n");
      }
    }
  }

  return result;
}
/**
 * Determine whether the scheme performs worse than ZeroR during testing.
 *
 * The scheme is evaluated on the test data first; any exception raised by
 * that evaluation is propagated unchanged. ZeroR is then built on the
 * training data and evaluated on the same test data.
 *
 * @param classifier the pre-trained classifier
 * @param evaluation the classifier evaluation object
 * @param train the training data
 * @param test the test data
 * @return index 0 is true if the error rate of ZeroR is greater than or
 * equal to that of the scheme, i.e. the scheme performs at least as well as
 * ZeroR; index 1 is never set by this method
 * @throws Exception if there was a problem during the scheme's testing
 * @throws Error if ZeroR itself could not be built or evaluated
 */
protected boolean[] testWRTZeroR(Classifier classifier,
                                 Evaluation evaluation,
                                 Instances train, Instances test)
  throws Exception {

  boolean[] result = new boolean[2];

  evaluation.evaluateModel(classifier, test);
  try {
    // Tested OK, compare with ZeroR
    Classifier zeroR = new weka.classifiers.rules.ZeroR();
    zeroR.buildClassifier(train);
    Evaluation zeroREval = new Evaluation(train);
    zeroREval.evaluateModel(zeroR, test);
    result[0] = Utils.grOrEq(zeroREval.errorRate(), evaluation.errorRate());
  } catch (Exception ex) {
    // keep the original exception as the cause so its stack trace survives
    throw new Error("Problem determining ZeroR performance: "
                    + ex.getMessage(), ex);
  }

  return result;
}
/**
 * Compare two datasets to see if they differ.
 *
 * Checks, in this order: the headers, the number of instances, and then for
 * each pair of instances at the same position the attribute values
 * (including which ones are missing) followed by the instance weight.
 *
 * @param data1 one set of instances
 * @param data2 the other set of instances
 * @throws Exception if the datasets differ
 */
protected void compareDatasets(Instances data1, Instances data2)
  throws Exception {

  if (!data2.equalHeaders(data1)) {
    throw new Exception("header has been modified");
  }
  if (data2.numInstances() != data1.numInstances()) {
    throw new Exception("number of instances has changed");
  }

  for (int i = 0; i < data2.numInstances(); i++) {
    Instance orig = data1.instance(i);
    Instance copy = data2.instance(i);

    for (int j = 0; j < orig.numAttributes(); j++) {
      if (orig.isMissing(j)) {
        if (!copy.isMissing(j)) {
          throw new Exception("instances have changed");
        }
      } else if (orig.value(j) != copy.value(j)) {
        throw new Exception("instances have changed");
      }
    }

    // The weight belongs to the instance, not to an attribute: compare it
    // once per instance rather than once per attribute, and also when the
    // instance has no attributes at all.
    if (orig.weight() != copy.weight()) {
      throw new Exception("instance weights have changed");
    }
  }
}
/**
 * Add missing values to a dataset.
 *
 * A random number generator with the fixed seed 1 is used, so the same
 * dataset always gets the same values set to missing.
 *
 * @param data the instances to add missing values to
 * @param level the level of missing values to add (if positive, this
 * is the approximate percentage chance that a value will be set to missing;
 * if zero or negative no value is modified -- setting all but one value to
 * missing for negative levels is not yet implemented)
 * @param predictorMissing if true, predictor attributes will be modified
 * @param classMissing if true, the class attribute will be modified
 */
protected void addMissing(Instances data, int level,
                          boolean predictorMissing, boolean classMissing) {

  int classIndex = data.classIndex();
  Random random = new Random(1);

  for (int i = 0; i < data.numInstances(); i++) {
    Instance current = data.instance(i);
    for (int j = 0; j < data.numAttributes(); j++) {
      if (((j == classIndex) && classMissing) ||
          ((j != classIndex) && predictorMissing)) {
        // Take the remainder before the absolute value:
        // Math.abs(Integer.MIN_VALUE) is still negative, so
        // Math.abs(nextInt()) % 100 could be below zero. For every other
        // draw both forms give the same number.
        if (Math.abs(random.nextInt() % 100) < level) {
          current.setMissing(j);
        }
      }
    }
  }
}
/**
 * Make a simple set of instances, which can later be modified
 * for use in specific tests.
 *
 * The attributes are created in the order nominal, numeric, string, class.
 * The i-th nominal attribute (counting from 1) has i labels. Predictor
 * values are derived from the randomly drawn class value plus some noise,
 * string values are nine randomly chosen words joined by spaces. The
 * finished dataset is passed through {@link #process(Instances)}.
 *
 * @param seed the random number seed
 * @param numInstances the number of instances to generate
 * @param numNominal the number of nominal attributes
 * @param numNumeric the number of numeric attributes
 * @param numString the number of string attributes
 * @param numClasses the number of classes (if nominal class)
 * @param numericClass true if the class attribute should be numeric
 * @return the test dataset
 * @throws Exception if the dataset couldn't be generated
 */
protected Instances makeTestDataset(int seed, int numInstances,
                                    int numNominal, int numNumeric,
                                    int numString,
                                    int numClasses, boolean numericClass)
  throws Exception {

  String[] words = new String[]{"The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"};
  int numAttributes = numNominal + numNumeric + numString + 1;
  Random random = new Random(seed);
  FastVector attributes = new FastVector(numAttributes);

  // Add Nominal attributes
  for (int i = 0; i < numNominal; i++) {
    FastVector nomStrings = new FastVector(i + 1);
    for (int j = 0; j <= i; j++) {
      nomStrings.addElement("a" + (i + 1) + "l" + (j + 1));
    }
    attributes.addElement(new Attribute("Nominal" + (i + 1), nomStrings));
  }

  // Add Numeric attributes
  for (int i = 0; i < numNumeric; i++) {
    attributes.addElement(new Attribute("Numeric" + (i + 1)));
  }

  // Add some String attributes...
  for (int i = 0; i < numString; i++) {
    attributes.addElement(new Attribute("String" + (i + 1), (FastVector) null));
  }

  // Add class attribute
  if (numericClass) {
    attributes.addElement(new Attribute("Class"));
  } else {
    FastVector nomStrings = new FastVector();
    for (int j = 0; j < numClasses; j++) {
      nomStrings.addElement("cl" + (j + 1));
    }
    attributes.addElement(new Attribute("Class", nomStrings));
  }

  Instances data = new Instances("CheckSet", attributes, numInstances);
  data.setClassIndex(data.numAttributes() - 1);

  // Generate the instances.
  // Note on the random indices below: the remainder is taken BEFORE the
  // absolute value because Math.abs(Integer.MIN_VALUE) is negative, which
  // would otherwise produce a negative index. For all other draws the
  // result, and hence the generated data, is unchanged.
  for (int i = 0; i < numInstances; i++) {
    Instance current = new Instance(numAttributes);
    current.setDataset(data);
    if (numericClass) {
      current.setClassValue(random.nextFloat() * 0.25
                            + Math.abs(random.nextInt()
                                       % Math.max(2, numNominal)));
    } else {
      current.setClassValue(Math.abs(random.nextInt() % data.numClasses()));
    }
    double classVal = current.classValue();
    double newVal = 0;
    for (int j = 0; j < numAttributes - 1; j++) {
      switch (data.attribute(j).type()) {
      case Attribute.NUMERIC:
        // class value scaled by 4 plus noise from [-0.5, 0.5)
        newVal = classVal * 4 + random.nextFloat() * 1 - 0.5;
        current.setValue(j, newVal);
        break;
      case Attribute.NOMINAL:
        // about 20% random labels, otherwise a label tied to the class
        if (random.nextFloat() < 0.2) {
          newVal = Math.abs(random.nextInt()
                            % data.attribute(j).numValues());
        } else {
          newVal = ((int) classVal) % data.attribute(j).numValues();
        }
        current.setValue(j, newVal);
        break;
      case Attribute.STRING:
        String str = "";
        for (int n = 0; n < words.length; n++) {
          if (n > 0) {
            str += " ";
          }
          str += words[random.nextInt(words.length)];
        }
        current.setValue(j, data.attribute(j).addStringValue(str));
        break;
      }
    }
    data.add(current);
  }

  return process(data);
}
/**
 * Hook through which the generated data can be modified further: the data
 * is handed to the post-processor when one is available.
 *
 * @param data the data to process
 * @return the data itself if no post-processor is set, otherwise the
 * result of running the post-processor on it
 * @see #m_PostProcessor
 */
protected Instances process(Instances data) {
  return (getPostProcessor() == null)
    ? data
    : getPostProcessor().process(data);
}
/**
 * Prints a short, parenthesised summary of the dataset characteristics,
 * e.g. " (nominal class, numeric &amp; nominal predictors)".
 *
 * @param nominalPredictor true if nominal predictor attributes are present
 * @param numericPredictor true if numeric predictor attributes are present
 * @param stringPredictor true if string predictor attributes are present
 * @param numericClass true if the class attribute is numeric
 */
protected void printAttributeSummary(boolean nominalPredictor,
                                     boolean numericPredictor,
                                     boolean stringPredictor,
                                     boolean numericClass) {

  // predictor kinds in the order in which they are listed
  boolean[] present = {numericPredictor, nominalPredictor, stringPredictor};
  String[] labels = {" numeric", " nominal", " string"};

  String predictors = "";
  for (int k = 0; k < labels.length; k++) {
    if (present[k]) {
      if (predictors.length() > 0) {
        predictors += " &";
      }
      predictors += labels[k];
    }
  }

  String classPart = numericClass ? " (numeric class," : " (nominal class,";
  print(classPart + predictors + " predictors)");
}
/**
 * Test method for this class: configures a checker from the command line
 * and runs its tests. If the options cannot be parsed, the error message
 * followed by a listing of all options is written to standard error.
 *
 * @param args the command line options
 */
public static void main(String [] args) {
  try {
    CheckClassifier checker = new CheckClassifier();

    try {
      checker.setOptions(args);
      Utils.checkForRemainingOptions(args);
    } catch (Exception optionError) {
      // turn the failure into a usage message listing every option
      StringBuilder usage = new StringBuilder();
      usage.append(optionError.getMessage());
      usage.append("\nCheckClassifier Options:\n\n");
      for (Enumeration options = checker.listOptions();
           options.hasMoreElements(); ) {
        Option current = (Option) options.nextElement();
        usage.append(current.synopsis()).append("\n");
        usage.append(current.description()).append("\n");
      }
      throw new Exception(usage.toString());
    }

    checker.doTests();
  } catch (Exception failure) {
    System.err.println(failure.getMessage());
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -