CheckClassifier.java
if (handleMissingPredictors)
canHandleMissing(PNom, PNum, PStr, numericClass, true, false, 100);
boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr,
numericClass,
false, true, 20)[0];
if (handleMissingClass)
canHandleMissing(PNom, PNum, PStr, numericClass, false, true, 100);
correctBuildInitialisation(PNom, PNum, PStr, numericClass);
datasetIntegrity(PNom, PNum, PStr, numericClass,
handleMissingPredictors, handleMissingClass);
doesntUseTestClassVal(PNom, PNum, PStr, numericClass);
if (updateable)
updatingEquality(PNom, PNum, PStr, numericClass);
}
}
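/*
 * Usage sketch (not part of the original source): the battery of tests above
 * is normally driven through the public check entry points. Assuming the
 * standard Weka CheckClassifier API (setClassifier() and doTests()) and an
 * arbitrarily chosen scheme, a minimal invocation might look like:
 *
 *   CheckClassifier check = new CheckClassifier();
 *   check.setClassifier(new weka.classifiers.rules.ZeroR());
 *   check.doTests();
 *
 * or, from the command line, something along the lines of
 *   java weka.classifiers.CheckClassifier -W weka.classifiers.rules.ZeroR
 */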
/**
* Checks whether the scheme can take command line options.
*
* @return index 0 is true if the classifier can take options
*/
protected boolean[] canTakeOptions() {
boolean[] result = new boolean[2];
print("options...");
if (m_Classifier instanceof OptionHandler) {
println("yes");
if (m_Debug) {
println("\n=== Full report ===");
Enumeration enu = ((OptionHandler)m_Classifier).listOptions();
while (enu.hasMoreElements()) {
Option option = (Option) enu.nextElement();
print(option.synopsis() + "\n"
+ option.description() + "\n");
}
println("\n");
}
result[0] = true;
}
else {
println("no");
result[0] = false;
}
return result;
}
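/*
 * Illustrative sketch (an assumption, not part of this class): the same
 * instanceof test can be used from client code to enumerate a scheme's
 * options before setting them. The choice of ZeroR is arbitrary.
 *
 *   Classifier c = new weka.classifiers.rules.ZeroR();
 *   if (c instanceof OptionHandler) {
 *     Enumeration opts = ((OptionHandler) c).listOptions();
 *     while (opts.hasMoreElements()) {
 *       Option o = (Option) opts.nextElement();
 *       System.out.println(o.synopsis() + " - " + o.description());
 *     }
 *   }
 */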
/**
* Checks whether the scheme can build models incrementally.
*
* @return index 0 is true if the classifier can train incrementally
*/
protected boolean[] updateableClassifier() {
boolean[] result = new boolean[2];
print("updateable classifier...");
if (m_Classifier instanceof UpdateableClassifier) {
println("yes");
result[0] = true;
}
else {
println("no");
result[0] = false;
}
return result;
}
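/*
 * Illustrative sketch (an assumption, not taken from this class): an
 * updateable scheme is built once on the dataset header and then fed
 * instances one at a time. NaiveBayesUpdateable is used here only as a
 * well-known example of such a scheme.
 *
 *   NaiveBayesUpdateable nb = new NaiveBayesUpdateable();
 *   nb.buildClassifier(new Instances(train, 0));   // structure only
 *   for (int i = 0; i < train.numInstances(); i++) {
 *     nb.updateClassifier(train.instance(i));      // incremental update
 *   }
 */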
/**
* Checks whether the scheme says it can handle instance weights.
*
* @return index 0 is true if the classifier handles instance weights
*/
protected boolean[] weightedInstancesHandler() {
boolean[] result = new boolean[2];
print("weighted instances classifier...");
if (m_Classifier instanceof WeightedInstancesHandler) {
println("yes");
result[0] = true;
}
else {
println("no");
result[0] = false;
}
return result;
}
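/*
 * Illustrative sketch (an assumption): WeightedInstancesHandler is a marker
 * interface with no methods of its own, so the only way to exercise it is to
 * give the training instances non-uniform weights and let the scheme use
 * them during buildClassifier.
 *
 *   if (classifier instanceof WeightedInstancesHandler) {
 *     train.instance(0).setWeight(5.0);   // emphasise the first instance
 *   }
 *   classifier.buildClassifier(train);
 */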
/**
* Checks basic prediction of the scheme, for simple non-troublesome
* datasets.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canPredict(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass) {
print("basic predict");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");
FastVector accepts = new FastVector();
accepts.addElement("nominal");
accepts.addElement("numeric");
accepts.addElement("string");
int numTrain = getNumInstances(), numTest = getNumInstances(),
numClasses = 2, missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
numericClass,
missingLevel, predictorMissing, classMissing,
numTrain, numTest, numClasses,
accepts);
}
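/*
 * Note on the "accepts" vector (a sketch of the convention assumed here; the
 * runBasicTest implementation is not shown in this listing): when the scheme
 * throws an exception, a message containing one of these keywords is treated
 * as an acceptable refusal (result index 1) rather than a hard failure. A
 * scheme that cannot handle string attributes might therefore signal this
 * cleanly with something like:
 *
 *   if (data.checkForStringAttributes()) {
 *     throw new Exception("Cannot handle string attributes!");
 *   }
 */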
/**
* Checks whether nominal schemes can handle more than two classes.
* If a scheme is only designed for two-class problems it should
* throw an appropriate exception for multi-class problems.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numClasses the number of classes to test
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canHandleNClasses(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
int numClasses) {
print("more than two class problems");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, false);
print("...");
FastVector accepts = new FastVector();
accepts.addElement("number");
accepts.addElement("class");
int numTrain = getNumInstances(), numTest = getNumInstances(),
missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
false,
missingLevel, predictorMissing, classMissing,
numTrain, numTest, numClasses,
accepts);
}
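/*
 * Illustrative sketch (an assumption): a scheme restricted to two-class
 * problems should detect the violation in buildClassifier and report it with
 * wording the test recognises (note "number" and "class" in the accepts
 * vector above).
 *
 *   public void buildClassifier(Instances data) throws Exception {
 *     if (data.numClasses() > 2) {
 *       throw new Exception("Can only handle a class with 2 values, "
 *         + "not this number of classes: " + data.numClasses());
 *     }
 *     // ... two-class training ...
 *   }
 */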
/**
* Checks whether the scheme can handle zero training instances.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canHandleZeroTraining(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass) {
print("handle zero training instances");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");
FastVector accepts = new FastVector();
accepts.addElement("train");
accepts.addElement("training");
accepts.addElement("value");
int numTrain = 0, numTest = getNumInstances(), numClasses = 2,
missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
numericClass,
missingLevel, predictorMissing, classMissing,
numTrain, numTest, numClasses,
accepts);
}
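/*
 * Illustrative sketch (an assumption): a scheme that cannot build a model
 * from an empty dataset should fail gracefully in buildClassifier, again
 * using wording the test recognises ("train"/"training"/"value" above).
 *
 *   if (data.numInstances() == 0) {
 *     throw new Exception("No training instances!");
 *   }
 */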
/**
* Checks whether the scheme correctly initialises models when
* buildClassifier is called. This test calls buildClassifier with
* one training dataset and records performance on a test set.
* buildClassifier is then called on a training set with different
* structure, and then again with the original training set. The
* performance on the test set is compared with the original results
* and any performance difference noted as incorrect build initialisation.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @return index 0 is true if the test was passed, index 1 is true if the
* scheme performs worse than ZeroR, but without error (index 0 is
* false)
*/
protected boolean[] correctBuildInitialisation(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass) {
boolean[] result = new boolean[2];
print("correct initialisation during buildClassifier");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");
int numTrain = getNumInstances(), numTest = getNumInstances(),
numClasses = 2, missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
Instances train1 = null;
Instances test1 = null;
Instances train2 = null;
Instances test2 = null;
Classifier classifier = null;
Evaluation evaluation1A = null;
Evaluation evaluation1B = null;
Evaluation evaluation2 = null;
boolean built = false;
int stage = 0;
try {
// Make two sets of train/test splits with different
// numbers of attributes
train1 = makeTestDataset(42, numTrain,
nominalPredictor ? 2 : 0,
numericPredictor ? 1 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
train2 = makeTestDataset(84, numTrain,
nominalPredictor ? 3 : 0,
numericPredictor ? 2 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
test1 = makeTestDataset(24, numTest,
nominalPredictor ? 2 : 0,
numericPredictor ? 1 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
test2 = makeTestDataset(48, numTest,
nominalPredictor ? 3 : 0,
numericPredictor ? 2 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
if (nominalPredictor) {
train1.deleteAttributeAt(0);
test1.deleteAttributeAt(0);
train2.deleteAttributeAt(0);
test2.deleteAttributeAt(0);
}
if (missingLevel > 0) {
addMissing(train1, missingLevel, predictorMissing, classMissing);
addMissing(test1, Math.min(missingLevel,50), predictorMissing,
classMissing);
addMissing(train2, missingLevel, predictorMissing, classMissing);
addMissing(test2, Math.min(missingLevel,50), predictorMissing,
classMissing);
}
classifier = Classifier.makeCopies(getClassifier(), 1)[0];
evaluation1A = new Evaluation(train1);
evaluation1B = new Evaluation(train1);
evaluation2 = new Evaluation(train2);
} catch (Exception ex) {
throw new Error("Error setting up for tests: " + ex.getMessage());
}
try {
stage = 0;
classifier.buildClassifier(train1);
built = true;
if (!testWRTZeroR(classifier, evaluation1A, train1, test1)[0]) {
throw new Exception("Scheme performs worse than ZeroR");
}
stage = 1;
built = false;
classifier.buildClassifier(train2);
built = true;
if (!testWRTZeroR(classifier, evaluation2, train2, test2)[0]) {
throw new Exception("Scheme performs worse than ZeroR");
}
stage = 2;
built = false;
classifier.buildClassifier(train1);
built = true;
if (!testWRTZeroR(classifier, evaluation1B, train1, test1)[0]) {
throw new Exception("Scheme performs worse than ZeroR");
}
stage = 3;
if (!evaluation1A.equals(evaluation1B)) {
if (m_Debug) {
println("\n=== Full report ===\n"
+ evaluation1A.toSummaryString("\nFirst buildClassifier()",
true)
+ "\n\n");
println(
evaluation1B.toSummaryString("\nSecond buildClassifier()",
true)
+ "\n\n");
}
throw new Exception("Results differ between buildClassifier calls");
}
println("yes");
result[0] = true;
// note: the hard-coded 'false' disables this extra debug report
if (false && m_Debug) {
println("\n=== Full report ===\n"
+ evaluation1A.toSummaryString("\nFirst buildClassifier()",
true)
+ "\n\n");
println(
evaluation1B.toSummaryString("\nSecond buildClassifier()",
true)
+ "\n\n");
}
}
catch (Exception ex) {
String msg = (ex.getMessage() == null) ? "" : ex.getMessage().toLowerCase();
if (msg.indexOf("worse than zeror") >= 0) {
println("warning: performs worse than ZeroR");
result[1] = true;
} else {
println("no");
result[0] = false;
}
if (m_Debug) {
println("\n=== Full Report ===");
print("Problem during");
if (built) {
print(" testing");
} else {
print(" training");
}
switch (stage) {
case 0:
print(" of dataset 1");
break;
case 1:
print(" of dataset 2");
break;
case 2:
print(" of dataset 1 (2nd build)");
break;
case 3:
print(", comparing results from builds of dataset 1");
break;
}
println(": " + ex.getMessage() + "\n");
println("here are the datasets:\n");
println("=== Train1 Dataset ===\n"
+ train1.toString() + "\n");
println("=== Test1 Dataset ===\n"
+ test1.toString() + "\n\n");
println("=== Train2 Dataset ===\n"
+ train2.toString() + "\n");
println("=== Test2 Dataset ===\n"
+ test2.toString() + "\n\n");
}
}
return result;
}
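/*
 * Illustrative sketch (an assumption): to pass this check, buildClassifier
 * must discard any state left over from a previous call and train from
 * scratch on the supplied data only, e.g.
 *
 *   public void buildClassifier(Instances data) throws Exception {
 *     m_Model = null;                    // reset before (re)training
 *     m_Header = new Instances(data, 0); // remember only the new structure
 *     // ... train on 'data' ...
 *   }
 *
 * The field names m_Model and m_Header are hypothetical.
 */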
/**
* Checks basic missing value handling of the scheme. If the missing
* values cause an exception to be thrown by the scheme, this will be
* recorded.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @param predictorMissing true if the missing values may be in
* the predictors
* @param classMissing true if the missing values may be in the class
* @param missingLevel the percentage of missing values
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canHandleMissing(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass,
boolean predictorMissing,
boolean classMissing,
int missingLevel) {
if (missingLevel == 100)
print("100% ");
print("missing");
if (predictorMissing) {
print(" predictor");
if (classMissing)
print(" and");
}
if (classMissing)
print(" class");
print(" values");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");