CheckClassifier.java
print(" of dataset 1"); break; case 1: print(" of dataset 2"); break; case 2: print(" of dataset 1 (2nd build)"); break; case 3: print(", comparing results from builds of dataset 1"); break; } println(": " + ex.getMessage() + "\n"); println("here are the datasets:\n"); println("=== Train1 Dataset ===\n" + train1.toString() + "\n"); println("=== Test1 Dataset ===\n" + test1.toString() + "\n\n"); println("=== Train2 Dataset ===\n" + train2.toString() + "\n"); println("=== Test2 Dataset ===\n" + test2.toString() + "\n\n"); } } return result; } /** * Checks basic missing value handling of the scheme. If the missing * values cause an exception to be thrown by the scheme, this will be * recorded. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @param predictorMissing true if the missing values may be in * the predictors * @param classMissing true if the missing values may be in the class * @param missingLevel the percentage of missing values * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] canHandleMissing( boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType, boolean predictorMissing, boolean classMissing, int missingLevel) { if (missingLevel == 100) print("100% "); print("missing"); if (predictorMissing) { print(" predictor"); if (classMissing) print(" and"); } if (classMissing) print(" class"); print(" values"); printAttributeSummary( nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); FastVector accepts = new FastVector(); accepts.addElement("missing"); accepts.addElement("value"); accepts.addElement("train"); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2; return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether an updateable scheme produces the same model when * trained incrementally as when batch trained. The model itself * cannot be compared, so we compare the evaluation on test data * for both models. It is possible to get a false positive on this * test (likelihood depends on the classifier). * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) 
  /**
   * Checks whether an updateable scheme produces the same model when
   * trained incrementally as when batch trained. The model itself
   * cannot be compared, so we compare the evaluation on test data
   * for both models. It is possible to get a false positive on this
   * test (likelihood depends on the classifier).
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param datePredictor if true use date predictor attributes
   * @param relationalPredictor if true use relational predictor attributes
   * @param multiInstance whether multi-instance is needed
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 is true if the test was passed
   */
  protected boolean[] updatingEquality(boolean nominalPredictor,
      boolean numericPredictor, boolean stringPredictor,
      boolean datePredictor, boolean relationalPredictor,
      boolean multiInstance, int classType) {

    print("incremental training produces the same results"
        + " as batch training");
    printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor,
        datePredictor, relationalPredictor, multiInstance, classType);
    print("...");

    int numTrain = getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier[] classifiers = null;
    Evaluation evaluationB = null;
    Evaluation evaluationI = null;
    boolean built = false;
    try {
      train = makeTestDataset(42, numTrain,
          nominalPredictor    ? 2 : 0,
          numericPredictor    ? 1 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      test = makeTestDataset(24, numTest,
          nominalPredictor    ? 2 : 0,
          numericPredictor    ? 1 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      if (missingLevel > 0) {
        addMissing(train, missingLevel, predictorMissing, classMissing);
        addMissing(test, Math.min(missingLevel, 50),
            predictorMissing, classMissing);
      }
      classifiers = Classifier.makeCopies(getClassifier(), 2);
      evaluationB = new Evaluation(train);
      evaluationI = new Evaluation(train);
      classifiers[0].buildClassifier(train);
      testWRTZeroR(classifiers[0], evaluationB, train, test);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      classifiers[1].buildClassifier(new Instances(train, 0));
      for (int i = 0; i < train.numInstances(); i++) {
        ((UpdateableClassifier) classifiers[1]).updateClassifier(
            train.instance(i));
      }
      built = true;
      testWRTZeroR(classifiers[1], evaluationI, train, test);
      if (!evaluationB.equals(evaluationI)) {
        println("no");
        result[0] = false;
        if (m_Debug) {
          println("\n=== Full Report ===");
          println("Results differ between batch and "
              + "incrementally built models.\n"
              + "Depending on the classifier, this may be OK");
          println("Here are the results:\n");
          println(evaluationB.toSummaryString("\nbatch built results\n", true));
          println(evaluationI.toSummaryString(
              "\nincrementally built results\n", true));
          println("Here are the datasets:\n");
          println("=== Train Dataset ===\n" + train.toString() + "\n");
          println("=== Test Dataset ===\n" + test.toString() + "\n\n");
        }
      } else {
        println("yes");
        result[0] = true;
      }
    } catch (Exception ex) {
      result[0] = false;
      print("Problem during");
      if (built)
        print(" testing");
      else
        print(" training");
      println(": " + ex.getMessage() + "\n");
    }
    return result;
  }
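  /*
   * Illustrative sketch (not part of CheckClassifier): the batch-vs-incremental
   * comparison performed above, shown with NaiveBayesUpdateable standing in for
   * an arbitrary updateable scheme. Assumes `train` and `test` already have
   * their class index set; the method name and the use of Evaluation summaries
   * are illustration only.
   */
  private static void compareBatchAndIncremental(Instances train, Instances test)
      throws Exception {

    // Batch model: sees the whole training set at once.
    weka.classifiers.bayes.NaiveBayesUpdateable batch =
        new weka.classifiers.bayes.NaiveBayesUpdateable();
    batch.buildClassifier(train);

    // Incremental model: initialised on an empty copy of the header,
    // then fed one instance at a time.
    weka.classifiers.bayes.NaiveBayesUpdateable incremental =
        new weka.classifiers.bayes.NaiveBayesUpdateable();
    incremental.buildClassifier(new Instances(train, 0));
    for (int i = 0; i < train.numInstances(); i++) {
      incremental.updateClassifier(train.instance(i));
    }

    // The models themselves cannot be compared, so compare their evaluations.
    Evaluation evalBatch = new Evaluation(train);
    evalBatch.evaluateModel(batch, test);
    Evaluation evalIncremental = new Evaluation(train);
    evalIncremental.evaluateModel(incremental, test);
    System.out.println(evalBatch.toSummaryString("\nbatch built results\n", false));
    System.out.println(evalIncremental.toSummaryString(
        "\nincrementally built results\n", false));
  }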
  /**
   * Checks whether the classifier erroneously uses the class
   * value of test instances (if provided). Runs the classifier with
   * test instance class values set to missing and compares with results
   * when test instance class values are left intact.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param datePredictor if true use date predictor attributes
   * @param relationalPredictor if true use relational predictor attributes
   * @param multiInstance whether multi-instance is needed
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 is true if the test was passed
   */
  protected boolean[] doesntUseTestClassVal(boolean nominalPredictor,
      boolean numericPredictor, boolean stringPredictor,
      boolean datePredictor, boolean relationalPredictor,
      boolean multiInstance, int classType) {

    print("classifier ignores test instance class vals");
    printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor,
        datePredictor, relationalPredictor, multiInstance, classType);
    print("...");

    int numTrain = 2 * getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier[] classifiers = null;
    boolean evalFail = false;
    try {
      train = makeTestDataset(42, numTrain,
          nominalPredictor    ? 3 : 0,
          numericPredictor    ? 2 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      test = makeTestDataset(24, numTest,
          nominalPredictor    ? 3 : 0,
          numericPredictor    ? 2 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      if (missingLevel > 0) {
        addMissing(train, missingLevel, predictorMissing, classMissing);
        addMissing(test, Math.min(missingLevel, 50),
            predictorMissing, classMissing);
      }
      classifiers = Classifier.makeCopies(getClassifier(), 2);
      classifiers[0].buildClassifier(train);
      classifiers[1].buildClassifier(train);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      // Now set test values to missing when predicting
      for (int i = 0; i < test.numInstances(); i++) {
        Instance testInst = test.instance(i);
        Instance classMissingInst = (Instance) testInst.copy();
        classMissingInst.setDataset(test);
        classMissingInst.setClassMissing();
        double[] dist0 = classifiers[0].distributionForInstance(testInst);
        double[] dist1 = classifiers[1].distributionForInstance(classMissingInst);
        for (int j = 0; j < dist0.length; j++) {
          // ignore, if both are NaNs
          if (Double.isNaN(dist0[j]) && Double.isNaN(dist1[j])) {
            if (getDebug())
              System.out.println("Both predictions are NaN!");
            continue;
          }
          // distribution different?
          if (dist0[j] != dist1[j]) {
            throw new Exception("Prediction different for instance " + (i + 1));
          }
        }
      }
      println("yes");
      result[0] = true;
    } catch (Exception ex) {
      println("no");
      result[0] = false;
      if (m_Debug) {
        println("\n=== Full Report ===");
        if (evalFail) {
          println("Results differ between non-missing and "
              + "missing test class values.");
        } else {
          print("Problem during testing");
          println(": " + ex.getMessage() + "\n");
        }
        println("Here are the datasets:\n");
        println("=== Train Dataset ===\n" + train.toString() + "\n");
        println("=== Train Weights ===\n");
        for (int i = 0; i < train.numInstances(); i++) {
          println(" " + (i + 1) + " " + train.instance(i).weight());
        }
        println("=== Test Dataset ===\n" + test.toString() + "\n\n");
        println("(test weights all 1.0)\n");
      }
    }
    return result;
  }
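  /*
   * Illustrative sketch (not part of CheckClassifier): the check above in
   * miniature. A well-behaved classifier must give identical predictions
   * whether or not the class value of a test instance is visible. The method
   * name is an assumption for illustration only; `classifier` is assumed to be
   * already built on data compatible with `test`.
   */
  private static boolean ignoresTestClassValues(Classifier classifier,
      Instances test) throws Exception {

    for (int i = 0; i < test.numInstances(); i++) {
      Instance withClass = test.instance(i);
      Instance withoutClass = (Instance) withClass.copy();
      withoutClass.setDataset(test);      // make sure the copy is attached to the header
      withoutClass.setClassMissing();     // hide the class value

      double[] distA = classifier.distributionForInstance(withClass);
      double[] distB = classifier.distributionForInstance(withoutClass);
      for (int j = 0; j < distA.length; j++) {
        if (Double.isNaN(distA[j]) && Double.isNaN(distB[j])) {
          continue;                       // both undefined: not a disagreement
        }
        if (distA[j] != distB[j]) {
          return false;                   // prediction changed: class value was used
        }
      }
    }
    return true;
  }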
  /**
   * Checks whether the classifier can handle instance weights.
   * This test compares the classifier performance on two datasets
   * that are identical except for the training weights. If the
   * results change, then the classifier must be using the weights. It
   * may be possible to get a false positive from this test if the
   * weight changes aren't significant enough to induce a change
   * in classifier performance (but the weights are chosen to minimize
   * the likelihood of this).
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param datePredictor if true use date predictor attributes
   * @param relationalPredictor if true use relational predictor attributes
   * @param multiInstance whether multi-instance is needed
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 true if the test was passed
   */
  protected boolean[] instanceWeights(boolean nominalPredictor,
      boolean numericPredictor, boolean stringPredictor,
      boolean datePredictor, boolean relationalPredictor,
      boolean multiInstance, int classType) {

    print("classifier uses instance weights");
    printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor,
        datePredictor, relationalPredictor, multiInstance, classType);
    print("...");

    int numTrain = 2 * getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    boolean[] result = new boolean[2];
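  /*
   * Illustrative sketch (not part of CheckClassifier): the comparison that the
   * instanceWeights() javadoc describes. The same scheme is trained twice on
   * datasets that differ only in their instance weights; if the evaluations
   * differ, the scheme is evidently using the weights. The method name, the
   * choice of weights and the use of Evaluation for the comparison are
   * assumptions for illustration only.
   */
  private static boolean usesInstanceWeights(Classifier scheme,
      Instances train, Instances test) throws Exception {

    Instances unweighted = new Instances(train);
    Instances reweighted = new Instances(train);
    java.util.Random random = new java.util.Random(1);
    for (int i = 0; i < reweighted.numInstances(); i++) {
      // push roughly half the instances towards zero weight
      if (random.nextBoolean()) {
        reweighted.instance(i).setWeight(0.01);
      }
    }

    Classifier[] copies = Classifier.makeCopies(scheme, 2);
    copies[0].buildClassifier(unweighted);
    copies[1].buildClassifier(reweighted);

    Evaluation evalA = new Evaluation(unweighted);
    evalA.evaluateModel(copies[0], test);
    Evaluation evalB = new Evaluation(reweighted);
    evalB.evaluateModel(copies[1], test);

    // identical evaluations suggest the weights were ignored
    return !evalA.equals(evalB);
  }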