CheckClassifier.java
print(" of dataset 1"); break; case 1: print(" of dataset 2"); break; case 2: print(" of dataset 1 (2nd build)"); break; case 3: print(", comparing results from builds of dataset 1"); break; } println(": " + ex.getMessage() + "\n"); println("here are the datasets:\n"); println("=== Train1 Dataset ===\n" + train1.toString() + "\n"); println("=== Test1 Dataset ===\n" + test1.toString() + "\n\n"); println("=== Train2 Dataset ===\n" + train2.toString() + "\n"); println("=== Test2 Dataset ===\n" + test2.toString() + "\n\n"); } } return result; } /** * Checks basic missing value handling of the scheme. If the missing * values cause an exception to be thrown by the scheme, this will be * recorded. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @param predictorMissing true if the missing values may be in * the predictors * @param classMissing true if the missing values may be in the class * @param missingLevel the percentage of missing values * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] canHandleMissing( boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType, boolean predictorMissing, boolean classMissing, int missingLevel) { if (missingLevel == 100) print("100% "); print("missing"); if (predictorMissing) { print(" predictor"); if (classMissing) print(" and"); } if (classMissing) print(" class"); print(" values"); printAttributeSummary( nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); FastVector accepts = new FastVector(); accepts.addElement("missing"); accepts.addElement("value"); accepts.addElement("train"); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2; return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether an updateable scheme produces the same model when * trained incrementally as when batch trained. The model itself * cannot be compared, so we compare the evaluation on test data * for both models. It is possible to get a false positive on this * test (likelihood depends on the classifier). * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) 
  /**
   * Checks whether an updateable scheme produces the same model when
   * trained incrementally as when batch trained. The model itself
   * cannot be compared, so we compare the evaluation on test data
   * for both models. It is possible to get a false positive on this
   * test (likelihood depends on the classifier).
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param datePredictor if true use date predictor attributes
   * @param relationalPredictor if true use relational predictor attributes
   * @param multiInstance whether multi-instance is needed
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 is true if the test was passed
   */
  protected boolean[] updatingEquality(boolean nominalPredictor,
      boolean numericPredictor, boolean stringPredictor,
      boolean datePredictor, boolean relationalPredictor,
      boolean multiInstance, int classType) {

    print("incremental training produces the same results"
        + " as batch training");
    printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor,
        datePredictor, relationalPredictor, multiInstance, classType);
    print("...");

    int numTrain = getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier[] classifiers = null;
    Evaluation evaluationB = null;
    Evaluation evaluationI = null;
    boolean built = false;
    try {
      train = makeTestDataset(42, numTrain,
          nominalPredictor    ? 2 : 0,
          numericPredictor    ? 1 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      test = makeTestDataset(24, numTest,
          nominalPredictor    ? 2 : 0,
          numericPredictor    ? 1 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      if (missingLevel > 0) {
        addMissing(train, missingLevel, predictorMissing, classMissing);
        addMissing(test, Math.min(missingLevel, 50),
            predictorMissing, classMissing);
      }
      classifiers = Classifier.makeCopies(getClassifier(), 2);
      evaluationB = new Evaluation(train);
      evaluationI = new Evaluation(train);
      classifiers[0].buildClassifier(train);
      testWRTZeroR(classifiers[0], evaluationB, train, test);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      classifiers[1].buildClassifier(new Instances(train, 0));
      for (int i = 0; i < train.numInstances(); i++) {
        ((UpdateableClassifier) classifiers[1]).updateClassifier(
            train.instance(i));
      }
      built = true;
      testWRTZeroR(classifiers[1], evaluationI, train, test);
      if (!evaluationB.equals(evaluationI)) {
        println("no");
        result[0] = false;
        if (m_Debug) {
          println("\n=== Full Report ===");
          println("Results differ between batch and "
              + "incrementally built models.\n"
              + "Depending on the classifier, this may be OK");
          println("Here are the results:\n");
          println(evaluationB.toSummaryString("\nbatch built results\n", true));
          println(evaluationI.toSummaryString(
              "\nincrementally built results\n", true));
          println("Here are the datasets:\n");
          println("=== Train Dataset ===\n" + train.toString() + "\n");
          println("=== Test Dataset ===\n" + test.toString() + "\n\n");
        }
      } else {
        println("yes");
        result[0] = true;
      }
    } catch (Exception ex) {
      result[0] = false;
      print("Problem during");
      if (built)
        print(" testing");
      else
        print(" training");
      println(": " + ex.getMessage() + "\n");
    }
    return result;
  }
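  /*
   * Illustrative sketch (not part of CheckClassifier): the batch-vs-incremental
   * comparison performed above, shown with NaiveBayesUpdateable standing in for
   * an arbitrary updateable scheme. Assumes `train` and `test` already have
   * their class index set; the method name and the use of Evaluation summaries
   * are illustration only.
   */
  private static void compareBatchAndIncremental(Instances train, Instances test)
      throws Exception {

    // Batch model: sees the whole training set at once.
    weka.classifiers.bayes.NaiveBayesUpdateable batch =
        new weka.classifiers.bayes.NaiveBayesUpdateable();
    batch.buildClassifier(train);

    // Incremental model: initialised on an empty copy of the header,
    // then fed one instance at a time.
    weka.classifiers.bayes.NaiveBayesUpdateable incremental =
        new weka.classifiers.bayes.NaiveBayesUpdateable();
    incremental.buildClassifier(new Instances(train, 0));
    for (int i = 0; i < train.numInstances(); i++) {
      incremental.updateClassifier(train.instance(i));
    }

    // The models themselves cannot be compared, so compare their evaluations.
    Evaluation evalBatch = new Evaluation(train);
    evalBatch.evaluateModel(batch, test);
    Evaluation evalIncremental = new Evaluation(train);
    evalIncremental.evaluateModel(incremental, test);
    System.out.println(evalBatch.toSummaryString("\nbatch built results\n", false));
    System.out.println(evalIncremental.toSummaryString(
        "\nincrementally built results\n", false));
  }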
  /**
   * Checks whether the classifier erroneously uses the class
   * value of test instances (if provided). Runs the classifier with
   * test instance class values set to missing and compares with results
   * when test instance class values are left intact.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param datePredictor if true use date predictor attributes
   * @param relationalPredictor if true use relational predictor attributes
   * @param multiInstance whether multi-instance is needed
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 is true if the test was passed
   */
  protected boolean[] doesntUseTestClassVal(boolean nominalPredictor,
      boolean numericPredictor, boolean stringPredictor,
      boolean datePredictor, boolean relationalPredictor,
      boolean multiInstance, int classType) {

    print("classifier ignores test instance class vals");
    printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor,
        datePredictor, relationalPredictor, multiInstance, classType);
    print("...");

    int numTrain = 2 * getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier[] classifiers = null;
    boolean evalFail = false;
    try {
      train = makeTestDataset(42, numTrain,
          nominalPredictor    ? 3 : 0,
          numericPredictor    ? 2 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      test = makeTestDataset(24, numTest,
          nominalPredictor    ? 3 : 0,
          numericPredictor    ? 2 : 0,
          stringPredictor     ? 1 : 0,
          datePredictor       ? 1 : 0,
          relationalPredictor ? 1 : 0,
          numClasses, classType, multiInstance);
      if (missingLevel > 0) {
        addMissing(train, missingLevel, predictorMissing, classMissing);
        addMissing(test, Math.min(missingLevel, 50),
            predictorMissing, classMissing);
      }
      classifiers = Classifier.makeCopies(getClassifier(), 2);
      classifiers[0].buildClassifier(train);
      classifiers[1].buildClassifier(train);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      // Now set test values to missing when predicting
      for (int i = 0; i < test.numInstances(); i++) {
        Instance testInst = test.instance(i);
        Instance classMissingInst = (Instance) testInst.copy();
        classMissingInst.setDataset(test);
        classMissingInst.setClassMissing();
        double[] dist0 = classifiers[0].distributionForInstance(testInst);
        double[] dist1 = classifiers[1].distributionForInstance(classMissingInst);
        for (int j = 0; j < dist0.length; j++) {
          // ignore, if both are NaNs
          if (Double.isNaN(dist0[j]) && Double.isNaN(dist1[j])) {
            if (getDebug())
              System.out.println("Both predictions are NaN!");
            continue;
          }
          // distribution different?
          if (dist0[j] != dist1[j]) {
            throw new Exception("Prediction different for instance " + (i + 1));
          }
        }
      }
      println("yes");
      result[0] = true;
    } catch (Exception ex) {
      println("no");
      result[0] = false;
      if (m_Debug) {
        println("\n=== Full Report ===");
        if (evalFail) {
          println("Results differ between non-missing and "
              + "missing test class values.");
        } else {
          print("Problem during testing");
          println(": " + ex.getMessage() + "\n");
        }
        println("Here are the datasets:\n");
        println("=== Train Dataset ===\n" + train.toString() + "\n");
        println("=== Train Weights ===\n");
        for (int i = 0; i < train.numInstances(); i++) {
          println(" " + (i + 1) + " " + train.instance(i).weight());
        }
        println("=== Test Dataset ===\n" + test.toString() + "\n\n");
        println("(test weights all 1.0)\n");
      }
    }
    return result;
  }
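  /*
   * Illustrative sketch (not part of CheckClassifier): the check above in
   * miniature. A well-behaved classifier must give identical predictions
   * whether or not the class value of a test instance is visible. The method
   * name is an assumption for illustration only; `classifier` is assumed to be
   * already built on data compatible with `test`.
   */
  private static boolean ignoresTestClassValues(Classifier classifier,
      Instances test) throws Exception {

    for (int i = 0; i < test.numInstances(); i++) {
      Instance withClass = test.instance(i);
      Instance withoutClass = (Instance) withClass.copy();
      withoutClass.setDataset(test);      // make sure the copy is attached to the header
      withoutClass.setClassMissing();     // hide the class value

      double[] distA = classifier.distributionForInstance(withClass);
      double[] distB = classifier.distributionForInstance(withoutClass);
      for (int j = 0; j < distA.length; j++) {
        if (Double.isNaN(distA[j]) && Double.isNaN(distB[j])) {
          continue;                       // both undefined: not a disagreement
        }
        if (distA[j] != distB[j]) {
          return false;                   // prediction changed: class value was used
        }
      }
    }
    return true;
  }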
  /**
   * Checks whether the classifier can handle instance weights.
   * This test compares the classifier performance on two datasets
   * that are identical except for the training weights. If the
   * results change, then the classifier must be using the weights. It
   * may be possible to get a false positive from this test if the
   * weight changes aren't significant enough to induce a change
   * in classifier performance (but the weights are chosen to minimize
   * the likelihood of this).
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param datePredictor if true use date predictor attributes
   * @param relationalPredictor if true use relational predictor attributes
   * @param multiInstance whether multi-instance is needed
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 true if the test was passed
   */
  protected boolean[] instanceWeights(boolean nominalPredictor,
      boolean numericPredictor, boolean stringPredictor,
      boolean datePredictor, boolean relationalPredictor,
      boolean multiInstance, int classType) {

    print("classifier uses instance weights");
    printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor,
        datePredictor, relationalPredictor, multiInstance, classType);
    print("...");

    int numTrain = 2 * getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    boolean[] result = new boolean[2];
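  /*
   * Illustrative sketch (not part of CheckClassifier): the comparison that the
   * instanceWeights() javadoc describes. The same scheme is trained twice on
   * datasets that differ only in their instance weights; if the evaluations
   * differ, the scheme is evidently using the weights. The method name, the
   * choice of weights and the use of Evaluation for the comparison are
   * assumptions for illustration only.
   */
  private static boolean usesInstanceWeights(Classifier scheme,
      Instances train, Instances test) throws Exception {

    Instances unweighted = new Instances(train);
    Instances reweighted = new Instances(train);
    java.util.Random random = new java.util.Random(1);
    for (int i = 0; i < reweighted.numInstances(); i++) {
      // push roughly half the instances towards zero weight
      if (random.nextBoolean()) {
        reweighted.instance(i).setWeight(0.01);
      }
    }

    Classifier[] copies = Classifier.makeCopies(scheme, 2);
    copies[0].buildClassifier(unweighted);
    copies[1].buildClassifier(reweighted);

    Evaluation evalA = new Evaluation(unweighted);
    evalA.evaluateModel(copies[0], test);
    Evaluation evalB = new Evaluation(reweighted);
    evalB.evaluateModel(copies[1], test);

    // identical evaluations suggest the weights were ignored
    return !evalA.equals(evalB);
  }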