⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 checkclassifier.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    FastVector accepts = new FastVector();
    accepts.addElement("missing");
    accepts.addElement("value");
    accepts.addElement("train");
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
        numClasses = 2;

    return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, 
                        numericClass, 
			missingLevel, predictorMissing, classMissing,
			numTrain, numTest, numClasses, 
			accepts);
  }

  /**
   * Checks whether an updateable scheme produces the same model when
   * trained incrementally as when batch trained. The model itself
   * cannot be compared, so we compare the evaluation on test data
   * for both models. It is possible to get a false positive on this
   * test (likelihood depends on the classifier).
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @return index 0 is true if the test was passed
   */
  protected boolean[] updatingEquality(boolean nominalPredictor,
                                       boolean numericPredictor,
                                       boolean stringPredictor,
                                       boolean numericClass) {

    print("incremental training produces the same results"
          + " as batch training");
    printAttributeSummary(
        nominalPredictor, numericPredictor, stringPredictor, numericClass);
    print("...");
    int numTrain = getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    // Only index 0 is meaningful here (true iff the test passed);
    // the array length matches the other checks' return convention.
    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier [] classifiers = null;
    Evaluation evaluationB = null;
    Evaluation evaluationI = null;
    boolean built = false;
    try {
      // Fixed seeds (42 train / 24 test) keep the datasets reproducible,
      // so batch and incremental runs see identical data.
      train = makeTestDataset(42, numTrain,
                              nominalPredictor ? 2 : 0,
                              numericPredictor ? 1 : 0,
                              stringPredictor ? 1 : 0,
                              numClasses,
                              numericClass);
      test = makeTestDataset(24, numTest,
                             nominalPredictor ? 2 : 0,
                             numericPredictor ? 1 : 0,
                             stringPredictor ? 1 : 0,
                             numClasses,
                             numericClass);
      if (nominalPredictor) {
        train.deleteAttributeAt(0);
        test.deleteAttributeAt(0);
      }
      if (missingLevel > 0) {
        addMissing(train, missingLevel, predictorMissing, classMissing);
        addMissing(test, Math.min(missingLevel, 50), predictorMissing,
                   classMissing);
      }
      classifiers = Classifier.makeCopies(getClassifier(), 2);
      evaluationB = new Evaluation(train);
      evaluationI = new Evaluation(train);
      // classifiers[0]: batch trained on the full training set.
      classifiers[0].buildClassifier(train);
      testWRTZeroR(classifiers[0], evaluationB, train, test);
    } catch (Exception ex) {
      // Chain the original exception as the cause so its stack trace is
      // preserved (previously only the message survived, and
      // getMessage() may even be null).
      throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
    }
    try {
      // classifiers[1]: built on an empty header, then trained one
      // instance at a time via the UpdateableClassifier interface.
      classifiers[1].buildClassifier(new Instances(train, 0));
      for (int i = 0; i < train.numInstances(); i++) {
        ((UpdateableClassifier)classifiers[1]).updateClassifier(
             train.instance(i));
      }
      built = true;
      testWRTZeroR(classifiers[1], evaluationI, train, test);
      if (!evaluationB.equals(evaluationI)) {
        println("no");
        result[0] = false;

        if (m_Debug) {
          println("\n=== Full Report ===");
          println("Results differ between batch and "
                  + "incrementally built models.\n"
                  + "Depending on the classifier, this may be OK");
          println("Here are the results:\n");
          println(evaluationB.toSummaryString(
                  "\nbatch built results\n", true));
          println(evaluationI.toSummaryString(
                  "\nincrementally built results\n", true));
          println("Here are the datasets:\n");
          println("=== Train Dataset ===\n"
                  + train.toString() + "\n");
          println("=== Test Dataset ===\n"
                  + test.toString() + "\n\n");
        }
      }
      else {
        println("yes");
        result[0] = true;
      }
    } catch (Exception ex) {
      result[0] = false;

      // "built" distinguishes a failure during incremental training from
      // one during evaluation of the trained model.
      print("Problem during");
      if (built)
        print(" testing");
      else
        print(" training");
      println(": " + ex.getMessage() + "\n");
    }

    return result;
  }

  /**
   * Checks whether the classifier erroneously uses the class
   * value of test instances (if provided). Runs the classifier with
   * test instance class values set to missing and compares with results
   * when test instance class values are left intact.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @return index 0 is true if the test was passed
   */
  protected boolean[] doesntUseTestClassVal(boolean nominalPredictor,
                                            boolean numericPredictor,
                                            boolean stringPredictor,
                                            boolean numericClass) {

    print("classifier ignores test instance class vals");
    printAttributeSummary(
        nominalPredictor, numericPredictor, stringPredictor, numericClass);
    print("...");
    int numTrain = 2*getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    // Only index 0 is meaningful (true iff the test passed).
    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier [] classifiers = null;
    // NOTE(review): evaluationB/evaluationI are constructed but never
    // consulted by this check; kept for structural parity with the
    // sibling tests.
    Evaluation evaluationB = null;
    Evaluation evaluationI = null;
    // Set when the two prediction distributions differ, so the debug
    // report can distinguish a genuine mismatch from a runtime failure.
    // (Previously this flag was never set, leaving that branch dead.)
    boolean evalFail = false;
    try {
      train = makeTestDataset(42, numTrain,
                              nominalPredictor ? 3 : 0,
                              numericPredictor ? 2 : 0,
                              stringPredictor ? 1 : 0,
                              numClasses,
                              numericClass);
      test = makeTestDataset(24, numTest,
                             nominalPredictor ? 3 : 0,
                             numericPredictor ? 2 : 0,
                             stringPredictor ? 1 : 0,
                             numClasses,
                             numericClass);
      if (nominalPredictor) {
        train.deleteAttributeAt(0);
        test.deleteAttributeAt(0);
      }
      if (missingLevel > 0) {
        addMissing(train, missingLevel, predictorMissing, classMissing);
        addMissing(test, Math.min(missingLevel, 50), predictorMissing,
                   classMissing);
      }
      // Two identically-trained copies: one predicts with the class
      // value intact, the other with the class value set to missing.
      classifiers = Classifier.makeCopies(getClassifier(), 2);
      evaluationB = new Evaluation(train);
      evaluationI = new Evaluation(train);
      classifiers[0].buildClassifier(train);
      classifiers[1].buildClassifier(train);
    } catch (Exception ex) {
      // Chain the original exception as the cause so its stack trace is
      // preserved (previously only the message survived).
      throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
    }
    try {

      // Now set test values to missing when predicting
      for (int i = 0; i < test.numInstances(); i++) {
        Instance testInst = test.instance(i);
        Instance classMissingInst = (Instance)testInst.copy();
        classMissingInst.setDataset(test);
        classMissingInst.setClassMissing();
        double [] dist0 = classifiers[0].distributionForInstance(testInst);
        double [] dist1 = classifiers[1].distributionForInstance(classMissingInst);
        for (int j = 0; j < dist0.length; j++) {
          if (dist0[j] != dist1[j]) {
            // Fix: flag the mismatch so the debug report below prints
            // the intended "results differ" message.
            evalFail = true;
            throw new Exception("Prediction different for instance "
                                + (i + 1));
          }
        }
      }

      println("yes");
      result[0] = true;
    } catch (Exception ex) {
      println("no");
      result[0] = false;

      if (m_Debug) {
        println("\n=== Full Report ===");

        if (evalFail) {
          println("Results differ between non-missing and "
                  + "missing test class values.");
        } else {
          print("Problem during testing");
          println(": " + ex.getMessage() + "\n");
        }
        println("Here are the datasets:\n");
        println("=== Train Dataset ===\n"
                + train.toString() + "\n");
        println("=== Train Weights ===\n");
        for (int i = 0; i < train.numInstances(); i++) {
          println(" " + (i + 1)
                  + "    " + train.instance(i).weight());
        }
        println("=== Test Dataset ===\n"
                + test.toString() + "\n\n");
        // Fix: closing parenthesis was missing from this message.
        println("(test weights all 1.0)\n");
      }
    }

    return result;
  }

  /**
   * Checks whether the classifier can handle instance weights.
   * This test compares the classifier performance on two datasets
   * that are identical except for the training weights. If the
   * results change, then the classifier must be using the weights. It
   * may be possible to get a false positive from this test if the
   * weight changes aren't significant enough to induce a change
   * in classifier performance (but the weights are chosen to minimize
   * the likelihood of this).
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @return index 0 true if the test was passed
   */
  protected boolean[] instanceWeights(boolean nominalPredictor,
                                      boolean numericPredictor,
                                      boolean stringPredictor,
                                      boolean numericClass) {

    print("classifier uses instance weights");
    printAttributeSummary(
        nominalPredictor, numericPredictor, stringPredictor, numericClass);
    print("...");
    int numTrain = 2*getNumInstances(), numTest = getNumInstances(),
        numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    // Only index 0 is meaningful (true iff the test passed).
    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier [] classifiers = null;
    Evaluation evaluationB = null;
    Evaluation evaluationI = null;
    boolean built = false;
    boolean evalFail = false;
    try {
      train = makeTestDataset(42, numTrain,
                              nominalPredictor ? 3 : 0,
                              numericPredictor ? 2 : 0,
                              stringPredictor ? 1 : 0,
                              numClasses,
                              numericClass);
      test = makeTestDataset(24, numTest,
                             nominalPredictor ? 3 : 0,
                             numericPredictor ? 2 : 0,
                             stringPredictor ? 1 : 0,
                             numClasses,
                             numericClass);
      if (nominalPredictor) {
        train.deleteAttributeAt(0);
        test.deleteAttributeAt(0);
      }
      if (missingLevel > 0) {
        addMissing(train, missingLevel, predictorMissing, classMissing);
        addMissing(test, Math.min(missingLevel, 50), predictorMissing,
                   classMissing);
      }
      classifiers = Classifier.makeCopies(getClassifier(), 2);
      evaluationB = new Evaluation(train);
      evaluationI = new Evaluation(train);
      // Baseline: build and evaluate with all weights at their default.
      classifiers[0].buildClassifier(train);
      testWRTZeroR(classifiers[0], evaluationB, train, test);
    } catch (Exception ex) {
      // Chain the original exception as the cause so its stack trace is
      // preserved (previously only the message survived).
      throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
    }
    try {

      // Now modify instance weights and re-built/test
      for (int i = 0; i < train.numInstances(); i++) {
        train.instance(i).setWeight(0);
      }
      Random random = new Random(1);
      for (int i = 0; i < train.numInstances() / 2; i++) {
        // Fix: Math.abs(Integer.MIN_VALUE) is still negative, which
        // would yield a negative index/weight. Guard that (vanishingly
        // rare) draw while keeping the random stream identical for all
        // other values.
        int r = random.nextInt();
        int inst = (r == Integer.MIN_VALUE ? 0 : Math.abs(r))
            % train.numInstances();
        r = random.nextInt();
        int weight = (r == Integer.MIN_VALUE ? 0 : Math.abs(r)) % 10 + 1;
        train.instance(inst).setWeight(weight);
      }
      classifiers[1].buildClassifier(train);
      built = true;
      testWRTZeroR(classifiers[1], evaluationI, train, test);
      // Identical evaluations mean the classifier ignored the weights.
      if (evaluationB.equals(evaluationI)) {
        evalFail = true;
        throw new Exception("evalFail");
      }

      println("yes");
      result[0] = true;
    } catch (Exception ex) {
      println("no");
      result[0] = false;

      if (m_Debug) {
        println("\n=== Full Report ===");

        if (evalFail) {
          println("Results don't differ between non-weighted and "
                  + "weighted instance models.");
          println("Here are the results:\n");
          println(evaluationB.toSummaryString("\nboth methods\n",
                                              true));
        } else {
          print("Problem during");
          if (built) {
            print(" testing");
          } else {
            print(" training");
          }
          println(": " + ex.getMessage() + "\n");
        }
        println("Here are the datasets:\n");
        println("=== Train Dataset ===\n"
                + train.toString() + "\n");
        println("=== Train Weights ===\n");
        for (int i = 0; i < train.numInstances(); i++) {
          println(" " + (i + 1)
                  + "    " + train.instance(i).weight());
        }
        println("=== Test Dataset ===\n"
                + test.toString() + "\n\n");
        // Fix: closing parenthesis was missing from this message.
        println("(test weights all 1.0)\n");
      }
    }

    return result;
  }

  /**
   * Checks whether the scheme alters the training dataset during
   * training. If the scheme needs to modify the training
   * data it should take a copy of the training data. Currently checks
   * for changes to header structure, number of instances, order of
   * instances, instance weights.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param stringPredictor if true use string predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @param predictorMissing true if we know the classifier can handle
   * (at least) moderate missing predictor values
   * @param classMissing true if we know the classifier can handle
   * (at least) moderate missing class values
   * @return index 0 is true if the test was passed
   */
  protected boolean[] datasetIntegrity(boolean nominalPredictor,
				       boolean numericPredictor, 
                                       boolean stringPredictor, 
				       boolean numericClass,
				       boolean predictorMissing,
				       boolean classMissing) {

    print("classifier doesn't alter original datasets");
    printAttributeSummary(
        nominalPredictor, numericPredictor, stringPredictor, numericClass);
    print("...");
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
        numClasses = 2, missingLevel = 20;

    boolean[] result = new boolean[2];
    Instances train = null;
    Instances test = null;
    Classifier classifier = null;
    Evaluation evaluation = null;
    boolean built = false;
    try {
      train = makeTestDataset(42, numTrain, 
			      nominalPredictor ? 2 : 0,
			      numericPredictor ? 1 : 0, 
			      stringPredictor ? 1 : 0, 
			      numClasses, 
			      numericClass);
      test = makeTestDataset(24, numTest,
			     nominalPredictor ? 2 : 0,
			     numericPredictor ? 1 : 0, 
			     stringPredictor ? 1 : 0, 
			     numClasses, 
			     numericClass);
      if (nominalPredictor) {
	train.deleteAttributeAt(0);
	test.deleteAttributeAt(0);
      }
      if (missingLevel > 0) {
	addMissing(train, missingLevel, predictorMissing, classMissing);
	addMissing(test, Math.min(missingLevel, 50), predictorMissing, 
		   classMissing);
      }
      classifier = Classifier.makeCopies(getClassifier(), 1)[0];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -