📄 checkclassifier.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
      return true;
    }
    System.out.println("no");
    return false;
  }

  /**
   * Checks whether the scheme can build models incrementally.
   *
   * @return true if the classifier can train incrementally
   */
  protected boolean updateableClassifier() {

    System.out.print("updateable classifier...");
    if (m_Classifier instanceof UpdateableClassifier) {
      System.out.println("yes");
      return true;
    }
    System.out.println("no");
    return false;
  }

  /**
   * Checks whether the scheme says it can handle instance weights.
   *
   * @return true if the classifier handles instance weights
   */
  protected boolean weightedInstancesHandler() {

    System.out.print("weighted instances classifier...");
    if (m_Classifier instanceof WeightedInstancesHandler) {
      System.out.println("yes");
      return true;
    }
    System.out.println("no");
    return false;
  }

  /**
   * Checks basic prediction of the scheme, for simple non-troublesome
   * datasets.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @return true if the test was passed
   */
  protected boolean canPredict(boolean nominalPredictor,
			       boolean numericPredictor, 
			       boolean numericClass) {

    System.out.print("basic predict");
    printAttributeSummary(nominalPredictor, numericPredictor, numericClass);
    System.out.print("...");
    FastVector accepts = new FastVector();
    accepts.addElement("nominal");
    accepts.addElement("numeric");
    int numTrain = 20, numTest = 20, numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    return runBasicTest(nominalPredictor, numericPredictor, numericClass, 
			missingLevel, predictorMissing, classMissing,
			numTrain, numTest, numClasses, 
			accepts);
  }

  /**
   * Checks whether nominal schemes can handle more than two classes.
   * If a scheme is only designed for two-class problems it should
   * throw an appropriate exception for multi-class problems.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param numClasses the number of classes to test
   * @return true if the test was passed
   */
  protected boolean canHandleNClasses(boolean nominalPredictor,
				      boolean numericPredictor, 
				      int numClasses) {

    System.out.print("more than two class problems");
    printAttributeSummary(nominalPredictor, numericPredictor, false);
    System.out.print("...");
    FastVector accepts = new FastVector();
    accepts.addElement("number");
    accepts.addElement("class");
    int numTrain = 20, numTest = 20, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    return runBasicTest(nominalPredictor, numericPredictor, false, 
			missingLevel, predictorMissing, classMissing,
			numTrain, numTest, numClasses, 
			accepts);
  }

  /**
   * Checks whether the scheme can handle zero training instances.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @return true if the test was passed
   */
  protected boolean canHandleZeroTraining(boolean nominalPredictor,
					  boolean numericPredictor, 
					  boolean numericClass) {

    System.out.print("handle zero training instances");
    printAttributeSummary(nominalPredictor, numericPredictor, numericClass);
    System.out.print("...");
    FastVector accepts = new FastVector();
    accepts.addElement("train");
    accepts.addElement("value");
    int numTrain = 0, numTest = 20, numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    return runBasicTest(nominalPredictor, numericPredictor, numericClass, 
			missingLevel, predictorMissing, classMissing,
			numTrain, numTest, numClasses, 
			accepts);
  }


  /**
   * Checks whether the scheme correctly initialises models when 
   * buildClassifier is called. This test calls buildClassifier with
   * one training dataset and records performance on a test set. 
   * buildClassifier is then called on a training set with different
   * structure, and then again with the original training set. The
   * performance on the test set is compared with the original results
   * and any performance difference noted as incorrect build initialisation.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @return true if the test was passed
   */
  protected boolean correctBuildInitialisation(boolean nominalPredictor,
					       boolean numericPredictor, 
					       boolean numericClass) {

    System.out.print("correct initialisation during buildClassifier");
    printAttributeSummary(nominalPredictor, numericPredictor, numericClass);
    System.out.print("...");
    int numTrain = 20, numTest = 20, numClasses = 2, missingLevel = 0;
    boolean predictorMissing = false, classMissing = false;

    Instances train1 = null;
    Instances test1 = null;
    Instances train2 = null;
    Instances test2 = null;
    Classifier classifier = null;
    Evaluation evaluation1A = null;
    Evaluation evaluation1B = null;
    Evaluation evaluation2 = null;
    boolean built = false;
    int stage = 0;
    try {

      // Make two sets of train/test splits with different 
      // numbers of attributes
      train1 = makeTestDataset(42, numTrain, 
			       nominalPredictor ? 2 : 0,
			       numericPredictor ? 1 : 0, 
			       numClasses, 
			       numericClass);
      train2 = makeTestDataset(84, numTrain, 
			       nominalPredictor ? 3 : 0,
			       numericPredictor ? 2 : 0, 
			       numClasses, 
			       numericClass);
      test1 = makeTestDataset(24, numTest,
			      nominalPredictor ? 2 : 0,
			      numericPredictor ? 1 : 0, 
			      numClasses, 
			      numericClass);
      test2 = makeTestDataset(48, numTest,
			      nominalPredictor ? 3 : 0,
			      numericPredictor ? 2 : 0, 
			      numClasses, 
			      numericClass);
      if (nominalPredictor) {
	train1.deleteAttributeAt(0);
	test1.deleteAttributeAt(0);
	train2.deleteAttributeAt(0);
	test2.deleteAttributeAt(0);
      }
      if (missingLevel > 0) {
	addMissing(train1, missingLevel, predictorMissing, classMissing);
	addMissing(test1, Math.min(missingLevel,50), predictorMissing, 
		   classMissing);
	addMissing(train2, missingLevel, predictorMissing, classMissing);
	addMissing(test2, Math.min(missingLevel,50), predictorMissing, 
		   classMissing);
      }

      classifier = Classifier.makeCopies(getClassifier(), 1)[0];
      evaluation1A = new Evaluation(train1);
      evaluation1B = new Evaluation(train1);
      evaluation2 = new Evaluation(train2);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      stage = 0;
      classifier.buildClassifier(train1);
      built = true;
      if (!testWRTZeroR(classifier, evaluation1A, train1, test1)) {
	throw new Exception("Scheme performs worse than ZeroR");
      }

      stage = 1;
      built = false;
      classifier.buildClassifier(train2);
      built = true;
      if (!testWRTZeroR(classifier, evaluation2, train2, test2)) {
	throw new Exception("Scheme performs worse than ZeroR");
      }

      stage = 2;
      built = false;
      classifier.buildClassifier(train1);
      built = true;
      if (!testWRTZeroR(classifier, evaluation1B, train1, test1)) {
	throw new Exception("Scheme performs worse than ZeroR");
      }

      stage = 3;
      if (!evaluation1A.equals(evaluation1B)) {
	if (m_Debug) {
	  System.out.println("\n=== Full report ===\n"
		+ evaluation1A.toSummaryString("\nFirst buildClassifier()",
					       true)
		+ "\n\n");
	  System.out.println(
                evaluation1B.toSummaryString("\nSecond buildClassifier()",
					     true)
		+ "\n\n");
	}
	throw new Exception("Results differ between buildClassifier calls");
      }
      System.out.println("yes");

      if (false && m_Debug) {
	System.out.println("\n=== Full report ===\n"
                + evaluation1A.toSummaryString("\nFirst buildClassifier()",
					       true)
		+ "\n\n");
	System.out.println(
                evaluation1B.toSummaryString("\nSecond buildClassifier()",
					     true)
		+ "\n\n");
      }
      return true;
    } catch (Exception ex) {
      String msg = ex.getMessage().toLowerCase();
      if (msg.indexOf("worse than zeror") >= 0) {
	System.out.println("warning: performs worse than ZeroR");
      } else {
	System.out.println("no");
      }
      if (m_Debug) {
	System.out.println("\n=== Full Report ===");
	System.out.print("Problem during");
	if (built) {
	  System.out.print(" testing");
	} else {
	  System.out.print(" training");
	}
	switch (stage) {
	case 0:
	  System.out.print(" of dataset 1");
	  break;
	case 1:
	  System.out.print(" of dataset 2");
	  break;
	case 2:
	  System.out.print(" of dataset 1 (2nd build)");
	  break;
	case 3:
	  System.out.print(", comparing results from builds of dataset 1");
	  break;	  
	}
	System.out.println(": " + ex.getMessage() + "\n");
	System.out.println("here are the datasets:\n");
	System.out.println("=== Train1 Dataset ===\n"
			   + train1.toString() + "\n");
	System.out.println("=== Test1 Dataset ===\n"
			   + test1.toString() + "\n\n");
	System.out.println("=== Train2 Dataset ===\n"
			   + train2.toString() + "\n");
	System.out.println("=== Test2 Dataset ===\n"
			   + test2.toString() + "\n\n");
      }
    }
    return false;
  }


  /**
   * Checks basic missing value handling of the scheme. If the missing
   * values cause an exception to be thrown by the scheme, this will be
   * recorded.
   *
   * @param nominalPredictor if true use nominal predictor attributes
   * @param numericPredictor if true use numeric predictor attributes
   * @param numericClass if true use a numeric class attribute otherwise a
   * nominal class attribute
   * @param predictorMissing true if the missing values may be in 
   * the predictors
   * @param classMissing true if the missing values may be in the class
   * @param level the percentage of missing values
   * @return true if the test was passed
   */
  protected boolean canHandleMissing(boolean nominalPredictor,
				     boolean numericPredictor, 
				     boolean numericClass,
				     boolean predictorMissing,
				     boolean classMissing,
				     int missingLevel) {

    if (missingLevel == 100) {
      System.out.print("100% ");
    }
    System.out.print("missing");
    if (predictorMissing) {
      System.out.print(" predictor");
      if (classMissing) {
	System.out.print(" and");
      }
    }
    if (classMissing) {
      System.out.print(" class");
    }
    System.out.print(" values");
    printAttributeSummary(nominalPredictor, numericPredictor, numericClass);
    System.out.print("...");
    FastVector accepts = new FastVector();
    accepts.addElement("missing");
    accepts.addElement("value");
    accepts.addElement("train");
    int numTrain = 20, numTest = 20, numClasses = 2;

    return runBasicTest(nominalPredictor, numericPredictor, numericClass, 
			missingLevel, predictorMissing, classMissing,
			numTrain, numTest, numClasses, 
			accepts);
  }


  /**
   * Checks whether an updateable scheme produces the same model when
   * trained incrementally as when batch trained. The model itself
   * cannot be compared, so we compare the evaluation on test data
   * for both models. It is possible to get a false positive on this
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -