CheckClassifier.java
if (handleMissingPredictors)
canHandleMissing(PNom, PNum, PStr, numericClass, true, false, 100);
boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr,
numericClass,
false, true, 20)[0];
if (handleMissingClass)
canHandleMissing(PNom, PNum, PStr, numericClass, false, true, 100);
correctBuildInitialisation(PNom, PNum, PStr, numericClass);
datasetIntegrity(PNom, PNum, PStr, numericClass,
handleMissingPredictors, handleMissingClass);
doesntUseTestClassVal(PNom, PNum, PStr, numericClass);
if (updateable)
updatingEquality(PNom, PNum, PStr, numericClass);
}
}
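/*
 * Usage sketch (not part of the original source): the battery of tests above
 * is normally driven through the public check entry points. Assuming the
 * standard Weka CheckClassifier API (setClassifier() and doTests()) and an
 * arbitrarily chosen scheme, a minimal invocation might look like:
 *
 *   CheckClassifier check = new CheckClassifier();
 *   check.setClassifier(new weka.classifiers.rules.ZeroR());
 *   check.doTests();
 *
 * or, from the command line, something along the lines of
 *   java weka.classifiers.CheckClassifier -W weka.classifiers.rules.ZeroR
 */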
/**
* Checks whether the scheme can take command line options.
*
* @return index 0 is true if the classifier can take options
*/
protected boolean[] canTakeOptions() {
boolean[] result = new boolean[2];
print("options...");
if (m_Classifier instanceof OptionHandler) {
println("yes");
if (m_Debug) {
println("\n=== Full report ===");
Enumeration enu = ((OptionHandler)m_Classifier).listOptions();
while (enu.hasMoreElements()) {
Option option = (Option) enu.nextElement();
print(option.synopsis() + "\n"
+ option.description() + "\n");
}
println("\n");
}
result[0] = true;
}
else {
println("no");
result[0] = false;
}
return result;
}
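/*
 * Illustrative sketch (an assumption, not part of this class): the same
 * instanceof test can be used from client code to enumerate a scheme's
 * options before setting them. The choice of ZeroR is arbitrary.
 *
 *   Classifier c = new weka.classifiers.rules.ZeroR();
 *   if (c instanceof OptionHandler) {
 *     Enumeration opts = ((OptionHandler) c).listOptions();
 *     while (opts.hasMoreElements()) {
 *       Option o = (Option) opts.nextElement();
 *       System.out.println(o.synopsis() + " - " + o.description());
 *     }
 *   }
 */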
/**
* Checks whether the scheme can build models incrementally.
*
* @return index 0 is true if the classifier can train incrementally
*/
protected boolean[] updateableClassifier() {
boolean[] result = new boolean[2];
print("updateable classifier...");
if (m_Classifier instanceof UpdateableClassifier) {
println("yes");
result[0] = true;
}
else {
println("no");
result[0] = false;
}
return result;
}
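/*
 * Illustrative sketch (an assumption, not taken from this class): an
 * updateable scheme is built once on the dataset header and then fed
 * instances one at a time. NaiveBayesUpdateable is used here only as a
 * well-known example of such a scheme.
 *
 *   NaiveBayesUpdateable nb = new NaiveBayesUpdateable();
 *   nb.buildClassifier(new Instances(train, 0));   // structure only
 *   for (int i = 0; i < train.numInstances(); i++) {
 *     nb.updateClassifier(train.instance(i));      // incremental update
 *   }
 */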
/**
* Checks whether the scheme says it can handle instance weights.
*
* @return index 0 is true if the classifier handles instance weights
*/
protected boolean[] weightedInstancesHandler() {
boolean[] result = new boolean[2];
print("weighted instances classifier...");
if (m_Classifier instanceof WeightedInstancesHandler) {
println("yes");
result[0] = true;
}
else {
println("no");
result[0] = false;
}
return result;
}
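/*
 * Illustrative sketch (an assumption): WeightedInstancesHandler is a marker
 * interface with no methods of its own, so the only way to exercise it is to
 * give the training instances non-uniform weights and let the scheme use
 * them during buildClassifier.
 *
 *   if (classifier instanceof WeightedInstancesHandler) {
 *     train.instance(0).setWeight(5.0);   // emphasise the first instance
 *   }
 *   classifier.buildClassifier(train);
 */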
/**
* Checks basic prediction of the scheme, for simple non-troublesome
* datasets.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canPredict(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass) {
print("basic predict");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");
FastVector accepts = new FastVector();
accepts.addElement("nominal");
accepts.addElement("numeric");
accepts.addElement("string");
int numTrain = getNumInstances(), numTest = getNumInstances(),
numClasses = 2, missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
numericClass,
missingLevel, predictorMissing, classMissing,
numTrain, numTest, numClasses,
accepts);
}
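/*
 * Note on the "accepts" vector (a sketch of the convention assumed here; the
 * runBasicTest implementation is not shown in this listing): when the scheme
 * throws an exception, a message containing one of these keywords is treated
 * as an acceptable refusal (result index 1) rather than a hard failure. A
 * scheme that cannot handle string attributes might therefore signal this
 * cleanly with something like:
 *
 *   if (data.checkForStringAttributes()) {
 *     throw new Exception("Cannot handle string attributes!");
 *   }
 */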
/**
* Checks whether nominal schemes can handle more than two classes.
* If a scheme is only designed for two-class problems it should
* throw an appropriate exception for multi-class problems.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numClasses the number of classes to test
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canHandleNClasses(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
int numClasses) {
print("more than two class problems");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, false);
print("...");
FastVector accepts = new FastVector();
accepts.addElement("number");
accepts.addElement("class");
int numTrain = getNumInstances(), numTest = getNumInstances(),
missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
false,
missingLevel, predictorMissing, classMissing,
numTrain, numTest, numClasses,
accepts);
}
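/*
 * Illustrative sketch (an assumption): a scheme restricted to two-class
 * problems should detect the violation in buildClassifier and report it with
 * wording the test recognises (note "number" and "class" in the accepts
 * vector above).
 *
 *   public void buildClassifier(Instances data) throws Exception {
 *     if (data.numClasses() > 2) {
 *       throw new Exception("Can only handle a class with 2 values, "
 *         + "not this number of classes: " + data.numClasses());
 *     }
 *     // ... two-class training ...
 *   }
 */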
/**
* Checks whether the scheme can handle zero training instances.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canHandleZeroTraining(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass) {
print("handle zero training instances");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");
FastVector accepts = new FastVector();
accepts.addElement("train");
accepts.addElement("training");
accepts.addElement("value");
int numTrain = 0, numTest = getNumInstances(), numClasses = 2,
missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
numericClass,
missingLevel, predictorMissing, classMissing,
numTrain, numTest, numClasses,
accepts);
}
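/*
 * Illustrative sketch (an assumption): a scheme that cannot build a model
 * from an empty dataset should fail gracefully in buildClassifier, again
 * using wording the test recognises ("train"/"training"/"value" above).
 *
 *   if (data.numInstances() == 0) {
 *     throw new Exception("No training instances!");
 *   }
 */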
/**
* Checks whether the scheme correctly initialises models when
* buildClassifier is called. This test calls buildClassifier with
* one training dataset and records performance on a test set.
* buildClassifier is then called on a training set with different
* structure, and then again with the original training set. The
* performance on the test set is compared with the original results
* and any performance difference noted as incorrect build initialisation.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @return index 0 is true if the test was passed, index 1 is true if the
* scheme performs worse than ZeroR, but without error (index 0 is
* false)
*/
protected boolean[] correctBuildInitialisation(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass) {
boolean[] result = new boolean[2];
print("correct initialisation during buildClassifier");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");
int numTrain = getNumInstances(), numTest = getNumInstances(),
numClasses = 2, missingLevel = 0;
boolean predictorMissing = false, classMissing = false;
Instances train1 = null;
Instances test1 = null;
Instances train2 = null;
Instances test2 = null;
Classifier classifier = null;
Evaluation evaluation1A = null;
Evaluation evaluation1B = null;
Evaluation evaluation2 = null;
boolean built = false;
int stage = 0;
try {
// Make two sets of train/test splits with different
// numbers of attributes
train1 = makeTestDataset(42, numTrain,
nominalPredictor ? 2 : 0,
numericPredictor ? 1 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
train2 = makeTestDataset(84, numTrain,
nominalPredictor ? 3 : 0,
numericPredictor ? 2 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
test1 = makeTestDataset(24, numTest,
nominalPredictor ? 2 : 0,
numericPredictor ? 1 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
test2 = makeTestDataset(48, numTest,
nominalPredictor ? 3 : 0,
numericPredictor ? 2 : 0,
stringPredictor ? 1 : 0,
numClasses,
numericClass);
if (nominalPredictor) {
train1.deleteAttributeAt(0);
test1.deleteAttributeAt(0);
train2.deleteAttributeAt(0);
test2.deleteAttributeAt(0);
}
if (missingLevel > 0) {
addMissing(train1, missingLevel, predictorMissing, classMissing);
addMissing(test1, Math.min(missingLevel,50), predictorMissing,
classMissing);
addMissing(train2, missingLevel, predictorMissing, classMissing);
addMissing(test2, Math.min(missingLevel,50), predictorMissing,
classMissing);
}
classifier = Classifier.makeCopies(getClassifier(), 1)[0];
evaluation1A = new Evaluation(train1);
evaluation1B = new Evaluation(train1);
evaluation2 = new Evaluation(train2);
} catch (Exception ex) {
throw new Error("Error setting up for tests: " + ex.getMessage());
}
try {
stage = 0;
classifier.buildClassifier(train1);
built = true;
if (!testWRTZeroR(classifier, evaluation1A, train1, test1)[0]) {
throw new Exception("Scheme performs worse than ZeroR");
}
stage = 1;
built = false;
classifier.buildClassifier(train2);
built = true;
if (!testWRTZeroR(classifier, evaluation2, train2, test2)[0]) {
throw new Exception("Scheme performs worse than ZeroR");
}
stage = 2;
built = false;
classifier.buildClassifier(train1);
built = true;
if (!testWRTZeroR(classifier, evaluation1B, train1, test1)[0]) {
throw new Exception("Scheme performs worse than ZeroR");
}
stage = 3;
if (!evaluation1A.equals(evaluation1B)) {
if (m_Debug) {
println("\n=== Full report ===\n"
+ evaluation1A.toSummaryString("\nFirst buildClassifier()",
true)
+ "\n\n");
println(
evaluation1B.toSummaryString("\nSecond buildClassifier()",
true)
+ "\n\n");
}
throw new Exception("Results differ between buildClassifier calls");
}
println("yes");
result[0] = true;
// note: the hard-coded 'false' disables this extra debug report
if (false && m_Debug) {
println("\n=== Full report ===\n"
+ evaluation1A.toSummaryString("\nFirst buildClassifier()",
true)
+ "\n\n");
println(
evaluation1B.toSummaryString("\nSecond buildClassifier()",
true)
+ "\n\n");
}
}
catch (Exception ex) {
String msg = (ex.getMessage() == null) ? "" : ex.getMessage().toLowerCase();
if (msg.indexOf("worse than zeror") >= 0) {
println("warning: performs worse than ZeroR");
result[1] = true;
} else {
println("no");
result[0] = false;
}
if (m_Debug) {
println("\n=== Full Report ===");
print("Problem during");
if (built) {
print(" testing");
} else {
print(" training");
}
switch (stage) {
case 0:
print(" of dataset 1");
break;
case 1:
print(" of dataset 2");
break;
case 2:
print(" of dataset 1 (2nd build)");
break;
case 3:
print(", comparing results from builds of dataset 1");
break;
}
println(": " + ex.getMessage() + "\n");
println("here are the datasets:\n");
println("=== Train1 Dataset ===\n"
+ train1.toString() + "\n");
println("=== Test1 Dataset ===\n"
+ test1.toString() + "\n\n");
println("=== Train2 Dataset ===\n"
+ train2.toString() + "\n");
println("=== Test2 Dataset ===\n"
+ test2.toString() + "\n\n");
}
}
return result;
}
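/*
 * Illustrative sketch (an assumption): to pass this check, buildClassifier
 * must discard any state left over from a previous call and train from
 * scratch on the supplied data only, e.g.
 *
 *   public void buildClassifier(Instances data) throws Exception {
 *     m_Model = null;                    // reset before (re)training
 *     m_Header = new Instances(data, 0); // remember only the new structure
 *     // ... train on 'data' ...
 *   }
 *
 * The field names m_Model and m_Header are hypothetical.
 */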
/**
* Checks basic missing value handling of the scheme. If the missing
* values cause an exception to be thrown by the scheme, this will be
* recorded.
*
* @param nominalPredictor if true use nominal predictor attributes
* @param numericPredictor if true use numeric predictor attributes
* @param stringPredictor if true use string predictor attributes
* @param numericClass if true use a numeric class attribute otherwise a
* nominal class attribute
* @param predictorMissing true if the missing values may be in
* the predictors
* @param classMissing true if the missing values may be in the class
* @param missingLevel the percentage of missing values
* @return index 0 is true if the test was passed, index 1 is true if test
* was acceptable
*/
protected boolean[] canHandleMissing(boolean nominalPredictor,
boolean numericPredictor,
boolean stringPredictor,
boolean numericClass,
boolean predictorMissing,
boolean classMissing,
int missingLevel) {
if (missingLevel == 100)
print("100% ");
print("missing");
if (predictorMissing) {
print(" predictor");
if (classMissing)
print(" and");
}
if (classMissing)
print(" class");
print(" values");
printAttributeSummary(
nominalPredictor, numericPredictor, stringPredictor, numericClass);
print("...");