📄 checkclassifier.java

📁 Java 编写的多种数据挖掘算法，包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
    Instances train = null;    Instances test = null;    Classifier [] classifiers = null;    Evaluation evaluationB = null;    Evaluation evaluationI = null;    boolean built = false;    boolean evalFail = false;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor ? 3 : 0,                              numericPredictor ? 2 : 0,                               stringPredictor ? 1 : 0,                               datePredictor ? 1 : 0,                               relationalPredictor ? 1 : 0,                               numClasses,                               classType,                              multiInstance);      test = makeTestDataset(24, numTest,                             nominalPredictor ? 3 : 0,                             numericPredictor ? 2 : 0,                              stringPredictor ? 1 : 0,                              datePredictor ? 1 : 0,                              relationalPredictor ? 1 : 0,                              numClasses,                              classType,                             multiInstance);      if (missingLevel > 0) {        addMissing(train, missingLevel, predictorMissing, classMissing);        addMissing(test, Math.min(missingLevel, 50), predictorMissing,             classMissing);      }      classifiers = Classifier.makeCopies(getClassifier(), 2);      evaluationB = new Evaluation(train);      evaluationI = new Evaluation(train);      classifiers[0].buildClassifier(train);      testWRTZeroR(classifiers[0], evaluationB, train, test);    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {            // Now modify instance weights and re-built/test      for (int i = 0; i < train.numInstances(); i++) {        train.instance(i).setWeight(0);      }      Random random = new Random(1);      for (int i = 0; i < train.numInstances() / 2; i++) {        int inst = Math.abs(random.nextInt()) % 
train.numInstances();        int weight = Math.abs(random.nextInt()) % 10 + 1;        train.instance(inst).setWeight(weight);      }      classifiers[1].buildClassifier(train);      built = true;      testWRTZeroR(classifiers[1], evaluationI, train, test);      if (evaluationB.equals(evaluationI)) {        //	println("no");        evalFail = true;        throw new Exception("evalFail");      }            println("yes");      result[0] = true;    } catch (Exception ex) {      println("no");      result[0] = false;            if (m_Debug) {        println("\n=== Full Report ===");                if (evalFail) {          println("Results don't differ between non-weighted and "              + "weighted instance models.");          println("Here are the results:\n");          println(evaluationB.toSummaryString("\nboth methods\n",              true));        } else {          print("Problem during");          if (built) {            print(" testing");          } else {            print(" training");          }          println(": " + ex.getMessage() + "\n");        }        println("Here are the datasets:\n");        println("=== Train Dataset ===\n"            + train.toString() + "\n");        println("=== Train Weights ===\n");        for (int i = 0; i < train.numInstances(); i++) {          println(" " + (i + 1)               + "    " + train.instance(i).weight());        }        println("=== Test Dataset ===\n"            + test.toString() + "\n\n");	        println("(test weights all 1.0\n");      }    }        return result;  }    /**   * Checks whether the scheme alters the training dataset during   * training. If the scheme needs to modify the training   * data it should take a copy of the training data. Currently checks   * for changes to header structure, number of instances, order of   * instances, instance weights.   
*   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param classType the class type (NUMERIC, NOMINAL, etc.)   * @param predictorMissing true if we know the classifier can handle   * (at least) moderate missing predictor values   * @param classMissing true if we know the classifier can handle   * (at least) moderate missing class values   * @return index 0 is true if the test was passed   */  protected boolean[] datasetIntegrity(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType,      boolean predictorMissing,      boolean classMissing) {        print("classifier doesn't alter original datasets");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);    print("...");    int numTrain = getNumInstances(), numTest = getNumInstances(),     numClasses = 2, missingLevel = 20;        boolean[] result = new boolean[2];    Instances train = null;    Instances test = null;    Classifier classifier = null;    Evaluation evaluation = null;    boolean built = false;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor ? 2 : 0,                              numericPredictor ? 1 : 0,                               stringPredictor ? 1 : 0,                               datePredictor ? 1 : 0,                               relationalPredictor ? 
1 : 0,                               numClasses,                               classType,                              multiInstance);      test = makeTestDataset(24, numTest,                             nominalPredictor ? 2 : 0,                             numericPredictor ? 1 : 0,                              stringPredictor ? 1 : 0,                              datePredictor ? 1 : 0,                              relationalPredictor ? 1 : 0,                              numClasses,                              classType,                             multiInstance);      if (missingLevel > 0) {        addMissing(train, missingLevel, predictorMissing, classMissing);        addMissing(test, Math.min(missingLevel, 50), predictorMissing,             classMissing);      }      classifier = Classifier.makeCopies(getClassifier(), 1)[0];      evaluation = new Evaluation(train);    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      Instances trainCopy = new Instances(train);      Instances testCopy = new Instances(test);      classifier.buildClassifier(trainCopy);      compareDatasets(train, trainCopy);      built = true;      testWRTZeroR(classifier, evaluation, trainCopy, testCopy);      compareDatasets(test, testCopy);            println("yes");      result[0] = true;    } catch (Exception ex) {      println("no");      result[0] = false;            if (m_Debug) {        println("\n=== Full Report ===");        print("Problem during");        if (built) {          print(" testing");        } else {          print(" training");        }        println(": " + ex.getMessage() + "\n");        println("Here are the datasets:\n");        println("=== Train Dataset ===\n"            + train.toString() + "\n");        println("=== Test Dataset ===\n"            + test.toString() + "\n\n");      }    }        return result;  }    /**   * Runs a text on the datasets with the given characteristics.   
*    * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param classType the class type (NUMERIC, NOMINAL, etc.)   * @param missingLevel the percentage of missing values   * @param predictorMissing true if the missing values may be in    * the predictors   * @param classMissing true if the missing values may be in the class   * @param numTrain the number of instances in the training set   * @param numTest the number of instaces in the test set   * @param numClasses the number of classes   * @param accepts the acceptable string in an exception   * @return index 0 is true if the test was passed, index 1 is true if test    *         was acceptable   */  protected boolean[] runBasicTest(boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,      boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType,      int missingLevel,      boolean predictorMissing,      boolean classMissing,      int numTrain,      int numTest,      int numClasses,      FastVector accepts) {        return runBasicTest(		nominalPredictor, 		numericPredictor,		stringPredictor,		datePredictor,		relationalPredictor,		multiInstance,		classType, 		TestInstances.CLASS_IS_LAST,		missingLevel,		predictorMissing,		classMissing,		numTrain,		numTest,		numClasses,		accepts);  }    /**   * Runs a text on the datasets with the given characteristics.   
*    * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param classType the class type (NUMERIC, NOMINAL, etc.)   * @param classIndex the attribute index of the class   * @param missingLevel the percentage of missing values   * @param predictorMissing true if the missing values may be in    * the predictors   * @param classMissing true if the missing values may be in the class   * @param numTrain the number of instances in the training set   * @param numTest the number of instaces in the test set   * @param numClasses the number of classes   * @param accepts the acceptable string in an exception   * @return index 0 is true if the test was passed, index 1 is true if test    *         was acceptable   */  protected boolean[] runBasicTest(boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,      boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType,      int classIndex,      int missingLevel,      boolean predictorMissing,      boolean classMissing,      int numTrain,      int numTest,      int numClasses,      FastVector accepts) {        boolean[] result = new boolean[2];    Instances train = null;    Instances test = null;    Classifier classifier = null;    Evaluation evaluation = null;    boolean built = false;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor     ? 2 : 0,                              numericPredictor     ? 1 : 0,                               stringPredictor      ? 1 : 0,                              datePredictor        ? 
1 : 0,                              relationalPredictor  ? 1 : 0,                              numClasses,                               classType,                              classIndex,                              multiInstance);      test = makeTestDataset(24, numTest,                             nominalPredictor     ? 2 : 0,                             numericPredictor     ? 1 : 0,                              stringPredictor      ? 1 : 0,                             datePredictor        ? 1 : 0,                             relationalPredictor  ? 1 : 0,                             numClasses,                              classType,                             classIndex,                             multiInstance);      if (missingLevel > 0) {        addMissing(train, missingLevel, predictorMissing, classMissing);        addMissing(test, Math.min(missingLevel, 50), predictorMissing,             classMissing);      }      classifier = Classifier.makeCopies(getClassifier(), 1)[0];      evaluation = new Evaluation(train);    } catch (Exception ex) {      ex.printStackTrace();      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      classifier.buildClassifier(train);      built = true;      if (!testWRTZeroR(classifier, evaluation, train, test)[0]) {        result[0] = true;        result[1] = true;        throw new Exception("Scheme performs worse than ZeroR");      }            println("yes");      result[0] = true;    }     catch (Exception ex) {      boolean acceptable = false;      String msg;      if (ex.getMessage() == null)	msg = "";      else        msg = ex.getMessage().toLowerCase();      if (msg.indexOf("not in classpath") > -1)	m_ClasspathProblems = true;      if (msg.indexOf("worse than zeror") >= 0) {        println("warning: performs worse than ZeroR");        result[0] = true;        result[1] = true;      } else {        for (int i = 0; i < accepts.size(); i++) {
上一页 1 2 3 4 5
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -