checkattributeselection.java

来自「这是关于数据挖掘的一些算法」· Java 代码 · 共 1,632 行 · 第 1/4 页
JAVA
1,632 行
   * @return index 0 is true if the test was passed, index 1 is true if test    *         was acceptable   * @see TestInstances#CLASS_IS_LAST   */  protected boolean[] canHandleClassAsNthAttribute(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType,      int classIndex) {        if (classIndex == TestInstances.CLASS_IS_LAST)      print("class attribute as last attribute");    else      print("class attribute as " + (classIndex + 1) + ". attribute");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);    print("...");    FastVector accepts = new FastVector();    int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;    boolean predictorMissing = false, classMissing = false;        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,                         datePredictor, relationalPredictor,                         multiInstance,                        classType,                        classIndex,                        missingLevel, predictorMissing, classMissing,                        numTrain, numClasses,                         accepts);  }    /**   * Checks whether the scheme can handle zero training instances.   *   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param classType the class type (NUMERIC, NOMINAL, etc.)   * @return index 0 is true if the test was passed, index 1 is true if test    *         was acceptable   */  protected boolean[] canHandleZeroTraining(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType) {        print("handle zero training instances");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);    print("...");    FastVector accepts = new FastVector();    accepts.addElement("train");    accepts.addElement("value");    int numTrain = 0, numClasses = 2, missingLevel = 0;    boolean predictorMissing = false, classMissing = false;        return runBasicTest(              nominalPredictor, numericPredictor, stringPredictor,               datePredictor, relationalPredictor,               multiInstance,              classType,               missingLevel, predictorMissing, classMissing,              numTrain, numClasses,               accepts);  }    /**   * Checks whether the scheme correctly initialises models when    * ASSearch.search is called. This test calls search with   * one training dataset. ASSearch is then called on a training set with    * different structure, and then again with the original training set.    * If the equals method of the ASEvaluation class returns false, this is    * noted as incorrect search initialisation.   *   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param classType the class type (NUMERIC, NOMINAL, etc.)   * @return index 0 is true if the test was passed, index 1 is always false   */  protected boolean[] correctSearchInitialisation(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType) {    boolean[] result = new boolean[2];        print("correct initialisation during search");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);    print("...");    int numTrain = getNumInstances(),     numClasses = 2, missingLevel = 0;    boolean predictorMissing = false, classMissing = false;        Instances train1 = null;    Instances train2 = null;    ASSearch search = null;    ASEvaluation evaluation1A = null;    ASEvaluation evaluation1B = null;    ASEvaluation evaluation2 = null;    AttributeSelection attsel1A = null;    AttributeSelection attsel1B = null;    int stage = 0;    try {            // Make two train sets with different numbers of attributes      train1 = makeTestDataset(42, numTrain,                                nominalPredictor    ? getNumNominal()    : 0,                               numericPredictor    ? getNumNumeric()    : 0,                                stringPredictor     ? getNumString()     : 0,                                datePredictor       ? getNumDate()       : 0,                                relationalPredictor ? getNumRelational() : 0,                                numClasses,                                classType,                               multiInstance);      train2 = makeTestDataset(84, numTrain,                                nominalPredictor    ? getNumNominal() + 1 : 0,                               numericPredictor    ? getNumNumeric() + 1 : 0,                                stringPredictor     ? getNumString()      : 0,                                datePredictor       ? getNumDate()        : 0,                                relationalPredictor ? getNumRelational()  : 0,                                numClasses,                                classType,                               multiInstance);      if (missingLevel > 0) {        addMissing(train1, missingLevel, predictorMissing, classMissing);        addMissing(train2, missingLevel, predictorMissing, classMissing);      }            search = ASSearch.makeCopies(getSearch(), 1)[0];      evaluation1A = ASEvaluation.makeCopies(getEvaluator(), 1)[0];      evaluation1B = ASEvaluation.makeCopies(getEvaluator(), 1)[0];      evaluation2 = ASEvaluation.makeCopies(getEvaluator(), 1)[0];    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      stage = 0;      attsel1A = search(search, evaluation1A, train1);            stage = 1;      search(search, evaluation2, train2);            stage = 2;      attsel1B = search(search, evaluation1B, train1);            stage = 3;      if (!attsel1A.toResultsString().equals(attsel1B.toResultsString())) {        if (m_Debug) {          println(              "\n=== Full report ===\n"              + "\nFirst search\n"              + evaluation1A.toString()              + "\n\n");          println(              "\nSecond search\n"              + evaluation1B.toString()              + "\n\n");        }        throw new Exception("Results differ between search calls");      }      println("yes");      result[0] = true;            if (false && m_Debug) {        println(            "\n=== Full report ===\n"            + "\nFirst search\n"            + evaluation1A.toString()            + "\n\n");        println(            "\nSecond search\n"            + evaluation1B.toString()            + "\n\n");      }    }     catch (Exception ex) {      println("no");      result[0] = false;      if (m_Debug) {        println("\n=== Full Report ===");        print("Problem during  training");        switch (stage) {          case 0:            print(" of dataset 1");            break;          case 1:            print(" of dataset 2");            break;          case 2:            print(" of dataset 1 (2nd build)");            break;          case 3:            print(", comparing results from builds of dataset 1");            break;	          }        println(": " + ex.getMessage() + "\n");        println("here are the datasets:\n");        println("=== Train1 Dataset ===\n"            + train1.toString() + "\n");        println("=== Train2 Dataset ===\n"            + train2.toString() + "\n");      }    }        return result;  }    /**   * Checks basic missing value handling of the scheme. If the missing   * values cause an exception to be thrown by the scheme, this will be   * recorded.   *   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param classType the class type (NUMERIC, NOMINAL, etc.)   * @param predictorMissing true if the missing values may be in    * the predictors   * @param classMissing true if the missing values may be in the class   * @param missingLevel the percentage of missing values   * @return index 0 is true if the test was passed, index 1 is true if test    *         was acceptable   */  protected boolean[] canHandleMissing(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType,      boolean predictorMissing,      boolean classMissing,      int missingLevel) {        if (missingLevel == 100)      print("100% ");    print("missing");    if (predictorMissing) {      print(" predictor");      if (classMissing)        print(" and");    }    if (classMissing)      print(" class");    print(" values");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);    print("...");    FastVector accepts = new FastVector();    accepts.addElement("missing");    accepts.addElement("value");    accepts.addElement("train");    accepts.addElement("no attributes");    int numTrain = getNumInstances(), numClasses = 2;        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,         datePredictor, relationalPredictor,         multiInstance,        classType,         missingLevel, predictorMissing, classMissing,        numTrain, numClasses,         accepts);  }    /**   * Checks whether the scheme can handle instance weights.   * This test compares the scheme performance on two datasets   * that are identical except for the training weights. If the    * results change, then the scheme must be using the weights. It   * may be possible to get a false positive from this test if the    * weight changes aren't significant enough to induce a change   * in scheme performance (but the weights are chosen to minimize   * the likelihood of this).   *   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param classType the class type (NUMERIC, NOMINAL, etc.)   * @return index 0 true if the test was passed   */  protected boolean[] instanceWeights(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int classType) {        print("scheme uses instance weights");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);    print("...");    int numTrain = 2*getNumInstances(),     numClasses = 2, missingLevel = 0;    boolean predictorMissing = false, classMissing = false;        boolean[] result = new boolean[2];    Instances train = null;    ASSearch[] search = null;    ASEvaluation evaluationB = null;    ASEvaluation evaluationI = null;    AttributeSelection attselB = null;    AttributeSelection attselI = null;    boolean evalFail = false;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor    ? getNumNominal() + 1 : 0,                              numericPredictor    ? getNumNumeric() + 1 : 0,                               stringPredictor     ? getNumString()      : 0,                               datePredictor       ? getNumDate()        : 0,                               relationalPredictor ? getNumRelational()  : 0,                               numClasses,                               classType,                              multiInstance);      if (missingLevel > 0)        addMissing(train, missingLevel, predictorMissing, classMissing);      search = ASSearch.makeCopies(getSearch(), 2);      evaluationB = ASEvaluation.makeCopies(getEvaluator(), 1)[0];      evaluationI = ASEvaluation.makeCopies(getEvaluator(), 1)[0];      attselB = search(search[0], evaluationB, train);    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {            // Now modify instance weights and re-built/test      for (int i = 0; i < train.numInstances(); i++) {        train.instance(i).setWeight(0);      }      Random random = new Random(1);      for (int i = 0; i < train.numInstances() / 2; i++) {        int inst = Math.abs(random.nextInt()) % train.numInstances();        int weight = Math.abs(random.nextInt()) % 10 + 1;        train.instance(inst).setWeight(weight);      }      attselI = search(search[1], evaluationI, train);      if (attselB.toResultsString().equals(attselI.toResultsString())) {        //	println("no");        evalFail = true;        throw new Exception("evalFail");      }            println("yes");      result[0] = true;    } catch (Exception ex) {      println("no");      result[0] = false;            if (m_Debug) {        println("\n=== Full Report ===");                if (evalFail) {          println("Results don't differ between non-weighted and "              + "weighted instance models.");          println("Here are the results:\n");          println("\nboth methods\n");          println(evaluationB.toString());        } else {          print("Problem during training");          println(": " + ex.getMessage() + "\n");        }        println("Here is the dataset:\n");        println("=== Train Dataset ===\n"            + train.toString() + "\n");        println("=== Train Weights ===\n");        for (int i = 0; i < train.numInstances(); i++) {          println(" " + (i + 1)               + "    " + train.instance(i).weight());        }      }    }        return result;  }    /**   * Checks whether the scheme alters the training dataset during   * training. If the scheme needs to modify the training   * data it should take a copy of the training data. Currently checks   * for changes to header structure, number of instances, order of
checkattributeselection.java - 源码说明

本页面展示了「这是关于数据挖掘的一些算法」中的 checkattributeselection.java 源码文件，采用 Java 编程语言编写，共 1,632 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与数据挖掘相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?