📄 checkclusterer.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
      for (int i = 0; i < train.numInstances(); i++) {        train.instance(i).setWeight(0);      }      Random random = new Random(1);      for (int i = 0; i < train.numInstances() / 2; i++) {        int inst = Math.abs(random.nextInt()) % train.numInstances();        int weight = Math.abs(random.nextInt()) % 10 + 1;        train.instance(inst).setWeight(weight);      }      clusterers[1].buildClusterer(train);      built = true;      evaluationI.setClusterer(clusterers[1]);      if (evaluationB.equals(evaluationI)) {        //	println("no");        evalFail = true;        throw new Exception("evalFail");      }            println("yes");      result[0] = true;    } catch (Exception ex) {      println("no");      result[0] = false;            if (m_Debug) {        println("\n=== Full Report ===");                if (evalFail) {          println("Results don't differ between non-weighted and "              + "weighted instance models.");          println("Here are the results:\n");          println("\nboth methods\n");          println(evaluationB.clusterResultsToString());        } else {          print("Problem during");          if (built) {            print(" testing");          } else {            print(" training");          }          println(": " + ex.getMessage() + "\n");        }        println("Here is the dataset:\n");        println("=== Train Dataset ===\n"            + train.toString() + "\n");        println("=== Train Weights ===\n");        for (int i = 0; i < train.numInstances(); i++) {          println(" " + (i + 1)               + "    " + train.instance(i).weight());        }      }    }        return result;  }    /**   * Checks whether the scheme alters the training dataset during   * training. If the scheme needs to modify the training   * data it should take a copy of the training data. Currently checks   * for changes to header structure, number of instances, order of   * instances, instance weights.   *   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param predictorMissing true if we know the clusterer can handle   * (at least) moderate missing predictor values   * @return index 0 is true if the test was passed   */  protected boolean[] datasetIntegrity(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      boolean predictorMissing) {        print("clusterer doesn't alter original datasets");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance);    print("...");    int numTrain = getNumInstances(), missingLevel = 20;        boolean[] result = new boolean[2];    Instances train = null;    Clusterer clusterer = null;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor    ? getNumNominal()    : 0,                              numericPredictor    ? getNumNumeric()    : 0,                               stringPredictor     ? getNumString()     : 0,                               datePredictor       ? getNumDate()       : 0,                               relationalPredictor ? getNumRelational() : 0,                               multiInstance);      if (nominalPredictor && !multiInstance)        train.deleteAttributeAt(0);      if (missingLevel > 0)        addMissing(train, missingLevel, predictorMissing);      clusterer = Clusterer.makeCopies(getClusterer(), 1)[0];    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      Instances trainCopy = new Instances(train);      clusterer.buildClusterer(trainCopy);      compareDatasets(train, trainCopy);            println("yes");      result[0] = true;    } catch (Exception ex) {      println("no");      result[0] = false;            if (m_Debug) {        println("\n=== Full Report ===");        print("Problem during training");        println(": " + ex.getMessage() + "\n");        println("Here is the dataset:\n");        println("=== Train Dataset ===\n"            + train.toString() + "\n");      }    }        return result;  }    /**   * Checks whether an updateable scheme produces the same model when   * trained incrementally as when batch trained. The model itself   * cannot be compared, so we compare the evaluation on test data   * for both models. It is possible to get a false positive on this   * test (likelihood depends on the classifier).   *   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @return index 0 is true if the test was passed   */  protected boolean[] updatingEquality(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance) {        print("incremental training produces the same results"        + " as batch training");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance);    print("...");    int numTrain = getNumInstances(), missingLevel = 0;    boolean predictorMissing = false, classMissing = false;        boolean[] result = new boolean[2];    Instances train = null;    Clusterer[] clusterers = null;    ClusterEvaluation evaluationB = null;    ClusterEvaluation evaluationI = null;    boolean built = false;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor    ? getNumNominal()    : 0,                              numericPredictor    ? getNumNumeric()    : 0,                               stringPredictor     ? getNumString()     : 0,                               datePredictor       ? getNumDate()       : 0,                               relationalPredictor ? getNumRelational() : 0,                               multiInstance);      if (missingLevel > 0)        addMissing(train, missingLevel, predictorMissing, classMissing);      clusterers = Clusterer.makeCopies(getClusterer(), 2);      evaluationB = new ClusterEvaluation();      evaluationI = new ClusterEvaluation();      clusterers[0].buildClusterer(train);      evaluationB.setClusterer(clusterers[0]);    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      clusterers[1].buildClusterer(new Instances(train, 0));      for (int i = 0; i < train.numInstances(); i++) {        ((UpdateableClusterer)clusterers[1]).updateClusterer(            train.instance(i));      }      built = true;      evaluationI.setClusterer(clusterers[1]);      if (!evaluationB.equals(evaluationI)) {        println("no");        result[0] = false;                if (m_Debug) {          println("\n=== Full Report ===");          println("Results differ between batch and "              + "incrementally built models.\n"              + "Depending on the classifier, this may be OK");          println("Here are the results:\n");          println("\nbatch built results\n" + evaluationB.clusterResultsToString());          println("\nincrementally built results\n" + evaluationI.clusterResultsToString());          println("Here are the datasets:\n");          println("=== Train Dataset ===\n"              + train.toString() + "\n");        }      }      else {        println("yes");        result[0] = true;      }    } catch (Exception ex) {      result[0] = false;            print("Problem during");      if (built)        print(" testing");      else        print(" training");      println(": " + ex.getMessage() + "\n");    }        return result;  }    /**   * Runs a text on the datasets with the given characteristics.   *    * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param missingLevel the percentage of missing values   * @param predictorMissing true if the missing values may be in    * the predictors   * @param numTrain the number of instances in the training set   * @param accepts the acceptable string in an exception   * @return index 0 is true if the test was passed, index 1 is true if test    *         was acceptable   */  protected boolean[] runBasicTest(boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,      boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int missingLevel,      boolean predictorMissing,      int numTrain,      FastVector accepts) {        boolean[] result = new boolean[2];    Instances train = null;    Clusterer clusterer = null;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor    ? getNumNominal()    : 0,                              numericPredictor    ? getNumNumeric()    : 0,                               stringPredictor     ? getNumString()     : 0,                              datePredictor       ? getNumDate()       : 0,                              relationalPredictor ? getNumRelational() : 0,                              multiInstance);      if (nominalPredictor && !multiInstance)        train.deleteAttributeAt(0);      if (missingLevel > 0)        addMissing(train, missingLevel, predictorMissing);      clusterer = Clusterer.makeCopies(getClusterer(), 1)[0];    } catch (Exception ex) {      ex.printStackTrace();      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      clusterer.buildClusterer(train);      println("yes");      result[0] = true;    }     catch (Exception ex) {      boolean acceptable = false;      String msg = ex.getMessage().toLowerCase();      for (int i = 0; i < accepts.size(); i++) {        if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {          acceptable = true;        }      }            println("no" + (acceptable ? " (OK error message)" : ""));      result[1] = acceptable;            if (m_Debug) {        println("\n=== Full Report ===");        print("Problem during training");        println(": " + ex.getMessage() + "\n");        if (!acceptable) {          if (accepts.size() > 0) {            print("Error message doesn't mention ");            for (int i = 0; i < accepts.size(); i++) {              if (i != 0) {                print(" or ");              }              print('"' + (String)accepts.elementAt(i) + '"');            }          }          println("here is the dataset:\n");          println("=== Train Dataset ===\n"              + train.toString() + "\n");        }      }    }        return result;  }    /**   * Add missing values to a dataset.   *   * @param data the instances to add missing values to   * @param level the level of missing values to add (if positive, this   * is the probability that a value will be set to missing, if negative   * all but one value will be set to missing (not yet implemented))   * @param predictorMissing if true, predictor attributes will be modified   */  protected void addMissing(Instances data, int level, boolean predictorMissing) {        Random random = new Random(1);    for (int i = 0; i < data.numInstances(); i++) {      Instance current = data.instance(i);      for (int j = 0; j < data.numAttributes(); j++) {        if (predictorMissing) {          if (Math.abs(random.nextInt()) % 100 < level)            current.setMissing(j);        }      }    }  }    /**   * Make a simple set of instances with variable position of the class    * attribute, which can later be modified for use in specific tests.   *   * @param seed the random number seed   * @param numInstances the number of instances to generate   * @param numNominal the number of nominal attributes   * @param numNumeric the number of numeric attributes   * @param numString the number of string attributes   * @param numDate the number of date attributes   * @param numRelational the number of relational attributes   * @param multiInstance whether the dataset should a multi-instance dataset   * @return the test dataset   * @throws Exception if the dataset couldn't be generated   * @see TestInstances#CLASS_IS_LAST   */  protected Instances makeTestDataset(int seed, int numInstances,                                       int numNominal, int numNumeric,                                       int numString, int numDate,                                      int numRelational,                                      boolean multiInstance)  throws Exception {        TestInstances dataset = new TestInstances();        dataset.setSeed(seed);    dataset.setNumInstances(numInstances);    dataset.setNumNominal(numNominal);    dataset.setNumNumeric(numNumeric);    dataset.setNumString(numString);    dataset.setNumDate(numDate);    dataset.setNumRelational(numRelational);    dataset.setClassIndex(TestInstances.NO_CLASS);    dataset.setMultiInstance(multiInstance);        return dataset.generate();  }    /**   * Print out a short summary string for the dataset characteristics   *   * @param nominalPredictor true if nominal predictor attributes are present   * @param numericPredictor true if numeric predictor attributes are present   * @param stringPredictor true if string predictor attributes are present   * @param datePredictor true if date predictor attributes are present   * @param relationalPredictor true if relational predictor attributes are present   * @param multiInstance whether multi-instance is needed   */  protected void printAttributeSummary(boolean nominalPredictor,                                        boolean numericPredictor,                                        boolean stringPredictor,                                        boolean datePredictor,                                        boolean relationalPredictor,                                        boolean multiInstance) {        String str = "";    if (numericPredictor)      str += "numeric";        if (nominalPredictor) {      if (str.length() > 0)        str += " & ";      str += "nominal";    }        if (stringPredictor) {      if (str.length() > 0)        str += " & ";      str += "string";    }        if (datePredictor) {      if (str.length() > 0)        str += " & ";      str += "date";    }        if (relationalPredictor) {      if (str.length() > 0)        str += " & ";      str += "relational";    }        str = " (" + str + " predictors)";        print(str);  }    /**   * Test method for this class   *    * @param args the commandline options   */  public static void main(String [] args) {    runCheck(new CheckClusterer(), args);  }}
上一页 1 23
💿 文件大小 124 K
👤 上传用户 wuseyue
📂 所属分类数学计算
🏷️ 相关标签

#数据挖掘 #聚类 #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -