📄 checkclusterer.java

📁 Java 编写的多种数据挖掘算法包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
      evaluationI = new ClusterEvaluation();      clusterers[0].buildClusterer(train);    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {            // Now modify instance weights and re-built/test      for (int i = 0; i < train.numInstances(); i++) {        train.instance(i).setWeight(0);      }      Random random = new Random(1);      for (int i = 0; i < train.numInstances() / 2; i++) {        int inst = Math.abs(random.nextInt()) % train.numInstances();        int weight = Math.abs(random.nextInt()) % 10 + 1;        train.instance(inst).setWeight(weight);      }      clusterers[1].buildClusterer(train);      built = true;      if (evaluationB.equals(evaluationI)) {        //	println("no");        evalFail = true;        throw new Exception("evalFail");      }            println("yes");      result[0] = true;    } catch (Exception ex) {      println("no");      result[0] = false;            if (m_Debug) {        println("\n=== Full Report ===");                if (evalFail) {          println("Results don't differ between non-weighted and "              + "weighted instance models.");          println("Here are the results:\n");          println("\nboth methods\n");          println(evaluationB.clusterResultsToString());        } else {          print("Problem during");          if (built) {            print(" testing");          } else {            print(" training");          }          println(": " + ex.getMessage() + "\n");        }        println("Here are the datasets:\n");        println("=== Train Dataset ===\n"            + train.toString() + "\n");        println("=== Train Weights ===\n");        for (int i = 0; i < train.numInstances(); i++) {          println(" " + (i + 1)               + "    " + train.instance(i).weight());        }        println("=== Test Dataset ===\n"            + test.toString() + "\n\n");	        println("(test weights all 1.0\n");      }    }        return result;  }    /**   * Checks whether the scheme alters the training dataset during   * training. If the scheme needs to modify the training   * data it should take a copy of the training data. Currently checks   * for changes to header structure, number of instances, order of   * instances, instance weights.   *   * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param predictorMissing true if we know the clusterer can handle   * (at least) moderate missing predictor values   * @return index 0 is true if the test was passed   */  protected boolean[] datasetIntegrity(      boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,       boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      boolean predictorMissing) {        print("clusterer doesn't alter original datasets");    printAttributeSummary(        nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance);    print("...");    int numTrain = getNumInstances(), numTest = getNumInstances(), missingLevel = 20;        boolean[] result = new boolean[2];    Instances train = null;    Instances test = null;    Clusterer clusterer = null;    boolean built = false;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor ? 2 : 0,                              numericPredictor ? 1 : 0,                               stringPredictor ? 1 : 0,                               datePredictor ? 1 : 0,                               relationalPredictor ? 1 : 0,                               multiInstance);      test = makeTestDataset(24, numTest,                             nominalPredictor ? 2 : 0,                             numericPredictor ? 1 : 0,                              stringPredictor ? 1 : 0,                              datePredictor ? 1 : 0,                              relationalPredictor ? 1 : 0,                              multiInstance);      if (nominalPredictor && !multiInstance) {        train.deleteAttributeAt(0);        test.deleteAttributeAt(0);      }      if (missingLevel > 0) {        addMissing(train, missingLevel, predictorMissing);        addMissing(test, Math.min(missingLevel, 50), predictorMissing);      }      clusterer = Clusterer.makeCopies(getClusterer(), 1)[0];    } catch (Exception ex) {      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      Instances trainCopy = new Instances(train);      Instances testCopy = new Instances(test);      clusterer.buildClusterer(trainCopy);      compareDatasets(train, trainCopy);      built = true;      compareDatasets(test, testCopy);            println("yes");      result[0] = true;    } catch (Exception ex) {      println("no");      result[0] = false;            if (m_Debug) {        println("\n=== Full Report ===");        print("Problem during");        if (built) {          print(" testing");        } else {          print(" training");        }        println(": " + ex.getMessage() + "\n");        println("Here are the datasets:\n");        println("=== Train Dataset ===\n"            + train.toString() + "\n");        println("=== Test Dataset ===\n"            + test.toString() + "\n\n");      }    }        return result;  }    /**   * Runs a text on the datasets with the given characteristics.   *    * @param nominalPredictor if true use nominal predictor attributes   * @param numericPredictor if true use numeric predictor attributes   * @param stringPredictor if true use string predictor attributes   * @param datePredictor if true use date predictor attributes   * @param relationalPredictor if true use relational predictor attributes   * @param multiInstance whether multi-instance is needed   * @param missingLevel the percentage of missing values   * @param predictorMissing true if the missing values may be in    * the predictors   * @param numTrain the number of instances in the training set   * @param numTest the number of instaces in the test set   * @param accepts the acceptable string in an exception   * @return index 0 is true if the test was passed, index 1 is true if test    *         was acceptable   */  protected boolean[] runBasicTest(boolean nominalPredictor,      boolean numericPredictor,       boolean stringPredictor,      boolean datePredictor,      boolean relationalPredictor,      boolean multiInstance,      int missingLevel,      boolean predictorMissing,      int numTrain,      int numTest,      FastVector accepts) {        boolean[] result = new boolean[2];    Instances train = null;    Instances test = null;    Clusterer clusterer = null;    boolean built = false;    try {      train = makeTestDataset(42, numTrain,                               nominalPredictor     ? 2 : 0,                              numericPredictor     ? 1 : 0,                               stringPredictor      ? 1 : 0,                              datePredictor        ? 1 : 0,                              relationalPredictor  ? 1 : 0,                              multiInstance);      test = makeTestDataset(24, numTest,                             nominalPredictor     ? 2 : 0,                             numericPredictor     ? 1 : 0,                              stringPredictor      ? 1 : 0,                             datePredictor        ? 1 : 0,                             relationalPredictor  ? 1 : 0,                             multiInstance);      if (nominalPredictor && !multiInstance) {        train.deleteAttributeAt(0);        test.deleteAttributeAt(0);      }      if (missingLevel > 0) {        addMissing(train, missingLevel, predictorMissing);        addMissing(test, Math.min(missingLevel, 50), predictorMissing);      }      clusterer = Clusterer.makeCopies(getClusterer(), 1)[0];    } catch (Exception ex) {      ex.printStackTrace();      throw new Error("Error setting up for tests: " + ex.getMessage());    }    try {      clusterer.buildClusterer(train);      built = true;            println("yes");      result[0] = true;    }     catch (Exception ex) {      boolean acceptable = false;      String msg = ex.getMessage().toLowerCase();      for (int i = 0; i < accepts.size(); i++) {        if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {          acceptable = true;        }      }            println("no" + (acceptable ? " (OK error message)" : ""));      result[1] = acceptable;            if (m_Debug) {        println("\n=== Full Report ===");        print("Problem during");        if (built) {          print(" testing");        } else {          print(" training");        }        println(": " + ex.getMessage() + "\n");        if (!acceptable) {          if (accepts.size() > 0) {            print("Error message doesn't mention ");            for (int i = 0; i < accepts.size(); i++) {              if (i != 0) {                print(" or ");              }              print('"' + (String)accepts.elementAt(i) + '"');            }          }          println("here are the datasets:\n");          println("=== Train Dataset ===\n"              + train.toString() + "\n");          println("=== Test Dataset ===\n"              + test.toString() + "\n\n");        }      }    }        return result;  }    /**   * Compare two datasets to see if they differ.   *   * @param data1 one set of instances   * @param data2 the other set of instances   * @throws Exception if the datasets differ   */  protected void compareDatasets(Instances data1, Instances data2)  throws Exception {    if (!data2.equalHeaders(data1)) {      throw new Exception("header has been modified");    }    if (!(data2.numInstances() == data1.numInstances())) {      throw new Exception("number of instances has changed");    }    for (int i = 0; i < data2.numInstances(); i++) {      Instance orig = data1.instance(i);      Instance copy = data2.instance(i);      for (int j = 0; j < orig.numAttributes(); j++) {        if (orig.isMissing(j)) {          if (!copy.isMissing(j)) {            throw new Exception("instances have changed");          }        } else if (orig.value(j) != copy.value(j)) {          throw new Exception("instances have changed");        }        if (orig.weight() != copy.weight()) {          throw new Exception("instance weights have changed");        }	        }    }  }    /**   * Add missing values to a dataset.   *   * @param data the instances to add missing values to   * @param level the level of missing values to add (if positive, this   * is the probability that a value will be set to missing, if negative   * all but one value will be set to missing (not yet implemented))   * @param predictorMissing if true, predictor attributes will be modified   */  protected void addMissing(Instances data, int level, boolean predictorMissing) {        Random random = new Random(1);    for (int i = 0; i < data.numInstances(); i++) {      Instance current = data.instance(i);      for (int j = 0; j < data.numAttributes(); j++) {        if (predictorMissing) {          if (Math.abs(random.nextInt()) % 100 < level)            current.setMissing(j);        }      }    }  }    /**   * Make a simple set of instances with variable position of the class    * attribute, which can later be modified for use in specific tests.   *   * @param seed the random number seed   * @param numInstances the number of instances to generate   * @param numNominal the number of nominal attributes   * @param numNumeric the number of numeric attributes   * @param numString the number of string attributes   * @param numDate the number of date attributes   * @param numRelational the number of relational attributes   * @param multiInstance whether the dataset should a multi-instance dataset   * @return the test dataset   * @throws Exception if the dataset couldn't be generated   * @see TestInstances#CLASS_IS_LAST   */  protected Instances makeTestDataset(int seed, int numInstances,                                       int numNominal, int numNumeric,                                       int numString, int numDate,                                      int numRelational,                                      boolean multiInstance)  throws Exception {        TestInstances dataset = new TestInstances();        dataset.setSeed(seed);    dataset.setNumInstances(numInstances);    dataset.setNumNominal(numNominal);    dataset.setNumNumeric(numNumeric);    dataset.setNumString(numString);    dataset.setNumDate(numDate);    dataset.setNumRelational(numRelational);    dataset.setClassIndex(TestInstances.NO_CLASS);    dataset.setMultiInstance(multiInstance);        return dataset.generate();  }    /**   * Print out a short summary string for the dataset characteristics   *   * @param nominalPredictor true if nominal predictor attributes are present   * @param numericPredictor true if numeric predictor attributes are present   * @param stringPredictor true if string predictor attributes are present   * @param datePredictor true if date predictor attributes are present   * @param relationalPredictor true if relational predictor attributes are present   * @param multiInstance whether multi-instance is needed   */  protected void printAttributeSummary(boolean nominalPredictor,                                        boolean numericPredictor,                                        boolean stringPredictor,                                        boolean datePredictor,                                        boolean relationalPredictor,                                        boolean multiInstance) {        String str = "";    if (numericPredictor)      str += "numeric";        if (nominalPredictor) {      if (str.length() > 0)        str += " & ";      str += "nominal";    }        if (stringPredictor) {      if (str.length() > 0)        str += " & ";      str += "string";    }        if (datePredictor) {      if (str.length() > 0)        str += " & ";      str += "date";    }        if (relationalPredictor) {      if (str.length() > 0)        str += " & ";      str += "relational";    }        str = " (" + str + " predictors)";        print(str);  }    /**   * Test method for this class   *    * @param args the commandline options   */  public static void main(String [] args) {    try {      CheckClusterer check = new CheckClusterer();            try {        check.setOptions(args);        Utils.checkForRemainingOptions(args);      } catch (Exception ex) {        String result = ex.getMessage() + "\n\n" + check.getClass().getName().replaceAll(".*\\.", "") + " Options:\n\n";        Enumeration enu = check.listOptions();        while (enu.hasMoreElements()) {          Option option = (Option) enu.nextElement();          result += option.synopsis() + "\n" + option.description() + "\n";        }        throw new Exception(result);      }            check.doTests();    } catch (Exception ex) {      System.err.println(ex.getMessage());    }  }}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -