📄 checkclusterer.java
字号:
evaluationI = new ClusterEvaluation(); clusterers[0].buildClusterer(train); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { // Now modify instance weights and re-built/test for (int i = 0; i < train.numInstances(); i++) { train.instance(i).setWeight(0); } Random random = new Random(1); for (int i = 0; i < train.numInstances() / 2; i++) { int inst = Math.abs(random.nextInt()) % train.numInstances(); int weight = Math.abs(random.nextInt()) % 10 + 1; train.instance(inst).setWeight(weight); } clusterers[1].buildClusterer(train); built = true; if (evaluationB.equals(evaluationI)) { // println("no"); evalFail = true; throw new Exception("evalFail"); } println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); if (evalFail) { println("Results don't differ between non-weighted and " + "weighted instance models."); println("Here are the results:\n"); println("\nboth methods\n"); println(evaluationB.clusterResultsToString()); } else { print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); } println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Train Weights ===\n"); for (int i = 0; i < train.numInstances(); i++) { println(" " + (i + 1) + " " + train.instance(i).weight()); } println("=== Test Dataset ===\n" + test.toString() + "\n\n"); println("(test weights all 1.0\n"); } } return result; } /** * Checks whether the scheme alters the training dataset during * training. If the scheme needs to modify the training * data it should take a copy of the training data. Currently checks * for changes to header structure, number of instances, order of * instances, instance weights. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param predictorMissing true if we know the clusterer can handle * (at least) moderate missing predictor values * @return index 0 is true if the test was passed */ protected boolean[] datasetIntegrity( boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, boolean predictorMissing) { print("clusterer doesn't alter original datasets"); printAttributeSummary( nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), missingLevel = 20; boolean[] result = new boolean[2]; Instances train = null; Instances test = null; Clusterer clusterer = null; boolean built = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? 2 : 0, numericPredictor ? 1 : 0, stringPredictor ? 1 : 0, datePredictor ? 1 : 0, relationalPredictor ? 1 : 0, multiInstance); test = makeTestDataset(24, numTest, nominalPredictor ? 2 : 0, numericPredictor ? 1 : 0, stringPredictor ? 1 : 0, datePredictor ? 1 : 0, relationalPredictor ? 1 : 0, multiInstance); if (nominalPredictor && !multiInstance) { train.deleteAttributeAt(0); test.deleteAttributeAt(0); } if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing); } clusterer = Clusterer.makeCopies(getClusterer(), 1)[0]; } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { Instances trainCopy = new Instances(train); Instances testCopy = new Instances(test); clusterer.buildClusterer(trainCopy); compareDatasets(train, trainCopy); built = true; compareDatasets(test, testCopy); println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } return result; } /** * Runs a text on the datasets with the given characteristics. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param missingLevel the percentage of missing values * @param predictorMissing true if the missing values may be in * the predictors * @param numTrain the number of instances in the training set * @param numTest the number of instaces in the test set * @param accepts the acceptable string in an exception * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] runBasicTest(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int missingLevel, boolean predictorMissing, int numTrain, int numTest, FastVector accepts) { boolean[] result = new boolean[2]; Instances train = null; Instances test = null; Clusterer clusterer = null; boolean built = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? 2 : 0, numericPredictor ? 1 : 0, stringPredictor ? 1 : 0, datePredictor ? 1 : 0, relationalPredictor ? 1 : 0, multiInstance); test = makeTestDataset(24, numTest, nominalPredictor ? 2 : 0, numericPredictor ? 1 : 0, stringPredictor ? 1 : 0, datePredictor ? 1 : 0, relationalPredictor ? 1 : 0, multiInstance); if (nominalPredictor && !multiInstance) { train.deleteAttributeAt(0); test.deleteAttributeAt(0); } if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing); } clusterer = Clusterer.makeCopies(getClusterer(), 1)[0]; } catch (Exception ex) { ex.printStackTrace(); throw new Error("Error setting up for tests: " + ex.getMessage()); } try { clusterer.buildClusterer(train); built = true; println("yes"); result[0] = true; } catch (Exception ex) { boolean acceptable = false; String msg = ex.getMessage().toLowerCase(); for (int i = 0; i < accepts.size(); i++) { if (msg.indexOf((String)accepts.elementAt(i)) >= 0) { acceptable = true; } } println("no" + (acceptable ? " (OK error message)" : "")); result[1] = acceptable; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); if (!acceptable) { if (accepts.size() > 0) { print("Error message doesn't mention "); for (int i = 0; i < accepts.size(); i++) { if (i != 0) { print(" or "); } print('"' + (String)accepts.elementAt(i) + '"'); } } println("here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } } return result; } /** * Compare two datasets to see if they differ. * * @param data1 one set of instances * @param data2 the other set of instances * @throws Exception if the datasets differ */ protected void compareDatasets(Instances data1, Instances data2) throws Exception { if (!data2.equalHeaders(data1)) { throw new Exception("header has been modified"); } if (!(data2.numInstances() == data1.numInstances())) { throw new Exception("number of instances has changed"); } for (int i = 0; i < data2.numInstances(); i++) { Instance orig = data1.instance(i); Instance copy = data2.instance(i); for (int j = 0; j < orig.numAttributes(); j++) { if (orig.isMissing(j)) { if (!copy.isMissing(j)) { throw new Exception("instances have changed"); } } else if (orig.value(j) != copy.value(j)) { throw new Exception("instances have changed"); } if (orig.weight() != copy.weight()) { throw new Exception("instance weights have changed"); } } } } /** * Add missing values to a dataset. * * @param data the instances to add missing values to * @param level the level of missing values to add (if positive, this * is the probability that a value will be set to missing, if negative * all but one value will be set to missing (not yet implemented)) * @param predictorMissing if true, predictor attributes will be modified */ protected void addMissing(Instances data, int level, boolean predictorMissing) { Random random = new Random(1); for (int i = 0; i < data.numInstances(); i++) { Instance current = data.instance(i); for (int j = 0; j < data.numAttributes(); j++) { if (predictorMissing) { if (Math.abs(random.nextInt()) % 100 < level) current.setMissing(j); } } } } /** * Make a simple set of instances with variable position of the class * attribute, which can later be modified for use in specific tests. * * @param seed the random number seed * @param numInstances the number of instances to generate * @param numNominal the number of nominal attributes * @param numNumeric the number of numeric attributes * @param numString the number of string attributes * @param numDate the number of date attributes * @param numRelational the number of relational attributes * @param multiInstance whether the dataset should a multi-instance dataset * @return the test dataset * @throws Exception if the dataset couldn't be generated * @see TestInstances#CLASS_IS_LAST */ protected Instances makeTestDataset(int seed, int numInstances, int numNominal, int numNumeric, int numString, int numDate, int numRelational, boolean multiInstance) throws Exception { TestInstances dataset = new TestInstances(); dataset.setSeed(seed); dataset.setNumInstances(numInstances); dataset.setNumNominal(numNominal); dataset.setNumNumeric(numNumeric); dataset.setNumString(numString); dataset.setNumDate(numDate); dataset.setNumRelational(numRelational); dataset.setClassIndex(TestInstances.NO_CLASS); dataset.setMultiInstance(multiInstance); return dataset.generate(); } /** * Print out a short summary string for the dataset characteristics * * @param nominalPredictor true if nominal predictor attributes are present * @param numericPredictor true if numeric predictor attributes are present * @param stringPredictor true if string predictor attributes are present * @param datePredictor true if date predictor attributes are present * @param relationalPredictor true if relational predictor attributes are present * @param multiInstance whether multi-instance is needed */ protected void printAttributeSummary(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance) { String str = ""; if (numericPredictor) str += "numeric"; if (nominalPredictor) { if (str.length() > 0) str += " & "; str += "nominal"; } if (stringPredictor) { if (str.length() > 0) str += " & "; str += "string"; } if (datePredictor) { if (str.length() > 0) str += " & "; str += "date"; } if (relationalPredictor) { if (str.length() > 0) str += " & "; str += "relational"; } str = " (" + str + " predictors)"; print(str); } /** * Test method for this class * * @param args the commandline options */ public static void main(String [] args) { try { CheckClusterer check = new CheckClusterer(); try { check.setOptions(args); Utils.checkForRemainingOptions(args); } catch (Exception ex) { String result = ex.getMessage() + "\n\n" + check.getClass().getName().replaceAll(".*\\.", "") + " Options:\n\n"; Enumeration enu = check.listOptions(); while (enu.hasMoreElements()) { Option option = (Option) enu.nextElement(); result += option.synopsis() + "\n" + option.description() + "\n"; } throw new Exception(result); } check.doTests(); } catch (Exception ex) { System.err.println(ex.getMessage()); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -