📄 checkclusterer.java
字号:
for (int i = 0; i < train.numInstances(); i++) { train.instance(i).setWeight(0); } Random random = new Random(1); for (int i = 0; i < train.numInstances() / 2; i++) { int inst = Math.abs(random.nextInt()) % train.numInstances(); int weight = Math.abs(random.nextInt()) % 10 + 1; train.instance(inst).setWeight(weight); } clusterers[1].buildClusterer(train); built = true; evaluationI.setClusterer(clusterers[1]); if (evaluationB.equals(evaluationI)) { // println("no"); evalFail = true; throw new Exception("evalFail"); } println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); if (evalFail) { println("Results don't differ between non-weighted and " + "weighted instance models."); println("Here are the results:\n"); println("\nboth methods\n"); println(evaluationB.clusterResultsToString()); } else { print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); } println("Here is the dataset:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Train Weights ===\n"); for (int i = 0; i < train.numInstances(); i++) { println(" " + (i + 1) + " " + train.instance(i).weight()); } } } return result; } /** * Checks whether the scheme alters the training dataset during * training. If the scheme needs to modify the training * data it should take a copy of the training data. Currently checks * for changes to header structure, number of instances, order of * instances, instance weights. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param predictorMissing true if we know the clusterer can handle * (at least) moderate missing predictor values * @return index 0 is true if the test was passed */ protected boolean[] datasetIntegrity( boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, boolean predictorMissing) { print("clusterer doesn't alter original datasets"); printAttributeSummary( nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance); print("..."); int numTrain = getNumInstances(), missingLevel = 20; boolean[] result = new boolean[2]; Instances train = null; Clusterer clusterer = null; try { train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, multiInstance); if (nominalPredictor && !multiInstance) train.deleteAttributeAt(0); if (missingLevel > 0) addMissing(train, missingLevel, predictorMissing); clusterer = Clusterer.makeCopies(getClusterer(), 1)[0]; } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { Instances trainCopy = new Instances(train); clusterer.buildClusterer(trainCopy); compareDatasets(train, trainCopy); println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during training"); println(": " + ex.getMessage() + "\n"); println("Here is the dataset:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); } } return result; } /** * Checks whether an updateable scheme produces the same model when * trained incrementally as when batch trained. The model itself * cannot be compared, so we compare the evaluation on test data * for both models. It is possible to get a false positive on this * test (likelihood depends on the classifier). * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @return index 0 is true if the test was passed */ protected boolean[] updatingEquality( boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance) { print("incremental training produces the same results" + " as batch training"); printAttributeSummary( nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance); print("..."); int numTrain = getNumInstances(), missingLevel = 0; boolean predictorMissing = false, classMissing = false; boolean[] result = new boolean[2]; Instances train = null; Clusterer[] clusterers = null; ClusterEvaluation evaluationB = null; ClusterEvaluation evaluationI = null; boolean built = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, multiInstance); if (missingLevel > 0) addMissing(train, missingLevel, predictorMissing, classMissing); clusterers = Clusterer.makeCopies(getClusterer(), 2); evaluationB = new ClusterEvaluation(); evaluationI = new ClusterEvaluation(); clusterers[0].buildClusterer(train); evaluationB.setClusterer(clusterers[0]); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { clusterers[1].buildClusterer(new Instances(train, 0)); for (int i = 0; i < train.numInstances(); i++) { ((UpdateableClusterer)clusterers[1]).updateClusterer( train.instance(i)); } built = true; evaluationI.setClusterer(clusterers[1]); if (!evaluationB.equals(evaluationI)) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); println("Results differ between batch and " + "incrementally built models.\n" + "Depending on the classifier, this may be OK"); println("Here are the results:\n"); println("\nbatch built results\n" + evaluationB.clusterResultsToString()); println("\nincrementally built results\n" + evaluationI.clusterResultsToString()); println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); } } else { println("yes"); result[0] = true; } } catch (Exception ex) { result[0] = false; print("Problem during"); if (built) print(" testing"); else print(" training"); println(": " + ex.getMessage() + "\n"); } return result; } /** * Runs a text on the datasets with the given characteristics. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param missingLevel the percentage of missing values * @param predictorMissing true if the missing values may be in * the predictors * @param numTrain the number of instances in the training set * @param accepts the acceptable string in an exception * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] runBasicTest(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int missingLevel, boolean predictorMissing, int numTrain, FastVector accepts) { boolean[] result = new boolean[2]; Instances train = null; Clusterer clusterer = null; try { train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, multiInstance); if (nominalPredictor && !multiInstance) train.deleteAttributeAt(0); if (missingLevel > 0) addMissing(train, missingLevel, predictorMissing); clusterer = Clusterer.makeCopies(getClusterer(), 1)[0]; } catch (Exception ex) { ex.printStackTrace(); throw new Error("Error setting up for tests: " + ex.getMessage()); } try { clusterer.buildClusterer(train); println("yes"); result[0] = true; } catch (Exception ex) { boolean acceptable = false; String msg = ex.getMessage().toLowerCase(); for (int i = 0; i < accepts.size(); i++) { if (msg.indexOf((String)accepts.elementAt(i)) >= 0) { acceptable = true; } } println("no" + (acceptable ? " (OK error message)" : "")); result[1] = acceptable; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during training"); println(": " + ex.getMessage() + "\n"); if (!acceptable) { if (accepts.size() > 0) { print("Error message doesn't mention "); for (int i = 0; i < accepts.size(); i++) { if (i != 0) { print(" or "); } print('"' + (String)accepts.elementAt(i) + '"'); } } println("here is the dataset:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); } } } return result; } /** * Add missing values to a dataset. * * @param data the instances to add missing values to * @param level the level of missing values to add (if positive, this * is the probability that a value will be set to missing, if negative * all but one value will be set to missing (not yet implemented)) * @param predictorMissing if true, predictor attributes will be modified */ protected void addMissing(Instances data, int level, boolean predictorMissing) { Random random = new Random(1); for (int i = 0; i < data.numInstances(); i++) { Instance current = data.instance(i); for (int j = 0; j < data.numAttributes(); j++) { if (predictorMissing) { if (Math.abs(random.nextInt()) % 100 < level) current.setMissing(j); } } } } /** * Make a simple set of instances with variable position of the class * attribute, which can later be modified for use in specific tests. * * @param seed the random number seed * @param numInstances the number of instances to generate * @param numNominal the number of nominal attributes * @param numNumeric the number of numeric attributes * @param numString the number of string attributes * @param numDate the number of date attributes * @param numRelational the number of relational attributes * @param multiInstance whether the dataset should a multi-instance dataset * @return the test dataset * @throws Exception if the dataset couldn't be generated * @see TestInstances#CLASS_IS_LAST */ protected Instances makeTestDataset(int seed, int numInstances, int numNominal, int numNumeric, int numString, int numDate, int numRelational, boolean multiInstance) throws Exception { TestInstances dataset = new TestInstances(); dataset.setSeed(seed); dataset.setNumInstances(numInstances); dataset.setNumNominal(numNominal); dataset.setNumNumeric(numNumeric); dataset.setNumString(numString); dataset.setNumDate(numDate); dataset.setNumRelational(numRelational); dataset.setClassIndex(TestInstances.NO_CLASS); dataset.setMultiInstance(multiInstance); return dataset.generate(); } /** * Print out a short summary string for the dataset characteristics * * @param nominalPredictor true if nominal predictor attributes are present * @param numericPredictor true if numeric predictor attributes are present * @param stringPredictor true if string predictor attributes are present * @param datePredictor true if date predictor attributes are present * @param relationalPredictor true if relational predictor attributes are present * @param multiInstance whether multi-instance is needed */ protected void printAttributeSummary(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance) { String str = ""; if (numericPredictor) str += "numeric"; if (nominalPredictor) { if (str.length() > 0) str += " & "; str += "nominal"; } if (stringPredictor) { if (str.length() > 0) str += " & "; str += "string"; } if (datePredictor) { if (str.length() > 0) str += " & "; str += "date"; } if (relationalPredictor) { if (str.length() > 0) str += " & "; str += "relational"; } str = " (" + str + " predictors)"; print(str); } /** * Test method for this class * * @param args the commandline options */ public static void main(String [] args) { runCheck(new CheckClusterer(), args); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -