📄 checkclassifier.java
字号:
numericPredictor ? 1 : 0, numClasses, numericClass); test = makeTestDataset(24, numTest, nominalPredictor ? 2 : 0, numericPredictor ? 1 : 0, numClasses, numericClass); if (nominalPredictor) { train.deleteAttributeAt(0); test.deleteAttributeAt(0); } if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing, classMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifier = Classifier.makeCopies(getClassifier(), 1)[0]; evaluation = new Evaluation(train); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { Instances trainCopy = new Instances(train); Instances testCopy = new Instances(test); classifier.buildClassifier(trainCopy); compareDatasets(train, trainCopy); built = true; testWRTZeroR(classifier, evaluation, trainCopy, testCopy); compareDatasets(test, testCopy); System.out.println("yes"); return true; } catch (Exception ex) { System.out.println("no"); if (m_Debug) { System.out.println("\n=== Full Report ==="); System.out.print("Problem during"); if (built) { System.out.print(" testing"); } else { System.out.print(" training"); } System.out.println(": " + ex.getMessage() + "\n"); System.out.println("Here are the datasets:\n"); System.out.println("=== Train Dataset ===\n" + train.toString() + "\n"); System.out.println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } return false; } /** * Runs a text on the datasets with the given characteristics. */ protected boolean runBasicTest(boolean nominalPredictor, boolean numericPredictor, boolean numericClass, int missingLevel, boolean predictorMissing, boolean classMissing, int numTrain, int numTest, int numClasses, FastVector accepts) { Instances train = null; Instances test = null; Classifier classifier = null; Evaluation evaluation = null; boolean built = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? 2 : 0, numericPredictor ? 1 : 0, numClasses, numericClass); test = makeTestDataset(24, numTest, nominalPredictor ? 2 : 0, numericPredictor ? 1 : 0, numClasses, numericClass); if (nominalPredictor) { train.deleteAttributeAt(0); test.deleteAttributeAt(0); } if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing, classMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifier = Classifier.makeCopies(getClassifier(), 1)[0]; evaluation = new Evaluation(train); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { classifier.buildClassifier(train); built = true; if (!testWRTZeroR(classifier, evaluation, train, test)) { throw new Exception("Scheme performs worse than ZeroR"); } System.out.println("yes"); return true; } catch (Exception ex) { boolean acceptable = false; String msg = ex.getMessage().toLowerCase(); if (msg.indexOf("worse than zeror") >= 0) { System.out.println("warning: performs worse than ZeroR"); } else { for (int i = 0; i < accepts.size(); i++) { if (msg.indexOf((String)accepts.elementAt(i)) >= 0) { acceptable = true; } } System.out.println("no" + (acceptable ? " (OK error message)" : "")); } if (m_Debug) { System.out.println("\n=== Full Report ==="); System.out.print("Problem during"); if (built) { System.out.print(" testing"); } else { System.out.print(" training"); } System.out.println(": " + ex.getMessage() + "\n"); if (!acceptable) { if (accepts.size() > 0) { System.out.print("Error message doesn't mention "); for (int i = 0; i < accepts.size(); i++) { if (i != 0) { System.out.print(" or "); } System.out.print('"' + (String)accepts.elementAt(i) + '"'); } } System.out.println("here are the datasets:\n"); System.out.println("=== Train Dataset ===\n" + train.toString() + "\n"); System.out.println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } } return false; } /** * Determine whether the scheme performs worse than ZeroR during testing * * @param classifier the pre-trained classifier * @param evaluation the classifier evaluation object * @param train the training data * @param test the test data * @return true if the scheme performs better than ZeroR * @exception Exception if there was a problem during the scheme's testing */ protected boolean testWRTZeroR(Classifier classifier, Evaluation evaluation, Instances train, Instances test) throws Exception { evaluation.evaluateModel(classifier, test); try { // Tested OK, compare with ZeroR Classifier zeroR = new weka.classifiers.ZeroR(); zeroR.buildClassifier(train); Evaluation zeroREval = new Evaluation(train); zeroREval.evaluateModel(zeroR, test); return Utils.grOrEq(zeroREval.errorRate(), evaluation.errorRate()); } catch (Exception ex) { throw new Error("Problem determining ZeroR performance: " + ex.getMessage()); } } /** * Compare two datasets to see if they differ. * * @param data1 one set of instances * @param data2 the other set of instances * @exception Exception if the datasets differ */ protected void compareDatasets(Instances data1, Instances data2) throws Exception { if (!data2.equalHeaders(data1)) { throw new Exception("header has been modified"); } if (!(data2.numInstances() == data1.numInstances())) { throw new Exception("number of instances has changed"); } for (int i = 0; i < data2.numInstances(); i++) { Instance orig = data1.instance(i); Instance copy = data2.instance(i); for (int j = 0; j < orig.numAttributes(); j++) { if (orig.isMissing(j)) { if (!copy.isMissing(j)) { throw new Exception("instances have changed"); } } else if (orig.value(j) != copy.value(j)) { throw new Exception("instances have changed"); } if (orig.weight() != copy.weight()) { throw new Exception("instance weights have changed"); } } } } /** * Add missing values to a dataset. * * @param data the instances to add missing values to * @param level the level of missing values to add (if positive, this * is the probability that a value will be set to missing, if negative * all but one value will be set to missing (not yet implemented)) * @param predictorMissing if true, predictor attributes will be modified * @param classMissing if true, the class attribute will be modified */ protected void addMissing(Instances data, int level, boolean predictorMissing, boolean classMissing) { int classIndex = data.classIndex(); Random random = new Random(1); for (int i = 0; i < data.numInstances(); i++) { Instance current = data.instance(i); for (int j = 0; j < data.numAttributes(); j++) { if (((j == classIndex) && classMissing) || ((j != classIndex) && predictorMissing)) { if (Math.abs(random.nextInt()) % 100 < level) current.setMissing(j); } } } } /** * Make a simple set of instances, which can later be modified * for use in specific tests. * * @param seed the random number seed * @param numInstances the number of instances to generate * @param numNominal the number of nominal attributes * @param numNumeric the number of numeric attributes * @param numClasses the number of classes (if nominal class) * @param numericClass true if the class attribute should be numeric * @return the test dataset * @exception Exception if the dataset couldn't be generated */ protected Instances makeTestDataset(int seed, int numInstances, int numNominal, int numNumeric, int numClasses, boolean numericClass) throws Exception { int numAttributes = numNominal + numNumeric + 1; Random random = new Random(seed); FastVector attributes = new FastVector(numAttributes); // Add Nominal attributes for (int i = 0; i < numNominal; i++) { FastVector nomStrings = new FastVector(i + 1); for(int j = 0; j <= i; j++) { nomStrings.addElement("a" + (i + 1) + "l" + (j + 1)); } attributes.addElement(new Attribute("Nominal" + (i + 1), nomStrings)); } // Add Numeric attributes for (int i = 0; i < numNumeric; i++) { attributes.addElement(new Attribute("Numeric" + (i + 1))); } // TODO: Add some String attributes... // Add class attribute if (numericClass) { attributes.addElement(new Attribute("Class")); } else { FastVector nomStrings = new FastVector(); for(int j = 0; j <numClasses; j++) { nomStrings.addElement("cl" + (j + 1)); } attributes.addElement(new Attribute("Class",nomStrings)); } Instances data = new Instances("CheckSet", attributes, numInstances); data.setClassIndex(data.numAttributes() - 1); // Generate the instances for (int i = 0; i < numInstances; i++) { Instance current = new Instance(numAttributes); current.setDataset(data); if (numericClass) { current.setClassValue(random.nextFloat() * 0.25 + Math.abs(random.nextInt()) % Math.max(2, numNominal)); } else { current.setClassValue(Math.abs(random.nextInt()) % data.numClasses()); } double classVal = current.classValue(); double newVal = 0; for (int j = 0; j < numAttributes - 1; j++) { switch (data.attribute(j).type()) { case Attribute.NUMERIC: newVal = classVal * 4 + random.nextFloat() * 1 - 0.5; current.setValue(j, newVal); break; case Attribute.NOMINAL: if (random.nextFloat() < 0.2) { newVal = Math.abs(random.nextInt()) % data.attribute(j).numValues(); } else { newVal = ((int)classVal) % data.attribute(j).numValues(); } current.setValue(j, newVal); break; case Attribute.STRING: System.err.println("Huh? this bit isn't implemented yet"); break; } } data.add(current); } return data; } /** * Print out a short summary string for the dataset characteristics * * @param nominalPredictor true if nominal predictor attributes are present * @param numericPredictor true if numeric predictor attributes are present * @param numericClass true if the class attribute is numeric */ protected void printAttributeSummary(boolean nominalPredictor, boolean numericPredictor, boolean numericClass) { if (numericClass) { System.out.print(" (numeric class,"); } else { System.out.print(" (nominal class,"); } if (numericPredictor) { System.out.print(" numeric"); if (nominalPredictor) { System.out.print(" &"); } } if (nominalPredictor) { System.out.print(" nominal"); } System.out.print(" predictors)"); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -