📄 evaluation.java
字号:
if (classIndex != -1) {
train.setClassIndex(classIndex - 1);
} else {
train.setClassIndex(train.numAttributes() - 1);
}
testTimeStart = System.currentTimeMillis();
while (train.readInstance(trainReader)) {
trainingEvaluation.
evaluateModelOnce((Classifier)classifier,
train.instance(0));
train.delete(0);
}
testTimeElapsed = System.currentTimeMillis() - testTimeStart;
trainReader.close();
} else {
testTimeStart = System.currentTimeMillis();
trainingEvaluation.evaluateModel(classifier,
train);
testTimeElapsed = System.currentTimeMillis() - testTimeStart;
}
// Print the results of the training evaluation
if (printMargins) {
return trainingEvaluation.toCumulativeMarginDistributionString();
} else {
text.append("\nTime taken to build model: " +
Utils.doubleToString(trainTimeElapsed / 1000.0,2) +
" seconds");
text.append("\nTime taken to test model on training data: " +
Utils.doubleToString(testTimeElapsed / 1000.0,2) +
" seconds");
text.append(trainingEvaluation.
toSummaryString("\n\n=== Error on training" +
" data ===\n", printComplexityStatistics));
if (template.classAttribute().isNominal()) {
if (classStatistics) {
text.append("\n\n" + trainingEvaluation.toClassDetailsString());
}
text.append("\n\n" + trainingEvaluation.toMatrixString());
}
}
}
// Compute proper error estimates
if (testFileName.length() != 0) {
// Testing is on the supplied test data
while (test.readInstance(testReader)) {
testingEvaluation.evaluateModelOnce((Classifier)classifier,
test.instance(0));
test.delete(0);
}
testReader.close();
text.append("\n\n" + testingEvaluation.
toSummaryString("=== Error on test data ===\n",
printComplexityStatistics));
} else if (trainFileName.length() != 0) {
// Testing is via cross-validation on training data
Random random = new Random(seed);
testingEvaluation.crossValidateModel(classifier, train, folds, random);
if (template.classAttribute().isNumeric()) {
text.append("\n\n\n" + testingEvaluation.
toSummaryString("=== Cross-validation ===\n",
printComplexityStatistics));
} else {
text.append("\n\n\n" + testingEvaluation.
toSummaryString("=== Stratified " +
"cross-validation ===\n",
printComplexityStatistics));
}
}
if (template.classAttribute().isNominal()) {
if (classStatistics) {
text.append("\n\n" + testingEvaluation.toClassDetailsString());
}
text.append("\n\n" + testingEvaluation.toMatrixString());
}
return text.toString();
}
/**
 * Attempts to load a cost matrix.
 *
 * @param costFileName the filename of the cost matrix
 * @param numClasses the number of classes that should be in the cost matrix
 * (only used if the cost file is in old format).
 * @return a <code>CostMatrix</code> value, or null if costFileName is empty
 * @exception Exception if an error occurs.
 */
protected static CostMatrix handleCostOption(String costFileName,
                                             int numClasses)
  throws Exception {

  if ((costFileName == null) || (costFileName.length() == 0)) {
    return null;
  }
  System.out.println(
      "NOTE: The behaviour of the -m option has changed between WEKA 3.0"
      +" and WEKA 3.1. -m now carries out cost-sensitive *evaluation*"
      +" only. For cost-sensitive *prediction*, use one of the"
      +" cost-sensitive metaschemes such as"
      +" weka.classifiers.meta.CostSensitiveClassifier or"
      +" weka.classifiers.meta.MetaCost");

  Reader costReader = null;
  try {
    costReader = new BufferedReader(new FileReader(costFileName));
  } catch (Exception e) {
    // Chain the cause so the stack trace shows why the open failed.
    throw new Exception("Can't open file " + e.getMessage() + '.', e);
  }
  try {
    // First try as a proper cost matrix format
    return new CostMatrix(costReader);
  } catch (Exception ex) {
    try {
      // Now try as the old format: reopen the file from the beginning.
      try {
        costReader.close(); // Close the old one
        costReader = new BufferedReader(new FileReader(costFileName));
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.', e);
      }
      CostMatrix costMatrix = new CostMatrix(numClasses);
      costMatrix.readOldFormat(costReader);
      return costMatrix;
    } catch (Exception e2) {
      // Neither format worked: re-throw the original (new-format) exception.
      throw ex;
    }
  } finally {
    // Always release the file handle (leaked in the original code).
    try {
      costReader.close();
    } catch (Exception ignored) {
      // Nothing sensible to do if the close itself fails.
    }
  }
}
/**
 * Evaluates the classifier on a given set of instances. Note that
 * the data must have exactly the same format (e.g. order of
 * attributes) as the data used to train the classifier! Otherwise
 * the results will generally be meaningless.
 *
 * @param classifier machine learning classifier
 * @param data set of test instances for evaluation
 * @exception Exception if model could not be evaluated
 * successfully
 */
public void evaluateModel(Classifier classifier,
                          Instances data) throws Exception {

  // Evaluate each instance in turn; evaluateModelOnce accumulates the
  // statistics. (Removed an unused local and a redundant cast.)
  for (int i = 0; i < data.numInstances(); i++) {
    evaluateModelOnce(classifier, data.instance(i));
  }
}
//<<30/01/2005, Frank J. Xu
//Modified functions for model evaluation used in ETI KBBI Platform.
/**
 * Evaluates the classifier on a given set of instances. Note that
 * the data must have exactly the same format (e.g. order of
 * attributes) as the data used to train the classifier! Otherwise
 * the results will generally be meaningless.
 *
 * @param classifier machine learning classifier
 * @param data set of test instances for evaluation
 * @param threshold decision threshold forwarded to eti_evaluateModelOnce
 * @param confidence confidence level forwarded to eti_evaluateModelOnce
 * @exception Exception if model could not be evaluated
 * successfully
 */
public void eti_evaluateModel(Classifier classifier,
                              Instances data,
                              double threshold,
                              double confidence) throws Exception {

  // Evaluate each instance in turn; eti_evaluateModelOnce accumulates the
  // statistics. (Removed an unused local and a redundant cast.)
  for (int i = 0; i < data.numInstances(); i++) {
    eti_evaluateModelOnce(classifier, data.instance(i), threshold, confidence);
  }
}
/**
 * Evaluates the classifier on a single instance, applying the ETI/KBBI
 * decision threshold for two-class Logistic models.
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @param threshold decision threshold compared against the first class
 *        probability when the classifier is a two-class Logistic model
 * @param confidence confidence level forwarded to the statistics update
 * @return the prediction made by the clasifier
 * @exception Exception if model could not be evaluated
 * successfully or the data contains string attributes
 */
public double eti_evaluateModelOnce(Classifier classifier,
                                    Instance instance,
                                    double threshold,
                                    double confidence) throws Exception {

  // Classify a copy with the class value hidden so the model cannot peek.
  Instance classMissing = (Instance) instance.copy();
  classMissing.setDataset(instance.dataset());
  classMissing.setClassMissing();

  double pred = 0;
  if (m_ClassIsNominal) {
    double[] distribution = classifier.distributionForInstance(classMissing);
    //For Logistic classifier with two classes, use threshold in KBBI platform.
    if (classifier instanceof Logistic && distribution.length == 2) {
      if (distribution[0] > threshold) {
        pred = 0;
      } else {
        pred = 1;
      }
    } else {
      pred = Utils.maxIndex(distribution);
    }
    if (distribution[(int) pred] <= 0) {
      pred = Instance.missingValue();
    }
    // NOTE: when pred is the missing value (NaN), (int)pred evaluates to 0;
    // the stats updater then files the instance as unclassified before the
    // index is ever used, so this is harmless.
    eti_updateStatsForClassifier(distribution, instance, (int) pred, confidence);
  } else {
    pred = classifier.classifyInstance(classMissing);
    updateStatsForPredictor(pred, instance);
  }
  return pred;
}
/**
 * Updates all the statistics about a classifiers performance for
 * the current test instance. In addition to the standard statistics,
 * counts correct predictions whose probability reaches the supplied
 * confidence level (ETI/KBBI extension).
 *
 * @param predictedDistribution the probabilities assigned to
 * each class
 * @param instance the instance to be classified
 * @param predictedIndex the class index chosen by the caller; its
 *        probability is compared against <code>confidence</code> for
 *        correctly classified instances
 * @param confidence minimum probability for a correct prediction to be
 *        counted in m_CorrectNumWithSpecifiedConf
 * @exception Exception if the class of the instance is not
 * set
 */
protected void eti_updateStatsForClassifier(double [] predictedDistribution,
                                            Instance instance,
                                            int predictedIndex,
                                            double confidence)
  throws Exception {

  int actualClass = (int) instance.classValue();

  if (!instance.classIsMissing()) {
    updateMargins(predictedDistribution, actualClass, instance.weight());

    // Determine the predicted class (doesn't detect multiple
    // classifications)
    int predictedClass = -1;
    double bestProb = 0.0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (predictedDistribution[i] > bestProb) {
        predictedClass = i;
        bestProb = predictedDistribution[i];
      }
    }

    m_WithClass += instance.weight();

    // Determine misclassification cost
    if (m_CostMatrix != null) {
      if (predictedClass < 0) {
        // For missing predictions, we assume the worst possible cost.
        // This is pretty harsh.
        // Perhaps we could take the negative of the cost of a correct
        // prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
        // although often this will be zero
        m_TotalCost += instance.weight()
          * m_CostMatrix.getMaxCost(actualClass);
      } else {
        m_TotalCost += instance.weight()
          * m_CostMatrix.getElement(actualClass, predictedClass);
      }
    }

    // Update counts when no class was predicted
    if (predictedClass < 0) {
      m_Unclassified += instance.weight();
      return;
    }

    // KB-information score: clamp probabilities to avoid log(0).
    double predictedProb = Math.max(MIN_SF_PROB,
                                    predictedDistribution[actualClass]);
    double priorProb = Math.max(MIN_SF_PROB,
                                m_ClassPriors[actualClass]
                                / m_ClassPriorsSum);
    if (predictedProb >= priorProb) {
      m_SumKBInfo += (Utils.log2(predictedProb) -
                      Utils.log2(priorProb))
        * instance.weight();
    } else {
      m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) -
                      Utils.log2(1.0 - priorProb))
        * instance.weight();
    }

    m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
    m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

    updateNumericScores(predictedDistribution,
                        makeDistribution(instance.classValue()),
                        instance.weight());

    // Update other stats
    m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
    if (predictedClass != actualClass) {
      m_Incorrect += instance.weight();
    } else {
      m_Correct += instance.weight();
      // ETI extension: count correct predictions made with at least the
      // requested confidence. (Removed the unused local 'costFactor'.)
      if (predictedDistribution[predictedIndex] >= confidence) {
        m_CorrectNumWithSpecifiedConf[actualClass]++;
      }
    }
  } else {
    m_MissingClass += instance.weight();
  }
}
/**
 * Calculate the true positive confidence rate with respect to a particular
 * class. This is defined as<p>
 * <pre>
 * correctly classified positives with confidence level &gt;= specified value
 * --------------------------------------------------------------------------
 * correctly classified positives
 * </pre>
 *
 * @param classIndex the index of the class to consider as "positive"
 * @return the fraction of correct predictions that met the confidence
 *         level, or 0 if there were no correct predictions
 */
public double eti_truePositiveConfidence(int classIndex) {
  // The original loop only ever added the diagonal entry before breaking,
  // so read the count of correctly classified positives directly.
  // Out-of-range indices fall through to the zero guard, as before.
  double correct = 0;
  if (classIndex >= 0 && classIndex < m_NumClasses) {
    correct = m_ConfusionMatrix[classIndex][classIndex];
  }
  if (correct == 0) {
    return 0;
  }
  return m_CorrectNumWithSpecifiedConf[classIndex] / correct;
}
//30/01/2005, Frank J. Xu>>
/**
 * Evaluates the classifier on a single instance.
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the clasifier
 * @exception Exception if model could not be evaluated
 * successfully or the data contains string attributes
 */
public double evaluateModelOnce(Classifier classifier,
                                Instance instance) throws Exception {

  // Classify a copy with the class value hidden so the model cannot peek.
  Instance classMissing = (Instance) instance.copy();
  classMissing.setDataset(instance.dataset());
  classMissing.setClassMissing();

  double pred = 0;
  if (m_ClassIsNominal) {
    // Nominal class: predict the most probable class; a non-positive
    // probability for that class means no real prediction was made.
    double[] distribution = classifier.distributionForInstance(classMissing);
    pred = Utils.maxIndex(distribution);
    if (distribution[(int) pred] <= 0) {
      pred = Instance.missingValue();
    }
    updateStatsForClassifier(distribution, instance);
  } else {
    // Numeric class: a single point prediction.
    pred = classifier.classifyInstance(classMissing);
    updateStatsForPredictor(pred, instance);
  }
  return pred;
}
/**
 * Evaluates the supplied distribution on a single instance.
 *
 * @param dist the supplied distribution
 * @param instance the test instance to be classified
 * @return the prediction derived from the distribution
 * @exception Exception if model could not be evaluated
 * successfully
 */
public double evaluateModelOnce(double [] dist,
                                Instance instance) throws Exception {
  double pred;
  if (!m_ClassIsNominal) {
    // Numeric class: the single distribution entry is the prediction itself.
    pred = dist[0];
    updateStatsForPredictor(pred, instance);
    return pred;
  }
  // Nominal class: predict the most probable class, unless its probability
  // is non-positive, in which case the prediction is treated as missing.
  pred = Utils.maxIndex(dist);
  if (dist[(int) pred] <= 0) {
    pred = Instance.missingValue();
  }
  updateStatsForClassifier(dist, instance);
  return pred;
}
/**
* Evaluates the supplied prediction on a single instance.
*
* @param prediction the supplied prediction
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -