ConfusionMatrix.java
     * @return The macro-averaged F measure.
     */
    public double macroAvgFMeasure() {
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i)
            sum += oneVsAll(i).fMeasure();
        return sum / (double) numCategories();
    }

    /**
     * Returns Goodman and Kruskal's λ<sub><sub>A</sub></sub> index
     * of predictive association. This is defined by:
     *
     * <blockquote><code>
     * lambdaA()
     * <br>= ( <big><big>Σ</big></big><sub><sub>j</sub></sub>
     *         maxReferenceCount(j) - maxReferenceCount() )
     * <br>&nbsp;&nbsp;/ (totalCount() - maxReferenceCount())
     * </code></blockquote>
     *
     * where <code>maxReferenceCount(j)</code> is the maximum count
     * in column <code>j</code> of the matrix:
     *
     * <blockquote><code>
     * maxReferenceCount(j) = MAX<sub><sub>i</sub></sub> count(i,j)
     * </code></blockquote>
     *
     * and where <code>maxReferenceCount()</code> is the maximum
     * reference count:
     *
     * <blockquote><code>
     * maxReferenceCount() = MAX<sub><sub>i</sub></sub> referenceCount(i)
     * </code></blockquote>
     *
     * <P>Note that like conditional probability and conditional
     * entropy, the λ<sub><sub>A</sub></sub> statistic is
     * asymmetric; the measure λ<sub><sub>B</sub></sub>
     * simply reverses the rows and columns. The probabilistic
     * interpretation of λ<sub><sub>A</sub></sub> is like that
     * of λ<sub><sub>B</sub></sub>, only with the roles of the
     * reference and response reversed.
     *
     * @return The λ<sub><sub>A</sub></sub> statistic for this
     * matrix.
     */
    public double lambdaA() {
        double maxReferenceCount = 0.0;
        for (int j = 0; j < numCategories(); ++j) {
            double referenceCount = oneVsAll(j).positiveReference();
            if (referenceCount > maxReferenceCount)
                maxReferenceCount = referenceCount;
        }
        double maxCountSum = 0.0;
        for (int j = 0; j < numCategories(); ++j) {
            int maxCount = 0;
            for (int i = 0; i < numCategories(); ++i) {
                int count = count(i,j);
                if (count > maxCount)
                    maxCount = count;
            }
            maxCountSum += maxCount;
        }
        double totalCount = totalCount();
        return (maxCountSum - maxReferenceCount)
            / (totalCount - maxReferenceCount);
    }
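    // Worked λ_A example (illustrative counts, not from the original
    // source). For a 2x2 matrix with counts
    //
    //              resp=0  resp=1
    //    ref=0         9       1
    //    ref=1         3       7
    //
    // the column maxima are 9 and 7, the largest reference (row)
    // total is 10, and totalCount() is 20, so
    //
    //    lambdaA = ((9 + 7) - 10) / (20 - 10) = 6/10 = 0.6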
    /**
     * Returns Goodman and Kruskal's λ<sub><sub>B</sub></sub> index
     * of predictive association. This is defined by:
     *
     * <blockquote><code>
     * lambdaB()
     * <br>= ( <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *         maxResponseCount(i) - maxResponseCount() )
     * <br>&nbsp;&nbsp;/ (totalCount() - maxResponseCount())
     * </code></blockquote>
     *
     * where <code>maxResponseCount(i)</code> is the maximum count
     * in row <code>i</code> of the matrix:
     *
     * <blockquote><code>
     * maxResponseCount(i) = MAX<sub><sub>j</sub></sub> count(i,j)
     * </code></blockquote>
     *
     * and where <code>maxResponseCount()</code> is the maximum
     * response count:
     *
     * <blockquote><code>
     * maxResponseCount() = MAX<sub><sub>j</sub></sub> responseCount(j)
     * </code></blockquote>
     *
     * <P>The probabilistic interpretation of
     * λ<sub><sub>B</sub></sub> is the reduction in error
     * likelihood from knowing the specified reference category when
     * predicting the response category. It thus takes on a value
     * between 0.0 and 1.0, with higher values being better. Perfect
     * association yields a value of 1.0 and perfect independence a
     * value of 0.0.
     *
     * <P>Note that the λ<sub><sub>B</sub></sub> statistic is
     * asymmetric; the measure λ<sub><sub>A</sub></sub>
     * simply reverses the rows and columns.
     *
     * @return The λ<sub><sub>B</sub></sub> statistic for this
     * matrix.
     */
    public double lambdaB() {
        double maxResponseCount = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            double responseCount = oneVsAll(i).positiveResponse();
            if (responseCount > maxResponseCount)
                maxResponseCount = responseCount;
        }
        double maxCountSum = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            int maxCount = 0;
            for (int j = 0; j < numCategories(); ++j) {
                int count = count(i,j);
                if (count > maxCount)
                    maxCount = count;
            }
            maxCountSum += maxCount;
        }
        double totalCount = totalCount();
        return (maxCountSum - maxResponseCount)
            / (totalCount - maxResponseCount);
    }

    /**
     * Returns the mutual information between the reference and
     * response distributions. Mutual information is the
     * Kullback-Leibler divergence between the joint distribution and
     * the product of the marginal reference and response
     * distributions. It is defined as:
     *
     * <blockquote><code>
     * mutualInformation()
     * <br>= <big><big>Σ</big></big><sub><sub>i</sub></sub>
     * <big><big>Σ</big></big><sub><sub>j</sub></sub>
     * P(i,j)
     * * log<sub><sub>2</sub></sub>
     * ( P(i,j) / (P<sub><sub>reference</sub></sub>(i)
     * * P<sub><sub>response</sub></sub>(j)) )
     * <br><br>
     * P(i,j) = count(i,j) / totalCount()
     * <br>
     * P<sub><sub>reference</sub></sub>(i) = oneVsAll(i).referenceLikelihood()
     * <br>
     * P<sub><sub>response</sub></sub>(j) = oneVsAll(j).responseLikelihood()
     * </code></blockquote>
     *
     * A bit of algebra shows that mutual information is the
     * reduction in entropy of the response distribution from knowing
     * the reference distribution:
     *
     * <blockquote><code>
     * mutualInformation() = responseEntropy() - conditionalEntropy()
     * </code></blockquote>
     *
     * In this way it is similar to the λ<sub><sub>B</sub></sub>
     * measure. But unlike the λ measures, mutual information is
     * symmetric: it is equally the reduction in entropy of the
     * reference distribution from knowing the response distribution.
     *
     * @return The mutual information between the reference and the
     * response distributions.
     */
    public double mutualInformation() {
        double totalCount = totalCount();
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            double pI = oneVsAll(i).referenceLikelihood();
            if (pI <= 0.0) continue;
            for (int j = 0; j < numCategories(); ++j) {
                double pJ = oneVsAll(j).responseLikelihood();
                if (pJ <= 0.0) continue;
                double pIJ = ((double) count(i,j)) / totalCount;
                if (pIJ <= 0.0) continue;
                sum += pIJ * com.aliasi.util.Math.log2(pIJ / (pI * pJ));
            }
        }
        return sum;
    }
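    // Worked λ_B and mutual information examples on the same
    // illustrative 2x2 matrix (counts {{9,1},{3,7}}):
    //
    // lambdaB: the row maxima are 9 and 7, the largest response
    // (column) total is 12, and totalCount() is 20, so
    //
    //    lambdaB = ((9 + 7) - 12) / (20 - 12) = 4/8 = 0.5
    //
    // mutualInformation: the joint distribution is
    // {{.45,.05},{.15,.35}}, the reference marginal is {.5,.5}, and
    // the response marginal is {.6,.4}, so
    //
    //    MI = .45*log2(.45/.30) + .05*log2(.05/.20)
    //       + .15*log2(.15/.30) + .35*log2(.35/.20)
    //      ≈ 0.2958 bits
    //
    // which matches responseEntropy() - conditionalEntropy()
    // ≈ 0.9710 - 0.6752.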
    /**
     * Returns the Kullback-Leibler (KL) divergence between the
     * reference and response distributions. KL divergence is also
     * known as relative entropy.
     *
     * <blockquote><code>
     * klDivergence()
     * <br>= <big><big>Σ</big></big><sub><sub>k</sub></sub>
     * P<sub><sub>reference</sub></sub>(k)
     * * log<sub><sub>2</sub></sub> (P<sub><sub>reference</sub></sub>(k)
     * / P<sub><sub>response</sub></sub>(k))
     * <br><br>
     * P<sub><sub>reference</sub></sub>(k) = oneVsAll(k).referenceLikelihood()
     * <br>
     * P<sub><sub>response</sub></sub>(k) = oneVsAll(k).responseLikelihood()
     * </code></blockquote>
     *
     * Note that KL divergence is not symmetric in the reference and
     * response distributions.
     *
     * @return The Kullback-Leibler divergence between the reference
     * and response distributions.
     */
    public double klDivergence() {
        double sum = 0.0;
        for (int k = 0; k < numCategories(); ++k) {
            PrecisionRecallEvaluation eval = oneVsAll(k);
            double refProb = eval.referenceLikelihood();
            // a zero-probability reference category contributes
            // 0 * log2(0/q), which is 0 by convention; skipping it
            // avoids returning NaN
            if (refProb <= 0.0) continue;
            double responseProb = eval.responseLikelihood();
            sum += refProb * com.aliasi.util.Math.log2(refProb / responseProb);
        }
        return sum;
    }
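    // Worked KL divergence example on the same illustrative 2x2
    // matrix: the reference marginal is {.5,.5} and the response
    // marginal is {.6,.4}, so
    //
    //    klDivergence = .5*log2(.5/.6) + .5*log2(.5/.4)
    //                 ≈ 0.0294 bits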
    /**
     * Returns a string-based representation of this confusion matrix.
     *
     * @return A string-based representation of this confusion matrix.
     */
    public String toString() {
        StringBuffer sb = new StringBuffer();
        sb.append("GLOBAL CONFUSION MATRIX STATISTICS\n");
        toStringGlobal(sb);
        for (int i = 0; i < numCategories(); ++i) {
            sb.append("CATEGORY " + i + "=" + categories()[i] + " VS. ALL\n");
            sb.append(" Conditional Entropy=" + conditionalEntropy(i));
            sb.append('\n');
            sb.append(oneVsAll(i).toString());
            sb.append('\n');
        }
        return sb.toString();
    }

    void toStringGlobal(StringBuffer sb) {
        String[] categories = categories();
        sb.append("Categories=" + Arrays.asList(categories));
        sb.append('\n');
        sb.append("Total Count=" + totalCount());
        sb.append('\n');
        sb.append("Total Correct=" + totalCorrect());
        sb.append('\n');
        sb.append("Total Accuracy=" + totalAccuracy());
        sb.append('\n');
        sb.append("95% Confidence Interval=" + totalAccuracy()
                  + " +/- " + confidence95());
        sb.append('\n');
        sb.append("Confusion Matrix\n");
        sb.append("reference \\ response\n");
        sb.append(matrixToCSV());
        sb.append('\n');
        sb.append("Macro-averaged Precision=" + macroAvgPrecision());
        sb.append('\n');
        sb.append("Macro-averaged Recall=" + macroAvgRecall());
        sb.append('\n');
        sb.append("Macro-averaged F=" + macroAvgFMeasure());
        sb.append('\n');
        sb.append("Micro-averaged Results\n");
        sb.append(" the following symmetries are expected:\n");
        sb.append(" TP=TN, FN=FP\n");
        sb.append(" PosRef=PosResp=NegRef=NegResp\n");
        sb.append(" Acc=Prec=Rec=F\n");
        sb.append(microAverage().toString());
        sb.append('\n');
        sb.append("Random Accuracy=" + randomAccuracy());
        sb.append('\n');
        sb.append("Random Accuracy Unbiased=" + randomAccuracyUnbiased());
        sb.append('\n');
        sb.append("kappa=" + kappa());
        sb.append('\n');
        sb.append("kappa Unbiased=" + kappaUnbiased());
        sb.append('\n');
        sb.append("kappa No Prevalence=" + kappaNoPrevalence());
        sb.append('\n');
        sb.append("Reference Entropy=" + referenceEntropy());
        sb.append('\n');
        sb.append("Response Entropy=" + responseEntropy());
        sb.append('\n');
        sb.append("Cross Entropy=" + crossEntropy());
        sb.append('\n');
        sb.append("Joint Entropy=" + jointEntropy());
        sb.append('\n');
        sb.append("Conditional Entropy=" + conditionalEntropy());
        sb.append('\n');
        sb.append("Mutual Information=" + mutualInformation());
        sb.append('\n');
        sb.append("Kullback-Leibler Divergence=" + klDivergence());
        sb.append('\n');
        sb.append("chi Squared=" + chiSquared());
        sb.append('\n');
        sb.append("chi-Squared Degrees of Freedom="
                  + chiSquaredDegreesOfFreedom());
        sb.append('\n');
        sb.append("phi Squared=" + phiSquared());
        sb.append('\n');
        sb.append("Cramer's V=" + cramersV());
        sb.append('\n');
        sb.append("lambda A=" + lambdaA());
        sb.append('\n');
        sb.append("lambda B=" + lambdaB());
        sb.append('\n');
    }

    /**
     * NEEDS PROPER CSV ESCAPES
     */
    String matrixToCSV() {
        StringBuffer sb = new StringBuffer();
        // output is (numCategories()+1) rows x (numCategories()+1)
        // columns, with the upper-left corner left empty
        // ROW 0: response category headers
        sb.append(" ");
        for (int i = 0; i < numCategories(); ++i) {
            sb.append(',');
            sb.append(categories()[i]);
        }
        // ROWS 1 to numCategories(): reference category, then counts
        for (int i = 0; i < numCategories(); ++i) {
            sb.append("\n ");
            sb.append(categories()[i]);
            for (int j = 0; j < numCategories(); ++j) {
                sb.append(',');
                sb.append(count(i,j));
            }
        }
        return sb.toString();
    }

    /**
     * NEEDS PROPER HTML ESCAPES
     */
    String matrixToHTML() {
        StringBuffer sb = new StringBuffer();
        sb.append("<html>\n");
        sb.append("<table border='1' cellpadding='5'>");
        sb.append('\n');
        sb.append("<tr>\n <td colspan='2' rowspan='2'> </td>");
        sb.append("\n <td colspan='" + numCategories()
                  + "' align='center' bgcolor='darkgray'><b>Response</b></td></tr>");
        sb.append("<tr>");
        for (int i = 0; i < numCategories(); ++i) {
            sb.append("\n <td align='right' bgcolor='lightgray'><i>"
                      + categories()[i] + "</i></td>");
        }
        sb.append("</tr>\n");
        for (int i = 0; i < numCategories(); ++i) {
            sb.append("<tr>");
            if (i == 0)
                sb.append("\n <td rowspan='" + numCategories()
                          + "' bgcolor='darkgray'><b>Ref-<br>erence</b></td>");
            sb.append("\n <td align='right' bgcolor='lightgray'><i>"
                      + categories()[i] + "</i></td>");
            for (int j = 0; j < numCategories(); ++j) {
                // green for the diagonal, yellow for off-diagonal
                // zeros, red for off-diagonal errors
                if (i == j) {
                    sb.append("\n <td align='right' bgcolor='lightgreen'>");
                } else if (count(i,j) == 0) {
                    sb.append("\n <td align='right' bgcolor='yellow'>");
                } else {
                    sb.append("\n <td align='right' bgcolor='red'>");
                }
                sb.append(count(i,j));
                sb.append("</td>");
            }
            sb.append("</tr>\n");
        }
        sb.append("</table>\n");
        sb.append("</html>\n");
        return sb.toString();
    }

    private void checkIndex(String argMsg, int index) {
        if (index < 0) {
            String msg = "Index for " + argMsg
                + " must be >= 0."
                + " Found index=" + index;
            throw new IllegalArgumentException(msg);
        }
        if (index >= numCategories()) {
            String msg = "Index for " + argMsg
                + " must be < numCategories()=" + numCategories();
            throw new IllegalArgumentException(msg);
        }
    }

}
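// Minimal usage sketch, not part of the original file. Assumption:
// the class above is LingPipe's com.aliasi.classify.ConfusionMatrix,
// which is constructed from an array of category names and populated
// one outcome at a time with increment(referenceIndex, responseIndex);
// verify both signatures against your LingPipe version before use.
class ConfusionMatrixDemo {
    public static void main(String[] args) {
        String[] categories = { "pos", "neg" };
        com.aliasi.classify.ConfusionMatrix matrix
            = new com.aliasi.classify.ConfusionMatrix(categories);
        matrix.increment(0, 0); // reference=pos, response=pos (correct)
        matrix.increment(0, 0);
        matrix.increment(0, 1); // reference=pos, response=neg (error)
        matrix.increment(1, 1); // reference=neg, response=neg (correct)
        // prints global statistics, then one-vs-all statistics
        // for each category
        System.out.println(matrix);
    }
}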