// confusionmatrix.java
                if (prob <= 0.0) continue;
                entropySum += prob * com.aliasi.util.Math.log2(prob);
            }
        }
        return -entropySum;
    }

    /**
     * Returns the entropy of the distribution of categories
     * in the response given that the reference category was
     * as specified.  The conditional entropy is defined by:
     *
     * <blockquote><code>
     * conditionalEntropy(i)
     * <br>
     * = - <big><big>Σ</big></big><sub><sub>j</sub></sub>
     *     P'(j|i) * log<sub><sub>2</sub></sub> P'(j|i)
     * <br><br>
     * P'(j|i) = count(i,j) / referenceCount(i)
     * </code></blockquote>
     *
     * where
     *
     * <blockquote><code>
     * referenceCount(i) = oneVsAll(i).positiveReference()
     * </code></blockquote>
     *
     * @param refCategoryIndex Index of the reference category.
     * @return Conditional entropy of the category with the specified
     * index.
     */
    public double conditionalEntropy(int refCategoryIndex) {
        double entropySum = 0.0;
        long refCount = oneVsAll(refCategoryIndex).positiveReference();
        for (int j = 0; j < numCategories(); ++j) {
            double conditionalProb
                = ((double) count(refCategoryIndex,j)) / refCount;
            if (conditionalProb <= 0.0) continue;
            entropySum += conditionalProb
                * com.aliasi.util.Math.log2(conditionalProb);
        }
        return -entropySum;
    }

    /**
     * Returns the conditional entropy of the response distribution
     * against the reference distribution.  The conditional entropy
     * is defined to be the sum of conditional entropies per category,
     * weighted by the reference likelihood of the category:
     *
     * <blockquote><code>
     * conditionalEntropy()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     referenceLikelihood(i) * conditionalEntropy(i)
     * <br><br>
     * referenceLikelihood(i) = oneVsAll(i).referenceLikelihood()
     * </code></blockquote>
     *
     * <P>Note that this statistic is not symmetric: if the roles of
     * reference and response are reversed, the answer may be
     * different.
     *
     * @return The conditional entropy of the response distribution
     * against the reference distribution.
     */
    public double conditionalEntropy() {
        double entropySum = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            double refProbI = oneVsAll(i).referenceLikelihood();
            entropySum += refProbI * conditionalEntropy(i);
        }
        return entropySum;
    }

    /**
     * Returns the value of the kappa statistic with chance agreement
     * determined by the reference distribution.  Kappa is defined
     * in terms of total accuracy and random accuracy:
     *
     * <blockquote><code>
     * kappa() = (totalAccuracy() - randomAccuracy())
     *           / (1 - randomAccuracy())
     * </code></blockquote>
     *
     * The kappa statistic was introduced in:
     *
     * <blockquote>
     * Cohen, Jacob. 1960. A coefficient of agreement for nominal scales.
     * <i>Educational and Psychological Measurement</i> <b>20</b>:37-46.
     * </blockquote>
     *
     * @return Kappa statistic for this confusion matrix.
     */
    public double kappa() {
        return kappa(randomAccuracy());
    }

    /**
     * Returns the value of the kappa statistic adjusted for bias.
     * The unbiased kappa value is defined in terms of total accuracy
     * and a slightly different computation of expected likelihood
     * that averages the reference and response probabilities.  The
     * exact definition is:
     *
     * <blockquote><code>
     * kappaUnbiased() = (totalAccuracy() - randomAccuracyUnbiased())
     *                   / (1 - randomAccuracyUnbiased())
     * </code></blockquote>
     *
     * The unbiased version of kappa was introduced in:
     *
     * <blockquote>
     * Siegel, Sidney and N. John Castellan, Jr. 1988.
     * <i>Nonparametric Statistics for the Behavioral Sciences</i>.
     * McGraw-Hill.
     * </blockquote>
     *
     * @return The unbiased version of the kappa statistic.
     */
    public double kappaUnbiased() {
        return kappa(randomAccuracyUnbiased());
    }
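    // --- Illustrative sketch, not part of the original source ---
    // A worked instance of the kappa arithmetic documented above, using
    // a hypothetical 2x2 matrix {{45,5},{15,35}} over 100 cases (the
    // method name and counts are invented for illustration).  Total
    // accuracy is (45+35)/100 = 0.80; chance agreement from the row and
    // column marginals is 0.5*0.6 + 0.5*0.4 = 0.50; kappa is therefore
    // (0.80 - 0.50) / (1 - 0.50) = 0.60.
    private static double demoKappa() {
        double totalAccuracy = (45.0 + 35.0) / 100.0;    // P(A) = 0.80
        double refLikelihood0 = (45.0 + 5.0) / 100.0;    // row marginal, 0.50
        double respLikelihood0 = (45.0 + 15.0) / 100.0;  // column marginal, 0.60
        double randomAccuracy =
            refLikelihood0 * respLikelihood0
            + (1.0 - refLikelihood0) * (1.0 - respLikelihood0);  // P(E) = 0.50
        return (totalAccuracy - randomAccuracy)
            / (1.0 - randomAccuracy);                    // kappa = 0.60
    }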
    /**
     * Returns the value of the kappa statistic adjusted for
     * prevalence.  The definition is:
     *
     * <blockquote><code>
     * kappaNoPrevalence() = 2 * totalAccuracy() - 1
     * </code></blockquote>
     *
     * The no-prevalence version of kappa was introduced in:
     *
     * <blockquote>
     * Byrt, Ted, Janet Bishop and John B. Carlin. 1993.
     * Bias, prevalence, and kappa.
     * <i>Journal of Clinical Epidemiology</i> <b>46</b>(5):423-429.
     * </blockquote>
     *
     * These authors suggest reporting the three kappa statistics
     * defined in this class: kappa, kappa adjusted for prevalence,
     * and kappa adjusted for bias.
     *
     * @return The value of kappa adjusted for prevalence.
     */
    public double kappaNoPrevalence() {
        return 2.0 * totalAccuracy() - 1.0;
    }

    private double kappa(double PE) {
        double PA = totalAccuracy();
        return (PA - PE) / (1.0 - PE);
    }

    /**
     * The expected accuracy from a strategy of randomly guessing
     * categories according to the reference and response
     * distributions.  This is defined by:
     *
     * <blockquote><code>
     * randomAccuracy()
     * = <big><big><big>Σ</big></big></big><sub><sub>i</sub></sub>
     *     referenceLikelihood(i) * responseLikelihood(i)
     * <br><br>
     * referenceLikelihood(i) = oneVsAll(i).referenceLikelihood()
     * <br>
     * responseLikelihood(i) = oneVsAll(i).responseLikelihood()
     * </code></blockquote>
     *
     * @return The random accuracy for this matrix.
     */
    public double randomAccuracy() {
        double randomAccuracy = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            PrecisionRecallEvaluation eval = oneVsAll(i);
            randomAccuracy += eval.referenceLikelihood()
                * eval.responseLikelihood();
        }
        return randomAccuracy;
    }

    /**
     * The expected accuracy from a strategy of randomly guessing
     * categories according to the average of the reference and
     * response distributions.  This is defined by:
     *
     * <blockquote><code>
     * randomAccuracyUnbiased()
     * = <big><big><big>Σ</big></big></big><sub><sub>i</sub></sub>
     *     ((referenceLikelihood(i) + responseLikelihood(i))/2)<sup>2</sup>
     * <br><br>
     * referenceLikelihood(i) = oneVsAll(i).referenceLikelihood()
     * <br>
     * responseLikelihood(i) = oneVsAll(i).responseLikelihood()
     * </code></blockquote>
     *
     * @return The unbiased random accuracy for this matrix.
     */
    public double randomAccuracyUnbiased() {
        double randomAccuracy = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            PrecisionRecallEvaluation eval = oneVsAll(i);
            double avgLikelihood
                = (eval.referenceLikelihood() + eval.responseLikelihood()) / 2.0;
            randomAccuracy += avgLikelihood * avgLikelihood;
        }
        return randomAccuracy;
    }

    /**
     * Return the number of degrees of freedom of this confusion
     * matrix for the χ<sup>2</sup> statistic.  In general, for an
     * <code>n×m</code> matrix, the number of degrees of
     * freedom is equal to <code>(n-1)*(m-1)</code>.  Because this
     * is a square matrix of dimensions equal to the number of
     * categories, the result is defined to be:
     *
     * <blockquote><code>
     * chiSquaredDegreesOfFreedom()
     * = (numCategories() - 1)<sup>2</sup>
     * </code></blockquote>
     *
     * @return The number of degrees of freedom for this confusion
     * matrix.
     */
    public int chiSquaredDegreesOfFreedom() {
        int root = numCategories() - 1;
        return root * root;
    }
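    // --- Illustrative sketch, not part of the original source ---
    // Shows how the biased and unbiased chance-agreement estimates
    // differ when the reference and response marginals disagree.  Using
    // the same hypothetical marginals as the kappa sketch above
    // (reference 0.5/0.5, response 0.6/0.4); the method name and
    // numbers are invented for illustration:
    //   randomAccuracy         = 0.5*0.6 + 0.5*0.4                 = 0.50
    //   randomAccuracyUnbiased = ((0.5+0.6)/2)^2 + ((0.5+0.4)/2)^2 = 0.505
    private static double demoRandomAccuracyUnbiased() {
        double[] refLikelihood  = { 0.5, 0.5 };   // reference marginals
        double[] respLikelihood = { 0.6, 0.4 };   // response marginals
        double unbiased = 0.0;
        for (int i = 0; i < refLikelihood.length; ++i) {
            double avg = (refLikelihood[i] + respLikelihood[i]) / 2.0;
            unbiased += avg * avg;    // per-category ((pRef + pResp)/2)^2
        }
        return unbiased;              // 0.55^2 + 0.45^2 = 0.505
    }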
    /**
     * Returns Pearson's χ<sup>2</sup> independence test
     * statistic for this matrix.  The value is asymptotically
     * χ<sup>2</sup> distributed with the number of degrees of
     * freedom specified by {@link #chiSquaredDegreesOfFreedom()}.
     *
     * <P>See {@link Statistics#chiSquaredIndependence(double[][])}
     * for the definition of the statistic over matrices.
     *
     * @return The χ<sup>2</sup> statistic for this matrix.
     */
    public double chiSquared() {
        int numCategories = numCategories();
        double[][] contingencyMatrix
            = new double[numCategories][numCategories];
        for (int i = 0; i < numCategories; ++i)
            for (int j = 0; j < numCategories; ++j)
                contingencyMatrix[i][j] = count(i,j);
        return Statistics.chiSquaredIndependence(contingencyMatrix);
    }

    /**
     * Returns the value of Pearson's φ<sup>2</sup> index of mean
     * square contingency for this matrix.  The value of
     * φ<sup>2</sup> may be defined in terms of χ<sup>2</sup>
     * by:
     *
     * <blockquote><code>
     * phiSquared() = chiSquared() / totalCount()
     * </code></blockquote>
     *
     * <P>As with our other statistics, this is the <i>sample</i>
     * value; the true contingency is determined by the underlying
     * random variables defining the reference and response.
     *
     * @return The φ<sup>2</sup> statistic for this matrix.
     */
    public double phiSquared() {
        return chiSquared() / (double) totalCount();
    }

    /**
     * Returns the value of Cramér's V statistic for this matrix.
     * Cramér's V may be defined in terms of the φ<sup>2</sup>
     * statistic by:
     *
     * <blockquote><code>
     * cramersV() = (phiSquared() / (numCategories()-1))<sup><sup>(1/2)</sup></sup>
     * </code></blockquote>
     *
     * @return The value of Cramér's V statistic for this matrix.
     */
    public double cramersV() {
        double LMinusOne = numCategories() - 1;
        return java.lang.Math.sqrt(phiSquared() / LMinusOne);
    }

    /**
     * Returns the one-versus-all precision-recall evaluation for the
     * specified category index.  See the class definition above for
     * examples.
     *
     * @param categoryIndex Index of category.
     * @return The precision-recall evaluation for the category.
     */
    public PrecisionRecallEvaluation oneVsAll(int categoryIndex) {
        PrecisionRecallEvaluation eval = new PrecisionRecallEvaluation();
        for (int i = 0; i < numCategories(); ++i)
            for (int j = 0; j < numCategories(); ++j)
                eval.addCase(i==categoryIndex, j==categoryIndex,
                             mMatrix[i][j]);
        return eval;
    }
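    // --- Illustrative sketch, not part of the original source ---
    // Replays the one-versus-all collapse that oneVsAll(int) performs:
    // every cell of the full matrix lands in exactly one of the four
    // binary cells, according to whether its row (reference) and column
    // (response) match the chosen category.  The method name and the
    // 3x3 counts are hypothetical; the result order {tp, fn, fp, tn}
    // mirrors the PrecisionRecallEvaluation constructor used below.
    private static long[] demoOneVsAll() {
        long[][] counts = {
            { 9, 3, 0 },
            { 3, 5, 1 },
            { 1, 1, 4 },
        };
        int cat = 0;  // collapse against category index 0
        long tp = 0, fn = 0, fp = 0, tn = 0;
        for (int i = 0; i < counts.length; ++i) {
            for (int j = 0; j < counts[i].length; ++j) {
                if (i == cat && j == cat)  tp += counts[i][j]; // ref & resp match
                else if (i == cat)         fn += counts[i][j]; // ref only
                else if (j == cat)         fp += counts[i][j]; // resp only
                else                       tn += counts[i][j]; // neither
            }
        }
        return new long[] { tp, fn, fp, tn };  // {9, 3, 4, 11}
    }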
    /**
     * Returns the micro-averaged precision-recall evaluation.  This
     * is just the sum of the precision-recall evaluations provided
     * by {@link #oneVsAll(int)} over all category indices.  See the
     * class definition above for an example.
     *
     * @return The micro-averaged precision-recall evaluation.
     */
    public PrecisionRecallEvaluation microAverage() {
        long tp = 0;
        long fp = 0;
        long fn = 0;
        long tn = 0;
        for (int i = 0; i < numCategories(); ++i) {
            PrecisionRecallEvaluation eval = oneVsAll(i);
            tp += eval.truePositive();
            fp += eval.falsePositive();
            tn += eval.trueNegative();
            fn += eval.falseNegative();
        }
        return new PrecisionRecallEvaluation(tp,fn,fp,tn);
    }

    /**
     * Returns the average precision per category.  This averaging
     * treats each category as being equal in weight.  Macro-averaged
     * precision is defined by:
     *
     * <blockquote><code>
     * macroAvgPrecision()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     precision(i) / numCategories()
     * <br><br>
     * precision(i) = oneVsAll(i).precision()
     * </code></blockquote>
     *
     * @return The macro-averaged precision.
     */
    public double macroAvgPrecision() {
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i)
            sum += oneVsAll(i).precision();
        return sum / (double) numCategories();
    }

    /**
     * Returns the average recall per category.  This averaging
     * treats each category as being equal in weight.  Macro-averaged
     * recall is defined by:
     *
     * <blockquote><code>
     * macroAvgRecall()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     recall(i) / numCategories()
     * <br><br>
     * recall(i) = oneVsAll(i).recall()
     * </code></blockquote>
     *
     * @return The macro-averaged recall.
     */
    public double macroAvgRecall() {
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i)
            sum += oneVsAll(i).recall();
        return sum / (double) numCategories();
    }

    /**
     * Returns the average F measure per category.  This averaging
     * treats each category as being equal in weight.  Macro-averaged
     * F measure is defined by:
     *
     * <blockquote><code>
     * macroAvgFMeasure()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     fMeasure(i) / numCategories()
     * <br><br>
     * fMeasure(i) = oneVsAll(i).fMeasure()
     * </code></blockquote>
     *
     * <P>Note that this is not necessarily the same value as results
     * from computing the F measure of the macro-averaged
     * precision and macro-averaged recall.
     *
     * @return The macro-averaged F measure.
     */
    public double macroAvgFMeasure() {
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i)
            sum += oneVsAll(i).fMeasure();
        return sum / (double) numCategories();
    }
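    // --- Illustrative sketch, not part of the original source ---
    // Demonstrates the note above: averaging per-category F measures is
    // not the same as taking the F measure of the macro-averaged
    // precision and recall.  Assumes the unweighted F1 (harmonic mean);
    // the method names and the two (precision, recall) pairs are
    // hypothetical.
    private static void demoMacroFMeasure() {
        double[] precision = { 1.0, 0.2 };
        double[] recall    = { 0.5, 1.0 };
        double macroF = 0.0;
        for (int i = 0; i < precision.length; ++i)
            macroF += f1(precision[i], recall[i]) / precision.length;
        // 0.6 = mean precision, 0.75 = mean recall
        double fOfMacro = f1(0.6, 0.75);
        // macroF = 0.5000 while fOfMacro = 0.6667: the two disagree.
        System.out.println("macroF=" + macroF + " fOfMacro=" + fOfMacro);
    }

    private static double f1(double p, double r) {
        return 2.0 * p * r / (p + r);  // harmonic mean of precision and recall
    }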