// confusionmatrix.java
                if (prob <= 0.0) continue;
                entropySum += prob * com.aliasi.util.Math.log2(prob);
            }
        }
        return -entropySum;
    }

    /**
     * Returns the entropy of the distribution of categories
     * in the response given that the reference category was
     * as specified.  The conditional entropy is defined by:
     *
     * <blockquote><code>
     * conditionalEntropy(i)
     * <br>
     * = - <big><big>Σ</big></big><sub><sub>j</sub></sub>
     *     P'(j|i) * log<sub><sub>2</sub></sub> P'(j|i)
     * <br><br>
     * P'(j|i) = count(i,j) / referenceCount(i)
     * </code></blockquote>
     *
     * where
     *
     * <blockquote><code>
     * referenceCount(i) = oneVsAll(i).positiveReference()
     * </code></blockquote>
     *
     * @param refCategoryIndex Index of the reference category.
     * @return Conditional entropy of the category with the specified
     * index.
     */
    public double conditionalEntropy(int refCategoryIndex) {
        double entropySum = 0.0;
        long refCount = oneVsAll(refCategoryIndex).positiveReference();
        for (int j = 0; j < numCategories(); ++j) {
            double conditionalProb
                = ((double) count(refCategoryIndex,j)) / refCount;
            if (conditionalProb <= 0.0) continue;
            entropySum += conditionalProb
                * com.aliasi.util.Math.log2(conditionalProb);
        }
        return -entropySum;
    }

    /**
     * Returns the conditional entropy of the response distribution
     * against the reference distribution.  The conditional entropy
     * is defined to be the sum of conditional entropies per category,
     * weighted by the reference likelihood of the category:
     *
     * <blockquote><code>
     * conditionalEntropy()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     referenceLikelihood(i) * conditionalEntropy(i)
     * <br><br>
     * referenceLikelihood(i) = oneVsAll(i).referenceLikelihood()
     * </code></blockquote>
     *
     * <P>Note that this statistic is not symmetric: if the roles of
     * reference and response are reversed, the answer may be
     * different.
     *
     * @return The conditional entropy of the response distribution
     * against the reference distribution.
     */
    public double conditionalEntropy() {
        double entropySum = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            double refProbI = oneVsAll(i).referenceLikelihood();
            entropySum += refProbI * conditionalEntropy(i);
        }
        return entropySum;
    }

    /**
     * Returns the value of the kappa statistic with chance agreement
     * determined by the reference distribution.  Kappa is defined
     * in terms of total accuracy and random accuracy:
     *
     * <blockquote><code>
     * kappa() = (totalAccuracy() - randomAccuracy())
     *           / (1 - randomAccuracy())
     * </code></blockquote>
     *
     * The kappa statistic was introduced in:
     *
     * <blockquote>
     * Cohen, Jacob. 1960. A coefficient of agreement for nominal scales.
     * <i>Educational and Psychological Measurement</i> <b>20</b>:37-46.
     * </blockquote>
     *
     * @return Kappa statistic for this confusion matrix.
     */
    public double kappa() {
        return kappa(randomAccuracy());
    }

    /**
     * Returns the value of the kappa statistic adjusted for bias.
     * The unbiased kappa value is defined in terms of total accuracy
     * and a slightly different computation of expected likelihood
     * that averages the reference and response probabilities.  The
     * exact definition is:
     *
     * <blockquote><code>
     * kappaUnbiased() = (totalAccuracy() - randomAccuracyUnbiased())
     *                   / (1 - randomAccuracyUnbiased())
     * </code></blockquote>
     *
     * The unbiased version of kappa was introduced in:
     *
     * <blockquote>
     * Siegel, Sidney and N. John Castellan, Jr. 1988.
     * <i>Nonparametric Statistics for the Behavioral Sciences</i>.
     * McGraw-Hill.
     * </blockquote>
     *
     * @return The unbiased version of the kappa statistic.
     */
    public double kappaUnbiased() {
        return kappa(randomAccuracyUnbiased());
    }
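    // --- Illustrative sketch, not part of the original source ---
    // A worked instance of the kappa arithmetic documented above, using
    // a hypothetical 2x2 matrix {{45,5},{15,35}} over 100 cases (the
    // method name and counts are invented for illustration).  Total
    // accuracy is (45+35)/100 = 0.80; chance agreement from the row and
    // column marginals is 0.5*0.6 + 0.5*0.4 = 0.50; kappa is therefore
    // (0.80 - 0.50) / (1 - 0.50) = 0.60.
    private static double demoKappa() {
        double totalAccuracy = (45.0 + 35.0) / 100.0;    // P(A) = 0.80
        double refLikelihood0 = (45.0 + 5.0) / 100.0;    // row marginal, 0.50
        double respLikelihood0 = (45.0 + 15.0) / 100.0;  // column marginal, 0.60
        double randomAccuracy =
            refLikelihood0 * respLikelihood0
            + (1.0 - refLikelihood0) * (1.0 - respLikelihood0);  // P(E) = 0.50
        return (totalAccuracy - randomAccuracy)
            / (1.0 - randomAccuracy);                    // kappa = 0.60
    }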
    /**
     * Returns the value of the kappa statistic adjusted for
     * prevalence.  The definition is:
     *
     * <blockquote><code>
     * kappaNoPrevalence() = 2 * totalAccuracy() - 1
     * </code></blockquote>
     *
     * The no-prevalence version of kappa was introduced in:
     *
     * <blockquote>
     * Byrt, Ted, Janet Bishop and John B. Carlin. 1993.
     * Bias, prevalence, and kappa.
     * <i>Journal of Clinical Epidemiology</i> <b>46</b>(5):423-429.
     * </blockquote>
     *
     * These authors suggest reporting the three kappa statistics
     * defined in this class: kappa, kappa adjusted for prevalence,
     * and kappa adjusted for bias.
     *
     * @return The value of kappa adjusted for prevalence.
     */
    public double kappaNoPrevalence() {
        return 2.0 * totalAccuracy() - 1.0;
    }

    private double kappa(double PE) {
        double PA = totalAccuracy();
        return (PA - PE) / (1.0 - PE);
    }

    /**
     * The expected accuracy from a strategy of randomly guessing
     * categories according to the reference and response
     * distributions.  This is defined by:
     *
     * <blockquote><code>
     * randomAccuracy()
     * = <big><big><big>Σ</big></big></big><sub><sub>i</sub></sub>
     *     referenceLikelihood(i) * responseLikelihood(i)
     * <br><br>
     * referenceLikelihood(i) = oneVsAll(i).referenceLikelihood()
     * <br>
     * responseLikelihood(i) = oneVsAll(i).responseLikelihood()
     * </code></blockquote>
     *
     * @return The random accuracy for this matrix.
     */
    public double randomAccuracy() {
        double randomAccuracy = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            PrecisionRecallEvaluation eval = oneVsAll(i);
            randomAccuracy += eval.referenceLikelihood()
                * eval.responseLikelihood();
        }
        return randomAccuracy;
    }

    /**
     * The expected accuracy from a strategy of randomly guessing
     * categories according to the average of the reference and
     * response distributions.  This is defined by:
     *
     * <blockquote><code>
     * randomAccuracyUnbiased()
     * = <big><big><big>Σ</big></big></big><sub><sub>i</sub></sub>
     *     ((referenceLikelihood(i) + responseLikelihood(i))/2)<sup>2</sup>
     * <br><br>
     * referenceLikelihood(i) = oneVsAll(i).referenceLikelihood()
     * <br>
     * responseLikelihood(i) = oneVsAll(i).responseLikelihood()
     * </code></blockquote>
     *
     * @return The unbiased random accuracy for this matrix.
     */
    public double randomAccuracyUnbiased() {
        double randomAccuracy = 0.0;
        for (int i = 0; i < numCategories(); ++i) {
            PrecisionRecallEvaluation eval = oneVsAll(i);
            double avgLikelihood
                = (eval.referenceLikelihood() + eval.responseLikelihood()) / 2.0;
            randomAccuracy += avgLikelihood * avgLikelihood;
        }
        return randomAccuracy;
    }

    /**
     * Return the number of degrees of freedom of this confusion
     * matrix for the χ<sup>2</sup> statistic.  In general, for an
     * <code>n×m</code> matrix, the number of degrees of
     * freedom is equal to <code>(n-1)*(m-1)</code>.  Because this
     * is a square matrix of dimensions equal to the number of
     * categories, the result is defined to be:
     *
     * <blockquote><code>
     * chiSquaredDegreesOfFreedom()
     * = (numCategories() - 1)<sup>2</sup>
     * </code></blockquote>
     *
     * @return The number of degrees of freedom for this confusion
     * matrix.
     */
    public int chiSquaredDegreesOfFreedom() {
        int root = numCategories() - 1;
        return root * root;
    }
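    // --- Illustrative sketch, not part of the original source ---
    // Shows how the biased and unbiased chance-agreement estimates
    // differ when the reference and response marginals disagree.  Using
    // the same hypothetical marginals as the kappa sketch above
    // (reference 0.5/0.5, response 0.6/0.4); the method name and
    // numbers are invented for illustration:
    //   randomAccuracy         = 0.5*0.6 + 0.5*0.4                 = 0.50
    //   randomAccuracyUnbiased = ((0.5+0.6)/2)^2 + ((0.5+0.4)/2)^2 = 0.505
    private static double demoRandomAccuracyUnbiased() {
        double[] refLikelihood  = { 0.5, 0.5 };   // reference marginals
        double[] respLikelihood = { 0.6, 0.4 };   // response marginals
        double unbiased = 0.0;
        for (int i = 0; i < refLikelihood.length; ++i) {
            double avg = (refLikelihood[i] + respLikelihood[i]) / 2.0;
            unbiased += avg * avg;    // per-category ((pRef + pResp)/2)^2
        }
        return unbiased;              // 0.55^2 + 0.45^2 = 0.505
    }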
    /**
     * Returns Pearson's χ<sup>2</sup> independence test
     * statistic for this matrix.  The value is asymptotically
     * χ<sup>2</sup> distributed with the number of degrees of
     * freedom specified by {@link #chiSquaredDegreesOfFreedom()}.
     *
     * <P>See {@link Statistics#chiSquaredIndependence(double[][])}
     * for the definition of the statistic over matrices.
     *
     * @return The χ<sup>2</sup> statistic for this matrix.
     */
    public double chiSquared() {
        int numCategories = numCategories();
        double[][] contingencyMatrix
            = new double[numCategories][numCategories];
        for (int i = 0; i < numCategories; ++i)
            for (int j = 0; j < numCategories; ++j)
                contingencyMatrix[i][j] = count(i,j);
        return Statistics.chiSquaredIndependence(contingencyMatrix);
    }

    /**
     * Returns the value of Pearson's φ<sup>2</sup> index of mean
     * square contingency for this matrix.  The value of
     * φ<sup>2</sup> may be defined in terms of χ<sup>2</sup>
     * by:
     *
     * <blockquote><code>
     * phiSquared() = chiSquared() / totalCount()
     * </code></blockquote>
     *
     * <P>As with our other statistics, this is the <i>sample</i>
     * value; the true contingency is determined by the underlying
     * random variables defining the reference and response.
     *
     * @return The φ<sup>2</sup> statistic for this matrix.
     */
    public double phiSquared() {
        return chiSquared() / (double) totalCount();
    }

    /**
     * Returns the value of Cramér's V statistic for this matrix.
     * Cramér's V may be defined in terms of the φ<sup>2</sup>
     * statistic by:
     *
     * <blockquote><code>
     * cramersV() = (phiSquared() / (numCategories()-1))<sup><sup>(1/2)</sup></sup>
     * </code></blockquote>
     *
     * @return The value of Cramér's V statistic for this matrix.
     */
    public double cramersV() {
        double LMinusOne = numCategories() - 1;
        return java.lang.Math.sqrt(phiSquared() / LMinusOne);
    }

    /**
     * Returns the one-versus-all precision-recall evaluation for the
     * specified category index.  See the class definition above for
     * examples.
     *
     * @param categoryIndex Index of category.
     * @return The precision-recall evaluation for the category.
     */
    public PrecisionRecallEvaluation oneVsAll(int categoryIndex) {
        PrecisionRecallEvaluation eval = new PrecisionRecallEvaluation();
        for (int i = 0; i < numCategories(); ++i)
            for (int j = 0; j < numCategories(); ++j)
                eval.addCase(i==categoryIndex, j==categoryIndex,
                             mMatrix[i][j]);
        return eval;
    }
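    // --- Illustrative sketch, not part of the original source ---
    // Replays the one-versus-all collapse that oneVsAll(int) performs:
    // every cell of the full matrix lands in exactly one of the four
    // binary cells, according to whether its row (reference) and column
    // (response) match the chosen category.  The method name and the
    // 3x3 counts are hypothetical; the result order {tp, fn, fp, tn}
    // mirrors the PrecisionRecallEvaluation constructor used below.
    private static long[] demoOneVsAll() {
        long[][] counts = {
            { 9, 3, 0 },
            { 3, 5, 1 },
            { 1, 1, 4 },
        };
        int cat = 0;  // collapse against category index 0
        long tp = 0, fn = 0, fp = 0, tn = 0;
        for (int i = 0; i < counts.length; ++i) {
            for (int j = 0; j < counts[i].length; ++j) {
                if (i == cat && j == cat)  tp += counts[i][j]; // ref & resp match
                else if (i == cat)         fn += counts[i][j]; // ref only
                else if (j == cat)         fp += counts[i][j]; // resp only
                else                       tn += counts[i][j]; // neither
            }
        }
        return new long[] { tp, fn, fp, tn };  // {9, 3, 4, 11}
    }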
    /**
     * Returns the micro-averaged precision-recall evaluation.  This
     * is just the sum of the precision-recall evaluations provided
     * by {@link #oneVsAll(int)} over all category indices.  See the
     * class definition above for an example.
     *
     * @return The micro-averaged precision-recall evaluation.
     */
    public PrecisionRecallEvaluation microAverage() {
        long tp = 0;
        long fp = 0;
        long fn = 0;
        long tn = 0;
        for (int i = 0; i < numCategories(); ++i) {
            PrecisionRecallEvaluation eval = oneVsAll(i);
            tp += eval.truePositive();
            fp += eval.falsePositive();
            tn += eval.trueNegative();
            fn += eval.falseNegative();
        }
        return new PrecisionRecallEvaluation(tp,fn,fp,tn);
    }

    /**
     * Returns the average precision per category.  This averaging
     * treats each category as being equal in weight.  Macro-averaged
     * precision is defined by:
     *
     * <blockquote><code>
     * macroAvgPrecision()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     precision(i) / numCategories()
     * <br><br>
     * precision(i) = oneVsAll(i).precision()
     * </code></blockquote>
     *
     * @return The macro-averaged precision.
     */
    public double macroAvgPrecision() {
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i)
            sum += oneVsAll(i).precision();
        return sum / (double) numCategories();
    }

    /**
     * Returns the average recall per category.  This averaging
     * treats each category as being equal in weight.  Macro-averaged
     * recall is defined by:
     *
     * <blockquote><code>
     * macroAvgRecall()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     recall(i) / numCategories()
     * <br><br>
     * recall(i) = oneVsAll(i).recall()
     * </code></blockquote>
     *
     * @return The macro-averaged recall.
     */
    public double macroAvgRecall() {
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i)
            sum += oneVsAll(i).recall();
        return sum / (double) numCategories();
    }

    /**
     * Returns the average F measure per category.  This averaging
     * treats each category as being equal in weight.  Macro-averaged
     * F measure is defined by:
     *
     * <blockquote><code>
     * macroAvgFMeasure()
     * <br>
     * = <big><big>Σ</big></big><sub><sub>i</sub></sub>
     *     fMeasure(i) / numCategories()
     * <br><br>
     * fMeasure(i) = oneVsAll(i).fMeasure()
     * </code></blockquote>
     *
     * <P>Note that this is not necessarily the same value as results
     * from computing the F measure of the macro-averaged
     * precision and macro-averaged recall.
     *
     * @return The macro-averaged F measure.
     */
    public double macroAvgFMeasure() {
        double sum = 0.0;
        for (int i = 0; i < numCategories(); ++i)
            sum += oneVsAll(i).fMeasure();
        return sum / (double) numCategories();
    }
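    // --- Illustrative sketch, not part of the original source ---
    // Demonstrates the note above: averaging per-category F measures is
    // not the same as taking the F measure of the macro-averaged
    // precision and recall.  Assumes the unweighted F1 (harmonic mean);
    // the method names and the two (precision, recall) pairs are
    // hypothetical.
    private static void demoMacroFMeasure() {
        double[] precision = { 1.0, 0.2 };
        double[] recall    = { 0.5, 1.0 };
        double macroF = 0.0;
        for (int i = 0; i < precision.length; ++i)
            macroF += f1(precision[i], recall[i]) / precision.length;
        // 0.6 = mean precision, 0.75 = mean recall
        double fOfMacro = f1(0.6, 0.75);
        // macroF = 0.5000 while fOfMacro = 0.6667: the two disagree.
        System.out.println("macroF=" + macroF + " fOfMacro=" + fOfMacro);
    }

    private static double f1(double p, double r) {
        return 2.0 * p * r / (p + r);  // harmonic mean of precision and recall
    }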