📄 classifierevaluator.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
     * @return The average rank of the reference category in     * all classification results.     */    public double averageRankReference() {        double sum = 0.0;        int count = 0;        for (int i = 0; i < numCategories(); ++i) {            for (int rank = 0; rank < numCategories(); ++rank) {                int rankCount = mRankCounts[i][rank];                if (rankCount == 0) continue; // just efficiency                count += rankCount;                sum += rank * rankCount;            }        }        return sum / (double) count;    }    /**     * Returns the average over all test samples of the reciprocal of     * one plus the rank of the reference category in the response.     * This represents counting from one, so if the first-best answer     * is correct, the reciprocal rank is 1/1; if the second is     * correct, 1/2; if the third, 1/3; and so on.  These individual     * recirpocals are then averaged over cases.     *     * <P>Using the example classifications shown in the method     * documentation of {@link #averageRank(String,String)}:     *     * <blockquote><code>     * averageRankReference()      * <br> = (1/1 + 1/1 + 1/1 + 1/2 + 1/1 + 1/3 + 1/1)/7 ~ 0.83     * </code></blockquote>     *     * @return The mean reciprocal rank of the reference category in     * the result ranking.     */    public double meanReciprocalRank() {        double sum = 0.0;        int numCases = 0;        for (int i = 0; i < numCategories(); ++i) {            for (int rank = 0; rank < numCategories(); ++rank) {                int rankCount = mRankCounts[i][rank];                if (rankCount == 0) continue;  // just for efficiency                numCases += rankCount;                sum += ((double) rankCount) / (1.0 + (double) rank);            }        }        return sum / (double) numCases;    }    /**     * Returns the average conditional probability of the specified response     * category for test cases with the specified reference category.  
If     * there are no cases matching the reference category, the result     * is <code>Double.NaN</code>.  If the conditional classifiers'     * results are properly normalized, the sum of the averages over     * all categories will be 1.0.     *     * <P>Better classifiers return high values when the reference and     * response categories are the same and lower values when they are     * different.  The log value would be extremely volatile given the     * extremely low and high conditional estimates of the language     * model classifiers.     *      *     * @param refCategory Reference category.     * @param responseCategory Response category.     * @return Average conditional probability of response category in     * cases for specified reference category.     * @throws IllegalArgumentException If the either category is unknown.     */    public double averageConditionalProbability(String refCategory,                                                String responseCategory) {        validateCategory(refCategory);        validateCategory(responseCategory);        double sum = 0.0;        int count = 0;        for (int i = 0; i < mReferenceCategories.size(); ++i) {            if (mReferenceCategories.get(i).equals(refCategory)) {                ConditionalClassification c                    = (ConditionalClassification) mClassifications.get(i);                for (int rank = 0; rank < c.size(); ++rank) {                    if (c.category(rank).equals(responseCategory)) {                        sum += c.conditionalProbability(rank);                        ++count;                        break;                    }                }            }        }        return sum / (double) count;    }    /**     * Returns the average log (base 2) joint probability of the     * response category for cases of the specified reference     * category.  If there are no cases matching the reference     * category, the result is <code>Double.NaN</code>.       
*     * <P>Better classifiers return high values when the reference     * and response categories are the same and lower values     * when they are different.  Unlike the conditional probability     * values, joint probability averages are not particularly     * useful because they are not normalized by input length.  For     * the language model classifiers, the scores are normalized     * by length, and provide a better cross-case view.     *     * @param refCategory Reference category.     * @param responseCategory Response category.     * @return Average log (base 2) conditional probability of     * response category in cases for specified reference category.     * @throws IllegalArgumentException If the either category is unknown.     */    public double averageLog2JointProbability(String refCategory,                                              String responseCategory) {        validateCategory(refCategory);        validateCategory(responseCategory);        double sum = 0.0;        int count = 0;        for (int i = 0; i < mReferenceCategories.size(); ++i) {            if (mReferenceCategories.get(i).equals(refCategory)) {                JointClassification c                    = (JointClassification) mClassifications.get(i);                for (int rank = 0; rank < c.size(); ++rank) {                    if (c.category(rank).equals(responseCategory)) {                        sum += c.jointLog2Probability(rank);                        ++count;                        break;                    }                }            }        }        return sum / (double) count;    }    /**     * Returns the average over all test cases of the score of the     * response that matches the reference category.  Better     * classifiers return higher values for this average.     *     * <P>Whether average scores make sense across training instances     * depends on the classifier.     *     * @return The average score of the reference category in the     * response.     
*/
    public double averageScoreReference() {
        double scoreTotal = 0.0;
        for (int caseIndex = 0;
             caseIndex < mReferenceCategories.size();
             ++caseIndex) {
            String expected = mReferenceCategories.get(caseIndex).toString();
            ScoredClassification classification
                = (ScoredClassification) mClassifications.get(caseIndex);
            for (int rank = 0; rank < classification.size(); ++rank) {
                if (!classification.category(rank).equals(expected)) {
                    continue;
                }
                // first occurrence of the reference category wins
                scoreTotal += classification.score(rank);
                break;
            }
        }
        // the divisor is every test case, including cases whose
        // response never lists the reference category
        return scoreTotal / mReferenceCategories.size();
    }

    /**
     * Returns the average, taken over all test cases, of the
     * conditional probability that the response assigns to the
     * reference category.  Better classifiers return higher values
     * for this average.
     *
     * <P>As a normalized value, the average conditional probability
     * always has a sensible interpretation across training instances.
     *
     * @return The average conditional probability of the reference
     * category in the response.
     */
    public double averageConditionalProbabilityReference() {
        double probTotal = 0.0;
        for (int caseIndex = 0;
             caseIndex < mReferenceCategories.size();
             ++caseIndex) {
            String expected = mReferenceCategories.get(caseIndex).toString();
            ConditionalClassification classification
                = (ConditionalClassification) mClassifications.get(caseIndex);
            for (int rank = 0; rank < classification.size(); ++rank) {
                if (!classification.category(rank).equals(expected)) {
                    continue;
                }
                // first occurrence of the reference category wins
                probTotal += classification.conditionalProbability(rank);
                break;
            }
        }
        // the divisor is every test case, including cases whose
        // response never lists the reference category
        return probTotal / mReferenceCategories.size();
    }

    /**
     * Returns the average over all test cases of the joint log (base
     * 2) probability of the response that matches the reference
     * category.  Better classifiers return higher values for this
     * average.
*     * <P>Whether average scores make sense across training instances     * depends on the classifier.  For the language-model based     * classifiers, the normalized score values are more reasonable     * averages.     *     * @return The average joint log probability of the reference     * category in the response.     */    public double averageLog2JointProbabilityReference() {        double sum = 0.0;        for (int i = 0; i < mReferenceCategories.size(); ++i) {            String refCategory = mReferenceCategories.get(i).toString();            JointClassification c                = (JointClassification) mClassifications.get(i);            for (int rank = 0; rank < c.size(); ++rank) {                if (c.category(rank).equals(refCategory)) {                    sum += c.jointLog2Probability(rank);                    break;                }            }        }        return sum / (double) mReferenceCategories.size();    }    /**     * Returns the average score of the specified response category     * for test cases with the specified reference category.  If there     * are no cases matching the reference category, the result is     * <code>Double.NaN</code>.     *     * <P>Better classifiers return high values when the reference     * and response categories are the same and lower values     * when they are different.  Depending on the classifier, the     * scores may or may not be meaningful as an average.     *     * @param refCategory Reference category.     * @param responseCategory Response category.     * @return Average score of response category in test cases for     * specified reference category.     * @throws IllegalArgumentException If the either category is unknown.     
*/
    public double averageScore(String refCategory,
                               String responseCategory) {
        validateCategory(refCategory);
        validateCategory(responseCategory);
        double scoreTotal = 0.0;
        int matched = 0;
        for (int caseIndex = 0;
             caseIndex < mReferenceCategories.size();
             ++caseIndex) {
            // only cases labelled with the requested reference category count
            if (!mReferenceCategories.get(caseIndex).equals(refCategory)) {
                continue;
            }
            ScoredClassification classification
                = (ScoredClassification) mClassifications.get(caseIndex);
            for (int rank = 0; rank < classification.size(); ++rank) {
                if (!classification.category(rank).equals(responseCategory)) {
                    continue;
                }
                // first occurrence of the response category wins
                scoreTotal += classification.score(rank);
                ++matched;
                break;
            }
        }
        // 0.0 / 0 evaluates to NaN when nothing matched
        return scoreTotal / matched;
    }

    /**
     * Returns the average rank of the specified response category for
     * test cases with the specified reference category.  If there are
     * no cases matching the reference category, the result is
     * <code>Double.NaN</code>.
     *
     * <P>Better classifiers return lower values when the reference
     * and response categories are the same and higher values
     * when they are different.
     *
     * <P>For example, suppose there are three categories,
     * <code>a</code>, <code>b</code> and <code>c</code>.
Consider     * the following seven test cases, with the specified ranked     * results:     *     * <blockquote>     * <table border='1' cellpadding='5'>     * <tr><td><i>Test Case</i></td>     *     <td><i>Reference</i></td>     *     <td><i>Rank 0</i></td>     *     <td><i>Rank 1</i></td>     *     <td><i>Rank 2</i></td></tr>     * <tr><td>0</td><td>a</td><td>a</td><td>b</td><td>c</td></tr>     * <tr><td>1</td><td>a</td><td>a</td><td>c</td><td>b</td></tr>     * <tr><td>2</td><td>a</td><td>a</td><td>b</td><td>c</td></tr>     * <tr><td>3</td><td>a</td><td>b</td><td>a</td><td>c</td></tr>     * <tr><td>4</td><td>b</td><td>b</td><td>a</td><td>c</td></tr>     * <tr><td>5</td><td>b</td><td>a</td><td>c</td><td>b</td></tr>     * <tr><td>6</td><td>c</td><td>c</td><td>b</td><td>a</td></tr>     * </table>     * </blockquote>     *     * for which:     *     * <blockquote><code>     * averageRank(&quot;a&quot;,&quot;a&quot;) = (0 + 0 + 0 + 1)/4 = 0.25     * <br>     * averageRank(&quot;a&quot;,&quot;b&quot;) = (1 + 2 + 1 + 0)/4 = 1.00     * <br>     * averageRank(&quot;a&quot;,&quot;c&quot;) = (2 + 1 + 2 + 2)/4 = 1.75     * <br>&nbsp;<br>     * averageRank(&quot;b&quot;,&quot;a&quot;) = (1 + 0)/2 = 0.50     * <br>     * averageRank(&quot;b&quot;,&quot;b&quot;) = (0 + 2)/2 = 1.0     * <br>     * averageRank(&quot;b&quot;,&quot;c&quot;) = (2 + 1)/2 = 1.5     * <br>&nbsp;<br>     * averageRank(&quot;c&quot;,&quot;a&quot;) = (2)/1 = 2.0     * <br>     * averageRank(&quot;c&quot;,&quot;b&quot;) = (1)/1 = 1.0     * <br>     * averageRank(&quot;c&quot;,&quot;c&quot;) = (0)/1 = 0.0     * </code></blockquote>      *     * <p>If every ranked result is complete in assigning every     * category to a rank, the sum of the average ranks will be one     * less than the number of cases with the specified reference     * value.  If categories are missing from ranked results, the     * sums may possible be larger than one minus the number of test     * cases.     
*     * <p>Note that the confusion matrix is computed using only the     * reference and first column of this matrix of results.     *     * @param refCategory Reference category.     * @param responseCategory Response category.     * @return Average rank of response category in test cases for     * specified reference category.     * @throws IllegalArgumentException If either category is unknown.     */    public double averageRank(String refCategory,                              String responseCategory) {        validateCategory(refCategory);        validateCategory(responseCategory);        double sum = 0.0;        int count = 0;        // iterate over all paired classifications and lists        for (int i = 0; i < mReferenceCategories.size(); ++i) {            if (mReferenceCategories.get(i).equals(refCategory)) {                RankedClassification rankedClassification                    = (RankedClassification) mClassifications.get(i);                int rank = getRank(rankedClassification,responseCategory);                sum += rank;                ++count;            }        }        return sum / (double) count;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -