📄 classifierevaluator.java
字号:
* @return The average rank of the reference category in * all classification results. */ public double averageRankReference() { double sum = 0.0; int count = 0; for (int i = 0; i < numCategories(); ++i) { for (int rank = 0; rank < numCategories(); ++rank) { int rankCount = mRankCounts[i][rank]; if (rankCount == 0) continue; // just efficiency count += rankCount; sum += rank * rankCount; } } return sum / (double) count; } /** * Returns the average over all test samples of the reciprocal of * one plus the rank of the reference category in the response. * This represents counting from one, so if the first-best answer * is correct, the reciprocal rank is 1/1; if the second is * correct, 1/2; if the third, 1/3; and so on. These individual * recirpocals are then averaged over cases. * * <P>Using the example classifications shown in the method * documentation of {@link #averageRank(String,String)}: * * <blockquote><code> * averageRankReference() * <br> = (1/1 + 1/1 + 1/1 + 1/2 + 1/1 + 1/3 + 1/1)/7 ~ 0.83 * </code></blockquote> * * @return The mean reciprocal rank of the reference category in * the result ranking. */ public double meanReciprocalRank() { double sum = 0.0; int numCases = 0; for (int i = 0; i < numCategories(); ++i) { for (int rank = 0; rank < numCategories(); ++rank) { int rankCount = mRankCounts[i][rank]; if (rankCount == 0) continue; // just for efficiency numCases += rankCount; sum += ((double) rankCount) / (1.0 + (double) rank); } } return sum / (double) numCases; } /** * Returns the average conditional probability of the specified response * category for test cases with the specified reference category. If * there are no cases matching the reference category, the result * is <code>Double.NaN</code>. If the conditional classifiers' * results are properly normalized, the sum of the averages over * all categories will be 1.0. 
* * <P>Better classifiers return high values when the reference and * response categories are the same and lower values when they are * different. The log value would be extremely volatile given the * extremely low and high conditional estimates of the language * model classifiers. * * * @param refCategory Reference category. * @param responseCategory Response category. * @return Average conditional probability of response category in * cases for specified reference category. * @throws IllegalArgumentException If the either category is unknown. */ public double averageConditionalProbability(String refCategory, String responseCategory) { validateCategory(refCategory); validateCategory(responseCategory); double sum = 0.0; int count = 0; for (int i = 0; i < mReferenceCategories.size(); ++i) { if (mReferenceCategories.get(i).equals(refCategory)) { ConditionalClassification c = (ConditionalClassification) mClassifications.get(i); for (int rank = 0; rank < c.size(); ++rank) { if (c.category(rank).equals(responseCategory)) { sum += c.conditionalProbability(rank); ++count; break; } } } } return sum / (double) count; } /** * Returns the average log (base 2) joint probability of the * response category for cases of the specified reference * category. If there are no cases matching the reference * category, the result is <code>Double.NaN</code>. * * <P>Better classifiers return high values when the reference * and response categories are the same and lower values * when they are different. Unlike the conditional probability * values, joint probability averages are not particularly * useful because they are not normalized by input length. For * the language model classifiers, the scores are normalized * by length, and provide a better cross-case view. * * @param refCategory Reference category. * @param responseCategory Response category. * @return Average log (base 2) conditional probability of * response category in cases for specified reference category. 
* @throws IllegalArgumentException If the either category is unknown. */ public double averageLog2JointProbability(String refCategory, String responseCategory) { validateCategory(refCategory); validateCategory(responseCategory); double sum = 0.0; int count = 0; for (int i = 0; i < mReferenceCategories.size(); ++i) { if (mReferenceCategories.get(i).equals(refCategory)) { JointClassification c = (JointClassification) mClassifications.get(i); for (int rank = 0; rank < c.size(); ++rank) { if (c.category(rank).equals(responseCategory)) { sum += c.jointLog2Probability(rank); ++count; break; } } } } return sum / (double) count; } /** * Returns the average over all test cases of the score of the * response that matches the reference category. Better * classifiers return higher values for this average. * * <P>Whether average scores make sense across training instances * depends on the classifier. * * @return The average score of the reference category in the * response. */ public double averageScoreReference() { double sum = 0.0; for (int i = 0; i < mReferenceCategories.size(); ++i) { String refCategory = mReferenceCategories.get(i).toString(); ScoredClassification c = (ScoredClassification) mClassifications.get(i); for (int rank = 0; rank < c.size(); ++rank) { if (c.category(rank).equals(refCategory)) { sum += c.score(rank); break; } } } return sum / (double) mReferenceCategories.size(); } /** * Returns the average over all test cases of the conditional * probability of the response that matches the reference * category. Better classifiers return higher values for this * average. * * <P>As a normalized value, the average conditional probability * always has a sensible interpretation across training instances. * * @return The average conditional probability of the reference * category in the response. 
*/
public double averageConditionalProbabilityReference() {
    double total = 0.0;
    for (int caseIndex = 0; caseIndex < mReferenceCategories.size(); ++caseIndex) {
        String expected = mReferenceCategories.get(caseIndex).toString();
        ConditionalClassification result
            = (ConditionalClassification) mClassifications.get(caseIndex);
        for (int position = 0; position < result.size(); ++position) {
            if (!result.category(position).equals(expected)) {
                continue;
            }
            // reference category located; take its probability and move on
            total += result.conditionalProbability(position);
            break;
        }
    }
    // the denominator is the number of test cases, not the number of hits
    return total / (double) mReferenceCategories.size();
}

/**
 * Returns the mean, taken over every test case, of the joint log
 * (base 2) probability that each response assigns to that case's
 * reference category.  A case whose response does not list its
 * reference category adds nothing to the sum but is still counted
 * in the denominator.  Better classifiers return higher values
 * for this average.
 *
 * <P>Whether average scores make sense across instances
 * depends on the classifier.  For the language-model based
 * classifiers, the normalized score values are more reasonable
 * averages.
 *
 * @return The average joint log probability of the reference
 * category in the response.
 */
public double averageLog2JointProbabilityReference() {
    double total = 0.0;
    for (int caseIndex = 0; caseIndex < mReferenceCategories.size(); ++caseIndex) {
        String expected = mReferenceCategories.get(caseIndex).toString();
        JointClassification result
            = (JointClassification) mClassifications.get(caseIndex);
        for (int position = 0; position < result.size(); ++position) {
            if (!result.category(position).equals(expected)) {
                continue;
            }
            // reference category located; take its log probability and move on
            total += result.jointLog2Probability(position);
            break;
        }
    }
    // the denominator is the number of test cases, not the number of hits
    return total / (double) mReferenceCategories.size();
}

/**
 * Returns the average score of the specified response category
 * for test cases with the specified reference category.  If there
 * are no cases matching the reference category, the result is
 * <code>Double.NaN</code>.
 *
 * <P>Better classifiers return high values when the reference
 * and response categories are the same and lower values
 * when they are different.  Depending on the classifier, the
 * scores may or may not be meaningful as an average.
 *
 * @param refCategory Reference category.
 * @param responseCategory Response category.
* @return Average score of response category in test cases for
 * specified reference category.
 * @throws IllegalArgumentException If either category is unknown.
 */
public double averageScore(String refCategory,
                           String responseCategory) {
    validateCategory(refCategory);
    validateCategory(responseCategory);
    double total = 0.0;
    int numFound = 0;
    for (int caseIndex = 0; caseIndex < mReferenceCategories.size(); ++caseIndex) {
        // only cases whose reference category is the requested one take part
        if (!mReferenceCategories.get(caseIndex).equals(refCategory)) {
            continue;
        }
        ScoredClassification result
            = (ScoredClassification) mClassifications.get(caseIndex);
        for (int position = 0; position < result.size(); ++position) {
            if (!result.category(position).equals(responseCategory)) {
                continue;
            }
            // first position holding the response category: record it and stop
            total += result.score(position);
            ++numFound;
            break;
        }
    }
    // zero recorded values gives 0.0 / 0.0, which is Double.NaN
    return total / (double) numFound;
}

/**
 * Returns the average rank of the specified response category for
 * test cases with the specified reference category.  If there are
 * no cases matching the reference category, the result is
 * <code>Double.NaN</code>.
 *
 * <P>Better classifiers return lower values when the reference
 * and response categories are the same and higher values
 * when they are different.
 *
 * <P>For example, suppose there are three categories,
 * <code>a</code>, <code>b</code> and <code>c</code>.
Consider
 * the following seven test cases, with the specified ranked
 * results:
 *
 * <blockquote>
 * <table border='1' cellpadding='5'>
 * <tr><td><i>Test Case</i></td>
 *     <td><i>Reference</i></td>
 *     <td><i>Rank 0</i></td>
 *     <td><i>Rank 1</i></td>
 *     <td><i>Rank 2</i></td></tr>
 * <tr><td>0</td><td>a</td><td>a</td><td>b</td><td>c</td></tr>
 * <tr><td>1</td><td>a</td><td>a</td><td>c</td><td>b</td></tr>
 * <tr><td>2</td><td>a</td><td>a</td><td>b</td><td>c</td></tr>
 * <tr><td>3</td><td>a</td><td>b</td><td>a</td><td>c</td></tr>
 * <tr><td>4</td><td>b</td><td>b</td><td>a</td><td>c</td></tr>
 * <tr><td>5</td><td>b</td><td>a</td><td>c</td><td>b</td></tr>
 * <tr><td>6</td><td>c</td><td>c</td><td>b</td><td>a</td></tr>
 * </table>
 * </blockquote>
 *
 * for which:
 *
 * <blockquote><code>
 * averageRank("a","a") = (0 + 0 + 0 + 1)/4 = 0.25
 * <br>
 * averageRank("a","b") = (1 + 2 + 1 + 0)/4 = 1.00
 * <br>
 * averageRank("a","c") = (2 + 1 + 2 + 2)/4 = 1.75
 * <br> <br>
 * averageRank("b","a") = (1 + 0)/2 = 0.50
 * <br>
 * averageRank("b","b") = (0 + 2)/2 = 1.0
 * <br>
 * averageRank("b","c") = (2 + 1)/2 = 1.5
 * <br> <br>
 * averageRank("c","a") = (2)/1 = 2.0
 * <br>
 * averageRank("c","b") = (1)/1 = 1.0
 * <br>
 * averageRank("c","c") = (0)/1 = 0.0
 * </code></blockquote>
 *
 * <p>If every ranked result is complete in assigning every
 * category to a rank, then for a fixed reference category the
 * sum of the average ranks over all response categories is
 * 0 + 1 + ... + (K-1) = K(K-1)/2, where K is the number of
 * categories (3.0 for each reference category in the example
 * above).  If categories are missing from ranked results, the
 * sums may possibly be larger than this value.
 *
 * <p>Note that the confusion matrix is computed using only the
 * reference and first column of this matrix of results.
 *
 * @param refCategory Reference category.
 * @param responseCategory Response category.
 * @return Average rank of response category in test cases for
 * specified reference category.
 * @throws IllegalArgumentException If either category is unknown.
*/ public double averageRank(String refCategory, String responseCategory) { validateCategory(refCategory); validateCategory(responseCategory); double sum = 0.0; int count = 0; // iterate over all paired classifications and lists for (int i = 0; i < mReferenceCategories.size(); ++i) { if (mReferenceCategories.get(i).equals(refCategory)) { RankedClassification rankedClassification = (RankedClassification) mClassifications.get(i); int rank = getRank(rankedClassification,responseCategory); sum += rank; ++count; } } return sum / (double) count;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -