📄 scoredprecisionrecallevaluation.java
字号:
* #prCurve(boolean)} provides the rejection recall rates * at each threshold. The resulting ROC curves for that example * are: * * As with the recall-precision curve, the parameter determines * whether or not to "interpolate" the rejection recall * values. This is carried out as with the recall-precision curve * by only returning values which would not be interpolated. In * general, without interpolation, the same rows of the table are * used as for the recall-precision curve, namely those at the end * of a run of true positives. Interpolation may result in a * different set of recall points in the pruned answer set, as in * the example above. * * <P>Like the recall-precision curve method, this method does not * insert artificial end points of (0,1) and (1,0) into the graph. * As with the recall-precision curve, the final entry will have * recall equal to one. * * <P>Neither interpolated nor uninterpolated return values are * guaranteed to be convex. Convex closure will skew results * upward in an even more unrealistic direction, especially if the * artificial completion point (0,1) is included. * * @param interpolate If <code>true</code>, any point whose recall * and rejection recall are both no higher than those of another * point is eliminated from the returned ROC curve. * @return The receiver operating characteristic curve for the * specified category. 
*/
    public double[][] rocCurve(boolean interpolate) {
        PrecisionRecallEvaluation runningEval = new PrecisionRecallEvaluation();
        ArrayList rocPoints = new ArrayList();
        Iterator caseIt = mCases.iterator();
        while (caseIt.hasNext()) {
            Case nextCase = (Case) caseIt.next();
            boolean isCorrect = nextCase.mCorrect;
            // Every case is fed to the running evaluation, correct or not.
            runningEval.addCase(isCorrect,true);
            if (!isCorrect)
                continue; // only correct cases contribute a point to the curve
            // Each point is { recall, rejection recall }; mPositiveRef and
            // mNegativeRef are presumably the reference counts -- they are
            // declared outside this section.
            double recall = div(runningEval.truePositive(), mPositiveRef);
            double rejectionRecall
                = 1.0 - div(runningEval.falsePositive(), mNegativeRef);
            rocPoints.add(new double[] { recall, rejectionRecall });
        }
        return interpolate(rocPoints,interpolate);
    }

    /**
     * Returns the maximum F<sub><sub>1</sub></sub>-measure for an
     * actual operating point on the uninterpolated precision-recall
     * curve.  This maximization is based on a post-hoc optimal
     * acceptance threshold.  It is derived from the pair on the
     * recall-precision curve yielding the highest value of the F
     * measure, <code>2*recall*precision/(recall+precision)</code>.
     *
     * <P>This method is equivalent to calling
     * <code>maximumFMeasure(1.0)</code>.
     *
     * <P>For the example in {@link #prCurve(boolean)}:
     *
     * <blockquote><code>
     * maximumFMeasure() = 0.67
     * </code></blockquote>
     *
     * corresponding to recall=0.75 and precision=0.60.
     *
     * @return Maximum F<sub><sub>1</sub></sub>-measure over the
     * uninterpolated precision-recall curve.
     */
    public double maximumFMeasure() {
        final double balancedBeta = 1.0;
        return maximumFMeasure(balancedBeta);
    }

    /**
     * Returns the maximum F<sub><sub>&beta;</sub></sub>-measure for
     * an actual operating point on the uninterpolated
     * precision-recall curve for a specified &beta;.  This
     * maximization is based on a post-hoc optimal acceptance
     * threshold.  It is derived from the pair on the
     * recall-precision curve yielding the highest value of the F
     * measure as computed by
     * <code>PrecisionRecallEvaluation.fMeasure(beta,recall,precision)</code>.
     *
     * <P>The result is <code>0.0</code> if the curve has no points.
     *
     * <P>For the example in {@link #prCurve(boolean)}, with
     * &beta; equal to one:
     *
     * <blockquote><code>
     * maximumFMeasure(1.0) = 0.67
     * </code></blockquote>
     *
     * corresponding to recall=0.75 and precision=0.60.
     *
     * @param beta Weighting parameter handed to the F-measure
     * computation.
     * @return Maximum F<sub><sub>&beta;</sub></sub>-measure over the
     * uninterpolated precision-recall curve.
*/ public double maximumFMeasure(double beta) { double maxF = 0.0; double[][] pr = prCurve(false); for (int i = 0; i < pr.length; ++i) { double f = PrecisionRecallEvaluation.fMeasure(beta,pr[i][0],pr[i][1]); maxF = Math.max(maxF,f); } return maxF; } /** * Returns the breakeven point (BEP) for precision and recall * based on the interpolated precision. This is the point where * the interpolated precision-recall curve has recall equal to * precision. Because the interpolation is a step fucntion, the * result is different than if two points were linearly * interpolated. * * <P>For the example illustrated in {@link #prCurve(boolean)}, * the breakeven point is 0.60. This is because the interpolated * precision recall curve is flat from the implicit initial point * <code>(0.00,0.60)</code> to <code>(0.75,0.60)</code> and thus * the line between them has a breakeven point of <code>x = y = * 0.6</code>. * * <P>As an interpolation (equal precision and recall) of a * rounded up estimate (interpolated recall-precision curve), the * breakeven point is not necessarily an achievable operating * point. Note that the recall-precision breakeven point will * always be smaller than the maximum F measure, which does * correspond to an observed operating point, because the * breakeven point always involves lowering the recall of the * first point on the curve with recall greater than precision to * match the precision. * * <P>This method will return <code>0.0</code> if the * precision-recall curve never crosses the diagonal. * * @return The interpolated recall-precision breakeven point. */ public double prBreakevenPoint() { double[][] prCurve = prCurve(true); for (int i = 0; i < prCurve.length; ++i) if (prCurve[i][0] > prCurve[i][1]) return prCurve[i][1]; return 0.0; } /** * Returns point-wise average precision of points on the * uninterpolated precision-recall curve. See {@link * #prCurve(boolean)} for a definition of the values on the curve. 
* * <P>This method implements the standard information retrieval * definition, which only averages precision measurements from * correct responses. * * <P>For the example provided in {@link #prCurve(boolean)}, the * average precision is the average of precision values for the * correct responses (highlighted lines): * * * <P>Although the reasoning is different, the average precision * returned is the same as the area under the uninterpolated * recall-precision graph. * * @return Pointwise average precision. */ public double averagePrecision() { double[][] prCurve = prCurve(false); double sum = 0.0; for (int i = 0; i < prCurve.length; ++i) sum += prCurve[i][1]; return sum/((double)prCurve.length); } /** * Returns the precision score achieved by returning the top * scoring documents up to the specified rank. The * precision-recall curve is not interpolated for this * computation. If there are not enough documents, the result * <code>Double.NaN</code> is returned. * * @return The precision at the specified rank. */ public double precisionAt(int rank) { if (mCases.size() < rank) return Double.NaN; int correctCount = 0; Iterator it = mCases.iterator(); for (int i = 0; i < rank; ++i) if (((Case)it.next()).mCorrect) ++correctCount; return ((double) correctCount) / (double) rank; } /** * Returns the reciprocal rank (RR) for this evaluation. The * reciprocal rank is defined to be the reciprocal of the rank at * which the first correct result is retrieved (counting from 1). * The return result will be between 1.0 for the first-best result * being correct and 0.0, for none of the results being correct. * * <P>Typically, the mean of the reciprocal ranks for a number * of evaluations is reported. * * @return The reciprocal rank. 
*/
    public double reciprocalRank() {
        Iterator caseIt = mCases.iterator();
        int rank = 1; // ranks are counted from one
        while (caseIt.hasNext()) {
            Case current = (Case) caseIt.next();
            if (current.mCorrect)
                return 1.0 / (double) rank;
            ++rank;
        }
        return 0.0;
    }

    /**
     * Returns the area under the recall-precision curve with
     * interpolation as specified.  The recall-precision curve is
     * taken to be a step function for the purposes of this
     * calculation, and thus whether precision is interpolated, that
     * is, whether dominated entries are pruned, will affect the area.
     *
     * <P>Interpolation will always result in an equal or greater area.
     *
     * <P>Note that the uninterpolated area under the recall-precision
     * curve is the same as the average precision value.
     *
     * @param interpolate Set to <code>true</code> to interpolate
     * the precision values.
     * @return The area under the specified precision-recall curve.
     */
    public double areaUnderPrCurve(boolean interpolate) {
        double[][] curve = prCurve(interpolate);
        return areaUnder(curve);
    }

    /**
     * Returns the area under the receiver operating characteristic
     * (ROC) curve.  The ROC curve is taken to be a step function for
     * the purposes of this calculation, and thus whether rejection
     * recall is interpolated, that is, whether dominated entries are
     * pruned, will affect the area.
     *
     * <P>Interpolation will always result in an equal or greater area.
     *
     * @param interpolate Set to <code>true</code> to interpolate
     * the rejection recall values.
     * @return The area under the ROC curve.
     */
    public double areaUnderRocCurve(boolean interpolate) {
        double[][] curve = rocCurve(interpolate);
        return areaUnder(curve);
    }

    /**
     * Returns a string-based representation of this scored precision
     * recall evaluation.
     *
     * @return Multi-line report of the summary statistics of this
     * evaluation.
*/
    public String toString() {
        StringBuffer sb = new StringBuffer();
        sb.append(" Area Under PR Curve (interpolated)=").append(areaUnderPrCurve(true));
        sb.append("\n Area Under PR Curve (uninterpolated)=").append(areaUnderPrCurve(false));
        sb.append("\n Area Under ROC Curve (interpolated)=").append(areaUnderRocCurve(true));
        sb.append("\n Area Under ROC Curve (uninterpolated)=").append(areaUnderRocCurve(false));
        sb.append("\n Average Precision=").append(averagePrecision());
        sb.append("\n Maximum F(1) Measure=").append(maximumFMeasure());
        sb.append("\n BEP (Precision-Recall break even point)=").append(prBreakevenPoint());
        sb.append("\n Reciprocal Rank=").append(reciprocalRank());
        int[] ranks = new int[] { 5, 10, 25, 100, 500 };
        // Ranks are ascending, so stop at the first rank for which there are
        // too few cases.  The previous guard (size < rank) was inverted: it
        // reported only ranks at which precisionAt() yields NaN and skipped
        // every rank that could actually be computed.
        for (int i = 0; i < ranks.length && ranks[i] <= mCases.size(); ++i)
            sb.append("\n Precision at " + ranks[i] + "=" + precisionAt(ranks[i]));
        return sb.toString();
    }

    /**
     * Returns the quotient of the two arguments using floating-point
     * division, so a zero denominator yields an infinite or
     * <code>Double.NaN</code> result rather than an exception.
     *
     * @param x Numerator.
     * @param y Denominator.
     * @return The value <code>x/y</code>.
     */
    static double div(double x, double y) {
        return x/y;
    }

    /**
     * Converts a list of two-element points into an array, optionally
     * pruning dominated points.
     *
     * <P>Without interpolation, every point is returned in list
     * order.  With interpolation, the list is scanned from its last
     * element to its first, and a point is kept only if its second
     * coordinate is strictly greater than that of every point after
     * it; the kept points are returned in their original relative
     * order.  Note that in the interpolating case the argument list
     * is reversed in place as a side effect.
     *
     * @param prList List of <code>double[]</code> points.
     * @param interpolate Whether to prune dominated points.
     * @return Array of the retained points.
     */
    private static double[][] interpolate(ArrayList prList, boolean interpolate) {
        if (!interpolate) {
            double[][] allPoints = new double[prList.size()][];
            prList.toArray(allPoints);
            return allPoints;
        }
        Collections.reverse(prList);
        LinkedList kept = new LinkedList();
        double maxSecond = Double.NEGATIVE_INFINITY;
        for (Iterator it = prList.iterator(); it.hasNext(); ) {
            double[] point = (double[]) it.next();
            double second = point[1];
            if (maxSecond < second) {
                maxSecond = second;
                kept.addFirst(point); // restores the original ordering
            }
        }
        double[][] result = new double[kept.size()][];
        kept.toArray(result);
        return result;
    }

    /**
     * Returns the area under a step function given as an array of
     * <code>{ x, height }</code> points.  Each point contributes a
     * rectangle reaching back to the previous point's x value, with
     * the first rectangle starting at zero.
     *
     * @param zeroOneStepFunction Points of the step function.
     * @return Sum of the rectangle areas.
     */
    private static double areaUnder(double[][] zeroOneStepFunction) {
        double area = 0.0;
        double previousX = 0.0;
        for (int i = 0; i < zeroOneStepFunction.length; ++i) {
            double x = zeroOneStepFunction[i][0];
            double height = zeroOneStepFunction[i][1];
            area += (x - previousX) * height; // step function
            previousX = x;
        }
        return area;
    }

    /**
     * An immutable pairing of a correctness flag and a score.
     */
    static class Case implements Scored {
        private final boolean mCorrect;
        private final double mScore;

        /**
         * Constructs a case from the specified flag and score.
         *
         * @param correct Whether the case is correct.
         * @param score Score of the case.
         */
        Case(boolean correct, double score) {
            mCorrect = correct;
            mScore = score;
        }

        /**
         * Returns the score of this case.
         *
         * @return The score of this case.
         */
        public double score() {
            return mScore;
        }

        /**
         * Returns the correctness flag and score separated by
         * <code>" : "</code>.
         *
         * @return String-based representation of this case.
         */
        public String toString() {
            return mCorrect + " : " + mScore;
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -