📄 scoredprecisionrecallevaluation.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
     * #prCurve(boolean)} provides the rejection recall rates
     * at each threshold. The resulting ROC curves for that example
     * are:
     *
     * As with the recall-precision curve, the parameter determines
     * whether or not to &quot;interpolate&quot; the rejection recall
     * values.  This is carried out as with the recall-precision curve
     * by only returning values which would not be interpolated.  In
     * general, without interpolation, the same rows of the table are
     * used as for the recall-precision curve, namely those at the end
     * of a run of true positives.  Interpolation may result in a
     * different set of recall points in the pruned answer set, as in
     * the example above.
     *
     * <P>Like the recall-precision curve method, this method does not
     * insert artificial end points of (0,1) and (1,0) into the graph.
     * As with the recall-precision curve, the final entry will have
     * recall equal to one.
     *
     * <P>Neither interpolated nor uninterpolated return values are
     * guaranteed to be convex.  Convex closure will skew results
     * upward in an even more unrealistic direction, especially if the
     * artificial completion point (0,1) is included.
     *
     * @param interpolate If <code>true</code>, any point with both
     * recall and rejection recall lower than another point is
     * eliminated from the returned ROC curve.
     * @return The receiver operating characteristic curve for this
     * evaluation, as an array of (recall, rejection recall) pairs.
*/
    public double[][] rocCurve(boolean interpolate) {
        PrecisionRecallEvaluation runningEval = new PrecisionRecallEvaluation();
        ArrayList rocPoints = new ArrayList();
        Iterator caseIt = mCases.iterator();
        while (caseIt.hasNext()) {
            Case next = (Case) caseIt.next();
            boolean isCorrect = next.mCorrect;
            // NOTE(review): the second argument is always true, presumably
            // marking every case seen so far as a positive response --
            // confirm against PrecisionRecallEvaluation.addCase.
            runningEval.addCase(isCorrect, true);
            if (!isCorrect) {
                // Only correct cases contribute a point to the curve.
                continue;
            }
            double recall = div(runningEval.truePositive(), mPositiveRef);
            double rejectionRecall
                = 1.0 - div(runningEval.falsePositive(), mNegativeRef);
            rocPoints.add(new double[] { recall, rejectionRecall });
        }
        return interpolate(rocPoints, interpolate);
    }

    /**
     * Returns the largest F<sub><sub>1</sub></sub>-measure found at
     * any point of the uninterpolated precision-recall curve.  The
     * value corresponds to choosing the acceptance threshold after
     * the fact so that the F measure,
     * <code>2*recall*precision/(recall+precision)</code>, is as large
     * as possible.
     *
     * <P>For the example in {@link #prCurve(boolean)}:
     *
     * <blockquote><code>
     * maximumFMeasure() = 0.67
     * </code></blockquote>
     *
     * corresponding to recall=0.75 and precision=0.60.
     *
     * <P>This is a convenience method equivalent to calling {@link
     * #maximumFMeasure(double)} with an argument of <code>1.0</code>.
     *
     * @return Maximum F<sub><sub>1</sub></sub>-measure over the
     * uninterpolated precision-recall curve.
     */
    public double maximumFMeasure() {
        final double balancedBeta = 1.0;
        return maximumFMeasure(balancedBeta);
    }

    /**
     * Returns the maximum F<sub><sub>&beta;</sub></sub>-measure for
     * an actual operating point on the uninterpolated
     * precision-recall curve for a specified &beta;.  This
     * maximization is based on a post-hoc optimal acceptance
     * threshold.  This is derived from the pair on the
     * recall-precision curve yielding the highest value of the F
     * measure as computed by
     * <code>PrecisionRecallEvaluation.fMeasure</code>; for &beta; = 1
     * this is <code>2*recall*precision/(recall+precision)</code>.
*     * <P>For the example in {@link #prCurve(boolean)}:     *     * <blockquote><code>     * maximumFMeasure(&quot;foo&quot;) = 0.67     * </code></blockquote>     *     * corresponding to recall=0.75 and precision=0.60.     *     * @return Maximum f-measure for the specified category.     */    public double maximumFMeasure(double beta) {        double maxF = 0.0;        double[][] pr = prCurve(false);        for (int i = 0; i < pr.length; ++i) {            double f = PrecisionRecallEvaluation.fMeasure(beta,pr[i][0],pr[i][1]);            maxF = Math.max(maxF,f);        }        return maxF;    }    /**     * Returns the breakeven point (BEP) for precision and recall     * based on the interpolated precision.  This is the point where     * the interpolated precision-recall curve has recall equal to     * precision.  Because the interpolation is a step fucntion, the     * result is different than if two points were linearly     * interpolated.     *     * <P>For the example illustrated in {@link #prCurve(boolean)},     * the breakeven point is 0.60.  This is because the interpolated     * precision recall curve is flat from the implicit initial point     * <code>(0.00,0.60)</code> to <code>(0.75,0.60)</code> and thus     * the line between them has a breakeven point of <code>x = y =     * 0.6</code>.     *     * <P>As an interpolation (equal precision and recall) of a     * rounded up estimate (interpolated recall-precision curve), the     * breakeven point is not necessarily an achievable operating     * point.  Note that the recall-precision breakeven point will     * always be smaller than the maximum F measure, which does     * correspond to an observed operating point, because the     * breakeven point always involves lowering the recall of the     * first point on the curve with recall greater than precision to     * match the precision.     *     * <P>This method will return <code>0.0</code> if the     * precision-recall curve never crosses the diagonal.     
*     * @return The interpolated recall-precision breakeven point.     */    public double prBreakevenPoint() {        double[][] prCurve = prCurve(true);        for (int i = 0; i < prCurve.length; ++i)            if (prCurve[i][0] > prCurve[i][1])                return prCurve[i][1];        return 0.0;    }    /**     * Returns point-wise average precision of points on the     * uninterpolated precision-recall curve.  See {@link     * #prCurve(boolean)} for a definition of the values on the curve.     *      * <P>This method implements the standard information retrieval     * definition, which only averages precision measurements from     * correct responses.     *     * <P>For the example provided in {@link #prCurve(boolean)}, the     * average precision is the average of precision values for the     * correct responses (highlighted lines):     *      *     * <P>Although the reasoning is different, the average precision     * returned is the same as the area under the uninterpolated     * recall-precision graph.     *     * @return Pointwise average precision.     */    public double averagePrecision() {        double[][] prCurve = prCurve(false);        double sum = 0.0;        for (int i = 0; i < prCurve.length; ++i)            sum += prCurve[i][1];         return sum/((double)prCurve.length);    }    /**     * Returns the precision score achieved by returning the top     * scoring documents up to the specified rank.  The     * precision-recall curve is not interpolated for this     * computation.  If there are not enough documents, the result     * <code>Double.NaN</code> is returned.     *      * @return The precision at the specified rank.     
*/
    public double precisionAt(int rank) {
        if (rank > mCases.size()) {
            // Not enough cases to fill the requested rank.
            return Double.NaN;
        }
        Iterator caseIt = mCases.iterator();
        int hits = 0;
        for (int seen = 0; seen < rank; ++seen) {
            Case next = (Case) caseIt.next();
            if (next.mCorrect) {
                ++hits;
            }
        }
        return ((double) hits) / (double) rank;
    }

    /**
     * Returns the reciprocal rank (RR) for this evaluation, that is,
     * one divided by the one-based position of the first correct
     * case encountered when iterating over the stored cases.  The
     * result is 1.0 when the very first case is correct and 0.0 when
     * no case is correct.
     *
     * <P>Typically, the mean of the reciprocal ranks for a number
     * of evaluations is reported.
     *
     * @return The reciprocal rank.
     */
    public double reciprocalRank() {
        Iterator caseIt = mCases.iterator();
        int position = 0; // one-based rank of the case being examined
        while (caseIt.hasNext()) {
            ++position;
            Case next = (Case) caseIt.next();
            if (next.mCorrect) {
                return 1.0 / (double) position;
            }
        }
        return 0.0;
    }

    /**
     * Returns the area under the recall-precision curve with
     * interpolation as specified.  The recall-precision curve is
     * taken to be a step function for the purposes of this
     * calculation, and thus whether precision is interpolated, that
     * is, whether dominated entries are pruned, will affect the area.
     *
     * <P>For the example detailed in {@link
     * #prCurve(boolean)}, the areas without and with
     * interpolation are:
     *
     *
     * Interpolation will always result in an equal or greater area.
     *
     * <P>Note that the uninterpolated area under the recall-precision
     * curve is the same as the average precision value.
     *
     * @param interpolate Set to <code>true</code> to interpolate
     * the precision values.
     * @return The area under the specified precision-recall curve.
*/
    public double areaUnderPrCurve(boolean interpolate) {
        return areaUnder(prCurve(interpolate));
    }

    /**
     * Returns the area under the receiver operating characteristic
     * (ROC) curve.  The ROC curve is taken to be a step function for
     * the purposes of this calculation, and thus whether rejection
     * recall is interpolated, that is, whether dominated entries are
     * pruned, will affect the area.
     *
     * Interpolation will always result in an equal or greater area.
     *
     * @param interpolate Set to <code>true</code> to interpolate
     * the rejection recall values.
     * @return The area under the ROC curve.
     */
    public double areaUnderRocCurve(boolean interpolate) {
        return areaUnder(rocCurve(interpolate));
    }

    /**
     * Returns a string-based representation of this scored precision
     * recall evaluation.  The report lists, one per line, the areas
     * under the PR and ROC curves (interpolated and uninterpolated),
     * the average precision, the maximum F(1) measure, the
     * precision-recall breakeven point, the reciprocal rank, and the
     * precision at ranks 5, 10, 25, 100 and 500.
     *
     * <P>Precision at a rank is only reported when at least that many
     * cases have been added, because {@link #precisionAt(int)}
     * returns <code>Double.NaN</code> for larger ranks.
     *
     * @return Multi-line summary of this evaluation.
     */
    public String toString() {
        StringBuffer sb = new StringBuffer();
        sb.append("  Area Under PR Curve (interpolated)="
                  + areaUnderPrCurve(true));
        sb.append("\n  Area Under PR Curve (uninterpolated)="
                  + areaUnderPrCurve(false));
        sb.append("\n  Area Under ROC Curve (interpolated)="
                  + areaUnderRocCurve(true));
        sb.append("\n  Area Under ROC Curve (uninterpolated)="
                  + areaUnderRocCurve(false));
        sb.append("\n  Average Precision=" + averagePrecision());
        sb.append("\n  Maximum F(1) Measure=" + maximumFMeasure());
        sb.append("\n  BEP (Precision-Recall break even point)="
                  + prBreakevenPoint());
        sb.append("\n  Reciprocal Rank=" + reciprocalRank());
        int[] ranks = new int[] { 5, 10, 25, 100, 500 };
        int numCases = mCases.size();
        // Ranks are ascending, so stop at the first rank that exceeds the
        // number of cases; beyond that point precisionAt() yields NaN.
        for (int i = 0; i < ranks.length && ranks[i] <= numCases; ++i) {
            sb.append("\n  Precision at " + ranks[i]
                      + "=" + precisionAt(ranks[i]));
        }
        return sb.toString();
    }

    /**
     * Returns the quotient of the two arguments using ordinary
     * <code>double</code> division.  No check is made for a zero
     * denominator, so the result may be infinite or
     * <code>Double.NaN</code>.
     *
     * @param x Numerator.
     * @param y Denominator.
     * @return The value <code>x/y</code>.
     */
    static double div(double x, double y) {
        return x / y;
 }

    /**
     * Converts a list of two-element <code>double[]</code> points
     * into an array, optionally pruning it.
     *
     * <P>Without interpolation the points are returned in list order.
     * With interpolation the list is reversed in place and scanned
     * from what was originally its last point toward its first,
     * keeping only those points whose second coordinate is strictly
     * greater than that of every point that followed them in the
     * original order.  The surviving points are returned in their
     * original relative order.
     *
     * @param prList List of <code>double[]</code> points; reversed in
     * place when <code>interpolate</code> is <code>true</code>.
     * @param interpolate Whether to prune as described above.
     * @return Array of the retained points.
     */
    private static double[][] interpolate(ArrayList prList,
                                          boolean interpolate) {
        if (!interpolate) {
            return (double[][]) prList.toArray(new double[prList.size()][]);
        }
        Collections.reverse(prList);
        LinkedList kept = new LinkedList();
        double bestSoFar = Double.NEGATIVE_INFINITY;
        for (Iterator pointIt = prList.iterator(); pointIt.hasNext(); ) {
            double[] point = (double[]) pointIt.next();
            double y = point[1];
            if (y > bestSoFar) {
                bestSoFar = y;
                // Prepending restores the original (unreversed) order.
                kept.addFirst(point);
            }
        }
        return (double[][]) kept.toArray(new double[kept.size()][]);
    }

    /**
     * Returns the area under a step function given as an array of
     * (x, y) points.  Each point contributes a rectangle whose width
     * is the distance from the previous point's x value (starting
     * from 0.0) and whose height is the point's own y value.
     *
     * @param zeroOneStepFunction Array of two-element (x, y) points.
     * @return Sum of the rectangle areas.
     */
    private static double areaUnder(double[][] zeroOneStepFunction) {
        double total = 0.0;
        double previousX = 0.0;
        for (int k = 0; k < zeroOneStepFunction.length; ++k) {
            double[] point = zeroOneStepFunction[k];
            double x = point[0];
            double y = point[1];
            double width = x - previousX;
            total += width * y; // step function
            previousX = x;
        }
        return total;
    }

    /**
     * An immutable pairing of a correctness flag with a score,
     * exposing the score through the <code>Scored</code> interface.
     */
    static class Case implements Scored {
        private final boolean mCorrect;
        private final double mScore;

        /**
         * Constructs a case from the given flag and score.
         *
         * @param correct Whether this case is correct.
         * @param score Score assigned to this case.
         */
        Case(boolean correct, double score) {
            mCorrect = correct;
            mScore = score;
        }

        /**
         * Returns the score supplied at construction.
         *
         * @return The score of this case.
         */
        public double score() {
            return mScore;
        }

        /**
         * Returns the correctness flag and the score separated by
         * a colon.
         *
         * @return String of the form <code>correct : score</code>.
         */
        public String toString() {
            return mCorrect + " : " + mScore;
        }
    }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -