📄 hmmevaluation.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    }

    /**
     * Adds a first-best response case with the specified tokens,
     * reference tags, and first-best response tags.  Note that
     * this only adds information to the first-best evaluation,
     * not the n-best or lattice-based evaluations.
     *
     * <p>Besides the per-token classifications, this updates the
     * token and case counters, the count of completely correct cases,
     * and the unknown-token counters.  A token counts as unknown if
     * it is not contained in {@link #knownTokenSet()} at the time of
     * this call.
     *
     * @param tokens The tokens for the evaluation.
     * @param referenceTags The reference tagging.
     * @param responseTags The response tagging.
     * @throws IllegalArgumentException If the token, reference tag
     * and response tag arrays are not all the same length.
     */
    public void addFirstBestCase(String[] tokens,
                                 String[] referenceTags,
                                 String[] responseTags) {
        verifyEqualLengths("tokens","referenceTags",tokens,referenceTags);
        verifyEqualLengths("tokens","responseTags",tokens,responseTags);

        mNumTokens += tokens.length;
        ++mNumCases;
        if (com.aliasi.util.Arrays.equals(referenceTags,responseTags)) {
            ++mNumCasesCorrect;
        }

        for (int i = 0; i < tokens.length; ++i) {
            Classification result = new Classification(responseTags[i]);
            mFirstBestEvaluation.addClassification(referenceTags[i],result);
        }

        // Fetch the known-token view once; it is not modified while
        // this method runs, so there is no need to re-wrap it per token.
        Set knownTokens = knownTokenSet();
        for (int i = 0; i < tokens.length; ++i) {
            if (knownTokens.contains(tokens[i])) {
                continue;
            }
            ++mNumUnknownTokens;
            if (referenceTags[i].equals(responseTags[i])) {
                ++mNumUnknownTokensCorrect;
            }
        }
    }

    /**
     * Returns the accuracy measured over entire cases.  This is the
     * number of evaluation cases that are completely correct divided
     * by the number of cases evaluated.  This number makes sense in
     * cases where the cases correspond to meaningful units such as
     * sentences.
     *
     * <p>The division is carried out in floating point, so if the
     * case count is zero the result is not a finite number
     * (<code>NaN</code> when the correct count is also zero).
     *
     * @return The first-best complete case tagging accuracy.
     */
    public double caseAccuracy() {
        return ((double) mNumCasesCorrect)
            / (double) mNumCases;
    }

    /**
     * Returns the set of known tokens for this evaluation.
     * The returned set is an unmodifiable view: it cannot be changed
     * through the returned reference, but it will reflect the current
     * set of known tokens, including tokens added later with
     * {@link #addKnownToken(String)}.
     *
     * @return An unmodifiable view of the set of known tokens for
     * this evaluation.
     */
    public Set knownTokenSet() {
        return Collections.unmodifiableSet(mKnownTokenSet);
    }

    /**
     * Adds the specified token to the set of known tokens.
     *
     * @param token Token to add to set of known tokens.
     */
    public void addKnownToken(String token) {
        mKnownTokenSet.add(token);
    }

    /**
     * Returns the first-best accuracy for unknown tokens.  Unknown
     * tokens are defined to be those not in the mutable set {@link
     * #knownTokenSet()} at the time the evaluation case was
     * added.
     *
     * <p>The division is carried out in floating point, so if no
     * unknown tokens have been counted the result is not a finite
     * number (<code>NaN</code> when the correct count is also zero).
     *
     * @return The first-best unknown token accuracy.
     */
    public double unknownTokenAccuracy() {
        return ((double) mNumUnknownTokensCorrect)
            / (double) mNumUnknownTokens;
    }

    /**
     * Add a lattice-based response case with the specified tokens,
     * reference tags and lattice.  Note that this only adds information
     * to the lattice evaluation, not the first-best or n-best evaluations.
     *
     * <p>For each token position, the scored tags returned by the
     * lattice for that position are converted to a joint
     * classification and recorded against the reference tag.
     *
     * @param tokens The tokens for the evaluation.
     * @param referenceTags The reference tagging.
     * @param lattice The response lattice.
     * @throws IllegalArgumentException If the token and reference tag
     * arrays are different lengths, or if the lattice tokens are not
     * the same as the tokens.
     */
    public void addLatticeCase(String[] tokens,
                               String[] referenceTags,
                               TagWordLattice lattice) {
        verifyEqualLengths("tokens","referenceTags",tokens,referenceTags);
        verifyEqual(tokens,lattice.tokens());

        for (int i = 0; i < tokens.length; ++i) {
            ScoredObject[] scoredTags = lattice.log2ConditionalTags(i);
            double[] log2JointProbs = new double[scoredTags.length];
            String[] responseNBestTags = new String[scoredTags.length];
            // Split the scored tags into parallel arrays of scores and
            // tag names, preserving the order supplied by the lattice.
            for (int j = 0; j < scoredTags.length; ++j) {
                log2JointProbs[j] = scoredTags[j].score();
                responseNBestTags[j] = scoredTags[j].getObject().toString();
            }
            JointClassification jc
                = new JointClassification(responseNBestTags, log2JointProbs);
            mLatticeEvaluation.addClassification(referenceTags[i],jc);
        }
    }

    /**
     * Add an n-best response case with the specified tokens,
     * reference tags and n-best iterator.  Note that this only adds
     * information to the n-best evaluation, not the first-best or
     * confidence-based lattice evaluations.
     *
     * <p>At most the first <code>mMaxNBest</code> responses are read
     * from the iterator.  The zero-based rank of the first response
     * whose tags equal the reference tags is counted in the n-best
     * histogram; if none of the inspected responses matches, the
     * case is counted at rank <code>mMaxNBest</code>.
     *
     * @param tokens The tokens for the evaluation.
     * @param referenceTags The reference tagging.
     * @param nBestIterator The n-best iterator.
     * @throws IllegalArgumentException If the token and reference tag
     * arrays are different lengths.
     */
    public void addNBestCase(String[] tokens,
                             String[] referenceTags,
                             Iterator<ScoredObject<String[]>> nBestIterator) {
        verifyEqualLengths("tokens","referenceTags",tokens,referenceTags);

        for (int i = 0; (i < mMaxNBest) && nBestIterator.hasNext(); ++i) {
            ScoredObject<String[]> response = nBestIterator.next();
            String[] responseTags = response.getObject();
            if (com.aliasi.util.Arrays.equals(referenceTags,responseTags)) {
                mNBestHistogram.increment(Integer.valueOf(i));
                mLastNBest = i;
                return;
            }
        }

        // No match within the inspected responses: record the overflow rank.
        mLastNBest = mMaxNBest;
        mNBestHistogram.increment(Integer.valueOf(mMaxNBest));
    }

    /**
     * Returns a terse, one-line report of the current state of this
     * evaluation.
     *
     * @return A string representation of the state of this
     * evaluation.
     */
    @Override
    public String toString() {
        return "#Cases=" + mNumCases
            + "  #Toks=" + mNumTokens
            + "  Tok Acc="
            + format(mFirstBestEvaluation.confusionMatrix().totalAccuracy())
            + "  Case Acc="  + format(caseAccuracy())
            + "  Lattice Acc="
            + format(mLatticeEvaluation.confusionMatrix().totalAccuracy())
            + "  Unknown Toks=" + mNumUnknownTokens
            + "  Unknown Tok Acc=" + format(unknownTokenAccuracy())
            ;
    }

    /**
     * Returns the rank recorded by the most recent call to
     * {@link #addNBestCase(String[],String[],Iterator)}: the
     * zero-based index of the first matching response, or
     * <code>mMaxNBest</code> if no inspected response matched.
     *
     * @return The rank recorded for the last n-best case.
     */
    int lastNBest() {
        return mLastNBest;
    }

    /**
     * Formats the specified value for the one-line report using the
     * decimal pattern <code>"0.000"</code>.
     *
     * @param x Value to format.
     * @return The formatted value.
     */
    static String format(double x) {
        // NOTE(review): the final argument 5 is presumably the output
        // width to pad to -- confirm against Strings.decimalFormat.
        return Strings.decimalFormat(x,"0.000",5);
    }

    /**
     * Verifies that the two specified arrays have the same length.
     *
     * @param name1 Name of the first array, used in the error message.
     * @param name2 Name of the second array, used in the error message.
     * @param xs1 First array.
     * @param xs2 Second array.
     * @throws IllegalArgumentException If the arrays are not the
     * same length.
     */
    static void verifyEqualLengths(String name1, String name2,
                                   String[] xs1, String[] xs2) {
        if (xs1.length == xs2.length) {
            return;
        }
        String msg = "Arrays " + name1 + " and " + name2
            + " must be same length."
            + " Found " + name1 + ".length=" + xs1.length
            + " " + name2 + ".length=" + xs2.length;
        throw new IllegalArgumentException(msg);
    }

    /**
     * Verifies that the specified token arrays have the same length
     * and pairwise equal elements.  Elements of the first array are
     * compared with <code>equals</code> and so must be non-null.
     *
     * @param tokens1 The evaluation tokens.
     * @param tokens2 The tokens reported by the lattice.
     * @throws IllegalArgumentException If the arrays differ in length
     * or in any element; the message identifies the first mismatch.
     */
    static void verifyEqual(String[] tokens1, String[] tokens2) {
        if (tokens1.length != tokens2.length) {
            String msg = "Tokens must match lattice tokens."
                + " tokens.length=" + tokens1.length
                + " lattice.tokens().length=" + tokens2.length;
            throw new IllegalArgumentException(msg);
        }
        for (int i = 0; i < tokens1.length; ++i) {
            if (tokens1[i].equals(tokens2[i])) {
                continue;
            }
            String msg = "Tokens must match lattice tokens."
                + " tokens[" + i + "]=" + tokens1[i]
                + " != lattice.tokens()[" + i + "]=" + tokens2[i];
            throw new IllegalArgumentException(msg);
        }
    }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -