📄 tagwordlattice.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
     * at the specified token index.
     * @throws IndexOutOfBoundsException If the token index or either
     * tag identifier is out of bounds.
     */
    public double transition(int tokenIndex,
                             int sourceTagId, int targetTagId) {
        // The forward and backward passes in this class only ever read
        // transitions at token indices 1 and above, so index 0 is
        // rejected with an explicit message.
        if (tokenIndex == 0) {
            String msg = "Token index must be > 0.";
            throw new IndexOutOfBoundsException(msg);
        }
        return mTransitions[tokenIndex][sourceTagId][targetTagId];
    }

    /**
     * Returns the log (base 2) transition probability for the
     * specified token index and source and target tag identifiers.
     * See {@link #transition(int,int,int)} for more information.
     *
     * @param tokenIndex Index of token.
     * @param sourceTagId Identifier for source tag in symbol table.
     * @param targetTagId Identifier for target tag in symbol table.
     * @return Log transition probability from source tag to target
     * tag arriving at the specified token index.
     * @throws IndexOutOfBoundsException If the token index or either
     * tag identifier is out of bounds.
     */
    public double log2Transitions(int tokenIndex,
                                  int sourceTagId, int targetTagId) {
        return com.aliasi.util.Math.log2(transition(tokenIndex,
                                                    sourceTagId,
                                                    targetTagId));
    }

    /**
     * Returns the forward probability up to the token of the
     * specified index and for the tag of the specified identifier.
     * The forward estimate includes the start probabilities and the
     * emissions up to and including the token at the specified index.
     *
     * <P>The value is reconstructed from a rescaled cell and a
     * per-token base 2 exponent; use {@link #log2Forward(int,int)}
     * when the linear-scale product may underflow.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Forward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double forward(int tokenIndex, int tagId) {
        return mForwards[tokenIndex][tagId]
            * Math.pow(2.0, mForwardExps[tokenIndex]);
    }

    /**
     * Returns the log (base 2) of the forward probability up to the
     * token of the specified index and for the tag of the specified
     * identifier.  See {@link #forward(int,int)} for more
     * information.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Log forward probability for the token index and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double log2Forward(int tokenIndex, int tagId) {
        return com.aliasi.util.Math.log2(mForwards[tokenIndex][tagId])
            + mForwardExps[tokenIndex];
    }

    /**
     * Returns the backward probability up to the token of the
     * specified index and for the tag of the specified identifier.
     * This includes the stop probability and emissions up to but
     * not including the specified token index.
     *
     * <P>The value is reconstructed from a rescaled cell and a
     * per-token base 2 exponent; use {@link #log2Backward(int,int)}
     * when the linear-scale product may underflow.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double backward(int tokenIndex, int tagId) {
        return mBacks[tokenIndex][tagId]
            * Math.pow(2.0, mBackExps[tokenIndex]);
    }

    /**
     * Returns the log (base 2) backward probability up to the token
     * of the specified index and for the tag of the specified
     * identifier.  See {@link #backward(int,int)} for more information.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Log backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double log2Backward(int tokenIndex, int tagId) {
        return com.aliasi.util.Math.log2(mBacks[tokenIndex][tagId])
            + mBackExps[tokenIndex];
    }

    /**
     * Returns the product of the forward and backward probabilities
     * for the token with the specified index and tag with the
     * specified identifier.  Dividing this result by the total
     * probability as given by {@link #total()} results in the
     * normalized state probability between 0.0 and 1.0.  Furthermore,
     * the sum of all forward-backward probabilities at any given token
     * index is equal to the total lattice probability.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Forward-backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double forwardBackward(int tokenIndex, int tagId) {
        return forward(tokenIndex, tagId) * backward(tokenIndex, tagId);
    }

    /**
     * Returns the log (base 2) of the product of the forward and
     * backward probabilities for the token with the specified index
     * and tag with the specified identifier.  The result is computed
     * as the sum of {@link #log2Forward(int,int)} and
     * {@link #log2Backward(int,int)}.  Subtracting the log total
     * probability as given by {@link #log2Total()} results in the
     * log of the normalized state probability.  See
     * {@link #forwardBackward(int,int)} for more information.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Log forward-backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double log2ForwardBackward(int tokenIndex, int tagId) {
        return log2Forward(tokenIndex, tagId)
            + log2Backward(tokenIndex, tagId);
    }

    /**
     * Returns the total probability for all paths in the lattice.
     * This probability is the marginal probability of the input
     * tokens. This probability will be equal to the sum of the
     * forward-backward probabilities at any token index.  If there
     * are no tokens in the lattice, the total probability is 1.0, and
     * the log probability is 0.0.
     *
     * <P>The conditional probability of a state at a given token
     * position given the entire input is equal to the
     * forward-backward probability divided by the total
     * probability. The forward-backward probability is the joint
     * probability of the input tokens and state, whereas the total
     * probability is the probability of the input tokens.
     *
     * <P><i>Warning:</i> This value is likely to underflow for long
     * inputs; in this case use {@link #log2Total()} instead.
     *
     * @return Total probability for the lattice.
     */
    public double total() {
        return mTotal;
    }

    /**
     * Returns the log (base 2) total probability for all paths in the
     * lattice.  See {@link #total()} for more information.
     *
     * @return Log total probability for the lattice.
     */
    public double log2Total() {
        return mLog2Total;
    }

    // Fills in the forward and backward lattices and then the totals.
    // The total is derived from both lattices, so it must run last.
    final void computeAll() {
        computeForward();
        computeBackward();
        computeTotal();
    }

    // Sets mTotal and mLog2Total from the forward and backward cells
    // at token index 0; an empty lattice has total 1.0 (log 0.0).
    private void computeTotal() {
        if (mForwards.length == 0) {
            mTotal = 1.0;
            mLog2Total = 0.0;
            return;
        }
        double total = 0.0;
        int numSymbols = tagSymbolTable().numSymbols();
        for (int tagId = 0; tagId < numSymbols; ++tagId) {
            total += mForwards[0][tagId] * mBacks[0][tagId];
        }
        // Cells hold rescaled values; the scaling removed from both
        // lattices is restored through the summed exponents.
        double exp = mForwardExps[0] + mBackExps[0];
        mLog2Total = com.aliasi.util.Math.log2(total)
            + exp;
        mTotal = total * Math.pow(2.0, exp);
    }

    // Fills mForwards and mForwardExps left to right.  Each row is
    // rescaled in place by a power of two and the accumulated base 2
    // exponent for the row is stored in mForwardExps.
    private void computeForward() {
        if (mForwards.length == 0) {
            return;
        }
        int numSymbols = tagSymbolTable().numSymbols();
        double[] forwards = mForwards[0];
        for (int tagId = 0; tagId < numSymbols; ++tagId) {
            // Negative start values are clamped to zero in mStarts
            // itself before being used.
            if (mStarts[tagId] < 0.0) {
                mStarts[tagId] = 0.0;
            }
            // Copied element by element: the row is rescaled in place
            // below, so aliasing mStarts would modify it.
            forwards[tagId] = mStarts[tagId];
        }
        // The first row holds straight probabilities, so the strict
        // [0,1] range check applies.
        mForwardExps[0] = log2ScaleExp(forwards);
        int numToks = mTokens.length;
        for (int tokenId = 1; tokenId < numToks; ++tokenId) {
            forwards = mForwards[tokenId-1];
            double[][] transits = mTransitions[tokenId];
            for (int tagId = 0; tagId < numSymbols; ++tagId) {
                double f = 0.0;
                for (int prevTagId = 0; prevTagId < numSymbols; ++prevTagId) {
                    f += forwards[prevTagId]
                        * transits[prevTagId][tagId];
                }
                mForwards[tokenId][tagId] = f;
            }
            // The previous row was rescaled so that its maximum is at
            // least 0.5; its entries may sum to more than 1, so the
            // new row may exceed 1 and needs the lenient rescaler.
            mForwardExps[tokenId]
                = log2RescaleExp(mForwards[tokenId]) + mForwardExps[tokenId-1];
        }
    }

    // Fills mBacks and mBackExps right to left, mirroring
    // computeForward() but starting from the end values.
    private void computeBackward() {
        if (mBacks.length == 0) {
            return;
        }
        int numSymbols = tagSymbolTable().numSymbols();
        int lastTok = mTokens.length - 1;
        double[] backs = mBacks[lastTok];
        for (int tagId = 0; tagId < numSymbols; ++tagId) {
            // Copied element by element: the row is rescaled in place
            // below, so aliasing mEnds would modify it.
            backs[tagId] = mEnds[tagId];
        }
        // The last row holds straight probabilities, so the strict
        // [0,1] range check applies.
        mBackExps[lastTok] = log2ScaleExp(backs);
        for (int tokenId = lastTok; --tokenId >= 0; ) {
            backs = mBacks[tokenId+1];
            double[][] transits = mTransitions[tokenId+1];
            for (int tagId = 0; tagId < numSymbols; ++tagId) {
                double b = 0.0;
                for (int nextTagId = 0; nextTagId < numSymbols; ++nextTagId) {
                    b += backs[nextTagId]
                        * transits[tagId][nextTagId];
                }
                mBacks[tokenId][tagId] = b;
            }
            // As in the forward pass, a row computed from a rescaled
            // row may exceed 1 and needs the lenient rescaler.
            mBackExps[tokenId] = log2RescaleExp(mBacks[tokenId])
                + mBackExps[tokenId+1];
        }
    }

    // Rescales the specified straight probabilities in place by a
    // power of two so that a non-zero maximum lands in [0.5,1], and
    // returns the (non-positive) base 2 exponent that undoes the
    // scaling: original = rescaled * 2^exponent.  Returns 0.0 for an
    // empty array.  Throws IllegalArgumentException if the maximum
    // value is negative or greater than 1.
    static double log2ScaleExp(double[] xs) {
        if (xs.length == 0) {
            return 0.0;
        }
        double max = maxValue(xs);
        if (max < 0.0 || max > 1.0) {
            String msg = "Max must be >= 0 and <= 1."
                + " max=" + max;
            throw new IllegalArgumentException(msg);
        }
        return scaleByPowerOfTwo(xs, max);
    }

    // Same contract as log2ScaleExp(), except that a maximum above 1
    // is accepted and scaled down, giving a positive exponent.  Used
    // for rows accumulated from an already rescaled row, which are
    // not straight probabilities.  Throws IllegalArgumentException
    // if the maximum value is negative or infinite.
    private static double log2RescaleExp(double[] xs) {
        if (xs.length == 0) {
            return 0.0;
        }
        double max = maxValue(xs);
        // An infinite maximum would never fall to 1 under halving.
        if (max < 0.0 || Double.isInfinite(max)) {
            String msg = "Max must be >= 0 and finite."
                + " max=" + max;
            throw new IllegalArgumentException(msg);
        }
        return scaleByPowerOfTwo(xs, max);
    }

    // Returns the largest element of the specified non-empty array.
    private static double maxValue(double[] xs) {
        double max = xs[0];
        for (int i = 1; i < xs.length; ++i) {
            if (max < xs[i]) {
                max = xs[i];
            }
        }
        return max;
    }

    // Multiplies every element by the power of two that moves the
    // specified maximum into [0.5,1] and returns the base 2 exponent
    // that undoes the scaling.  A maximum of zero is left unscaled.
    private static double scaleByPowerOfTwo(double[] xs, double max) {
        double exp = 0.0;
        double mult = 1.0;
        while (max != 0.0 && max < 0.5) {
            exp -= 1.0;
            mult *= 2.0;
            max *= 2.0;
        }
        while (max > 1.0) {
            exp += 1.0;
            mult *= 0.5;
            max *= 0.5;
        }
        for (int j = 0; j < xs.length; ++j) {
            xs[j] = xs[j] * mult;
        }
        return exp;
    }
}
上一页 12
💿 文件大小 4561 K
👤 上传用户 edan1181
📂 所属分类 Java编程
🏷️ 相关标签

#LingPipe #Java #自然语言处理 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -