📄 tagwordlattice.java
字号:
* at the specified token index.
     * @throws IndexOutOfBoundsException If the token index or either
     * tag identifier is out of bounds.
     */
    public double transition(int tokenIndex, int sourceTagId, int targetTagId) {
        // Index 0 is rejected explicitly: the forward and backward passes
        // below only ever read mTransitions[k] for k >= 1.
        if (tokenIndex == 0) {
            String msg = "Token index must be > 0.";
            throw new IndexOutOfBoundsException(msg);
        }
        return mTransitions[tokenIndex][sourceTagId][targetTagId];
    }

    /**
     * Returns the log (base 2) transition probability for the
     * specified token index and source and target tag identifiers.
     * See {@link #transition(int,int,int)} for more information.
     *
     * @param tokenIndex Index of token.
     * @param sourceTagId Identifier for source tag in symbol table.
     * @param targetTagId Identifier for target tag in symbol table.
     * @return Log transition probability from source tag to target
     * tag arriving at the specified token index.
     * @throws IndexOutOfBoundsException If the token index or either
     * tag identifier is out of bounds.
     */
    public double log2Transitions(int tokenIndex, int sourceTagId, int targetTagId) {
        return com.aliasi.util.Math.log2(transition(tokenIndex, sourceTagId, targetTagId));
    }

    /**
     * Returns the forward probability up to the token of the
     * specified index and for the tag of the specified identifier.
     * The forward estimate includes the start probabilities and the
     * emissions up to and including the token at the specified index.
     *
     * <P>The value is reconstructed from the scaled lattice entry and
     * the power-of-two exponent stored for the token, so it may
     * underflow for long inputs; {@link #log2Forward(int,int)} does
     * not perform that multiplication.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Forward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double forward(int tokenIndex, int tagId) {
        return mForwards[tokenIndex][tagId]
            * Math.pow(2.0, mForwardExps[tokenIndex]);
    }

    /**
     * Returns the log (base 2) of the forward probability up to the
     * token of the specified index and for the tag of the specified
     * identifier. See {@link #forward(int,int)} for more
     * information.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Log forward probability for the token index and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double log2Forward(int tokenIndex, int tagId) {
        return com.aliasi.util.Math.log2(mForwards[tokenIndex][tagId])
            + mForwardExps[tokenIndex];
    }

    /**
     * Returns the backward probability up to the token of the
     * specified index and for the tag of the specified identifier.
     * This includes the stop probability and emissions up to but
     * not including the specified token index.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double backward(int tokenIndex, int tagId) {
        return mBacks[tokenIndex][tagId]
            * Math.pow(2.0, mBackExps[tokenIndex]);
    }

    /**
     * Returns the log (base 2) backward probability up to the token
     * of the specified index and for the tag of the specified
     * identifier. See {@link #backward(int,int)} for more information.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Log backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double log2Backward(int tokenIndex, int tagId) {
        return com.aliasi.util.Math.log2(mBacks[tokenIndex][tagId])
            + mBackExps[tokenIndex];
    }

    /**
     * Returns the product of the forward and backward probabilities
     * for the token with the specified index and tag with the
     * specified identifier. Dividing this result by the total
     * probability as given by {@link #total()} results in the
     * normalized state probability between 0.0 and 1.0. Furthermore,
     * the sum of all forward-backward probabilities at any given token
     * index is equal to the total lattice probability.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Forward-backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double forwardBackward(int tokenIndex, int tagId) {
        return forward(tokenIndex, tagId) * backward(tokenIndex, tagId);
    }

    /**
     * Returns the log (base 2) of the forward-backward probability
     * for the token with the specified index and tag with the
     * specified identifier. The result is the sum of
     * {@link #log2Forward(int,int)} and {@link #log2Backward(int,int)}.
     * Subtracting the log total probability as given by
     * {@link #log2Total()} yields the log of the normalized state
     * probability. See {@link #forwardBackward(int,int)} for more
     * information.
     *
     * @param tokenIndex Index of token.
     * @param tagId Identifier of tag in symbol table.
     * @return Log forward-backward probability for the token and tag.
     * @throws IndexOutOfBoundsException If the token index or the
     * tag identifier is out of bounds.
     */
    public double log2ForwardBackward(int tokenIndex, int tagId) {
        return log2Forward(tokenIndex, tagId) + log2Backward(tokenIndex, tagId);
    }

    /**
     * Returns the total probability for all paths in the lattice.
     * This probability is the marginal probability of the input
     * tokens. This probability will be equal to the sum of the
     * forward-backward probabilities at any token index. If there
     * are no tokens in the lattice, the total probability is 1.0, and
     * the log probability is 0.0.
     *
     * <P>The conditional probability of a state at a given token
     * position given the entire input is equal to the
     * forward-backward probability divided by the total
     * probability. The forward-backward probability is the joint
     * probability of the input tokens and state, whereas the total
     * probability is the probability of the input tokens.
     *
     * <P><i>Warning:</i> This value is likely to underflow for long
     * inputs; in this case use {@link #log2Total()} instead.
     *
     * @return Total probability for the lattice.
     */
    public double total() {
        return mTotal;
    }

    /**
     * Returns the log (base 2) total probability for all paths in the
     * lattice. See {@link #total()} for more information.
     *
     * @return Log total probability for the lattice.
     */
    public double log2Total() {
        return mLog2Total;
    }

    // Fills in the forward lattice, the backward lattice and then the
    // totals. computeTotal() reads row 0 of both lattices, so it must
    // run after the other two.
    final void computeAll() {
        computeForward();
        computeBackward();
        computeTotal();
    }

    // Sums forward * backward over all tags at token 0 and stores the
    // result both as a plain probability and as a base-2 log.
    private void computeTotal() {
        if (mForwards.length == 0) {
            // empty lattice: probability 1, log probability 0
            mTotal = 1.0;
            mLog2Total = 0.0;
            return;
        }
        double total = 0.0;
        int numSymbols = tagSymbolTable().numSymbols();
        for (int tagId = 0; tagId < numSymbols; ++tagId) {
            total += mForwards[0][tagId] * mBacks[0][tagId];
        }
        // both rows are stored scaled; their exponents add under the product
        double exp = mForwardExps[0] + mBackExps[0];
        mLog2Total = com.aliasi.util.Math.log2(total) + exp;
        mTotal = total * Math.pow(2.0, exp);
    }

    // Forward pass. Row 0 is seeded from mStarts; every later row is the
    // previous (already rescaled) row pushed through mTransitions[tokenId].
    // Each row is rescaled in place and its exponent accumulated.
    private void computeForward() {
        if (mForwards.length == 0) {
            return;
        }
        int numSymbols = tagSymbolTable().numSymbols();
        double[] forwards = mForwards[0];
        for (int tagId = 0; tagId < numSymbols; ++tagId) {
            // negative start values are clamped to zero (mStarts is updated too)
            if (mStarts[tagId] < 0.0) {
                mStarts[tagId] = 0.0;
            }
            forwards[tagId] = mStarts[tagId]; // could assign array
        }
        mForwardExps[0] = log2ScaleExp(forwards);

        int numToks = mTokens.length;
        for (int tokenId = 1; tokenId < numToks; ++tokenId) {
            forwards = mForwards[tokenId - 1];
            double[][] transits = mTransitions[tokenId];
            for (int tagId = 0; tagId < numSymbols; ++tagId) {
                double f = 0.0;
                for (int prevTagId = 0; prevTagId < numSymbols; ++prevTagId) {
                    f += forwards[prevTagId] * transits[prevTagId][tagId];
                }
                mForwards[tokenId][tagId] = f;
            }
            // the previous row was scaled, so exponents are cumulative
            mForwardExps[tokenId]
                = log2ScaleExp(mForwards[tokenId]) + mForwardExps[tokenId - 1];
        }
    }

    // Backward pass. The last row is seeded from mEnds; every earlier row
    // is the following (already rescaled) row pulled back through
    // mTransitions[tokenId + 1]. Each row is rescaled in place and its
    // exponent accumulated.
    private void computeBackward() {
        if (mBacks.length == 0) {
            return;
        }
        int numSymbols = tagSymbolTable().numSymbols();
        int lastTok = mTokens.length - 1;
        double[] backs = mBacks[lastTok];
        for (int tagId = 0; tagId < numSymbols; ++tagId) {
            backs[tagId] = mEnds[tagId]; // could assign array
        }
        mBackExps[lastTok] = log2ScaleExp(backs);

        for (int tokenId = lastTok - 1; tokenId >= 0; --tokenId) {
            backs = mBacks[tokenId + 1];
            double[][] transits = mTransitions[tokenId + 1];
            for (int tagId = 0; tagId < numSymbols; ++tagId) {
                double b = 0.0;
                for (int nextTagId = 0; nextTagId < numSymbols; ++nextTagId) {
                    b += backs[nextTagId] * transits[tagId][nextTagId];
                }
                mBacks[tokenId][tagId] = b;
            }
            // the following row was scaled, so exponents are cumulative
            mBackExps[tokenId]
                = log2ScaleExp(mBacks[tokenId]) + mBackExps[tokenId + 1];
        }
    }

    /**
     * Rescales the specified array of straight (non-log) probabilities
     * in place by a power of two so that its largest element is at
     * least 0.5, and returns the base-2 exponent that undoes the
     * scaling: each original value equals the rescaled value times
     * <code>2<sup>exp</sup></code>. The returned exponent is never
     * positive. An empty array, or an array whose maximum is 0.0, is
     * left unchanged and yields 0.0.
     *
     * @param xs Probabilities to rescale; modified in place.
     * @return Non-positive base-2 exponent of the scale factor removed.
     * @throws IllegalArgumentException If the largest element is less
     * than 0.0 or greater than 1.0.
     */
    static double log2ScaleExp(double[] xs) {
        if (xs.length == 0) {
            return 0.0;
        }
        double max = xs[0];
        for (int i = 1; i < xs.length; ++i) {
            if (max < xs[i]) {
                max = xs[i];
            }
        }
        if (max < 0.0 || max > 1.0) {
            String msg = "Max must be >= 0 and <= 1."
                + " max=" + max;
            throw new IllegalArgumentException(msg);
        }
        // Count the doublings as an int rather than accumulating the
        // multiplier in a double: for a subnormal max more than 1023
        // doublings are needed and 2^1024 is not representable, which
        // would turn the multiplier into Infinity.
        int shift = 0;
        while (max != 0.0 && max < 0.5) {
            max *= 2.0;
            ++shift;
        }
        if (shift > 0) {
            for (int j = 0; j < xs.length; ++j) {
                xs[j] = Math.scalb(xs[j], shift);
            }
        }
        return -shift;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -