📄 tagwordlattice.java
字号:
/* * LingPipe v. 3.5 * Copyright (C) 2003-2008 Alias-i * * This program is licensed under the Alias-i Royalty Free License * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i * Royalty Free License Version 1 for more details. * * You should have received a copy of the Alias-i Royalty Free License * Version 1 along with this program; if not, visit * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211, * +1 (718) 290-9170. */package com.aliasi.hmm;import com.aliasi.symbol.SymbolTable;import com.aliasi.util.ScoredObject;import java.util.Arrays;/** * A <code>TagWordLattice</code> encodes a lattice resulting from * decoding a hidden Markov model (HMM). The lattice encodes the * tokens used as input and the tag symbol table, as well as matrices * for transition, forward and backward scores. * * <P>The lattice probabilities are factored into start, transition, * and end probabilities. In general, the start, transition and * forward probabilities include the emission probabilities of their * destination tag. The backward probabilities include all emissions * up to, but not including, the indexed node. * * @author Bob Carpenter * @version 3.0 * @since LingPipe2.1 */public class TagWordLattice { final double[][][] mTransitions; final double[][] mForwards; final double[] mForwardExps; final double[][] mBacks; final double[] mBackExps; final double[] mStarts; final double[] mEnds; final String[] mTokens; final SymbolTable mTagSymbolTable; double mTotal = Double.NaN; double mLog2Total = Double.NaN; /** * Construct a tag-word lattice for the specified token inputs and * the specified tag symbol table with the specified estimates. * This constructor also allocates the forward and backward arrays * which are the size of the number of tokens times the number of * tags. 
* * <P>There are a number of consistency conditions on the input: * * <UL> * * <LI> The tag symbol table should not be empty. * * <LI> Start and end probability arrays must be the same length * as the number of tags in the symbol table. Start tags include * the start tag estimate and the emission estimate for the first * token. End tags only include the end transition probabilities. * * <LI> Transit probability array should be dimension of number of * tokens times number of tags times number of tags. They should * contain estimates of transition from the first tag to the * second tag and emitting the token. * * <LI> Values of the transit probabilities for the first token are * ignored because they are computed by the start probs without * previous states. * * </UL> * * @param tokens Array of input tokens. * @param tagSymbolTable Symbol table for tags. * @param startProbs Array of start probabilities. * @param endProbs Array of end probabilities. * @param transitProbs Array of transition probabilities. * @throws IllegalArgumentException If any of the probabilities are * not between 0.0 and 1.0 inclusive. 
*/ public TagWordLattice(String[] tokens, SymbolTable tagSymbolTable, double[] startProbs, double[] endProbs, double[][][] transitProbs) { for (int i = 0; i < startProbs.length; ++i) { if (startProbs[i] < 0.0 || startProbs[i] > 1.0) { String msg = "startProbs[" + i + "]=" + startProbs[i]; throw new IllegalArgumentException(msg); } } for (int i = 0; i < endProbs.length; ++i) { if (endProbs[i] < 0.0 || endProbs[i] > 1.0) { String msg = "endProbs[" + i + "]=" + endProbs[i]; throw new IllegalArgumentException(msg); } } for (int i = 1; i < transitProbs.length; ++i) { for (int j = 0; j < transitProbs[i].length; ++j) { for (int k = 0; k < transitProbs[i][j].length; ++k) { if (transitProbs[i][j][k] < 0.0 || transitProbs[i][j][k] > 1.0) { String msg = "transitProbs[" + i + "][" + j + "][" + k + "]=" + transitProbs[i][j][k]; throw new IllegalArgumentException(msg); } } } } int numTags = tagSymbolTable.numSymbols(); int numTokens = tokens.length; mStarts = startProbs; mEnds = endProbs; mTransitions = transitProbs; mTokens = tokens; mTagSymbolTable = tagSymbolTable; mForwards = new double[numTokens][numTags]; mForwardExps = new double[numTokens]; // Arrays.fill(mForwardExps,0.0); mBacks = new double[numTokens][numTags]; mBackExps = new double[numTokens]; // Arrays.fill(mBackExps,0.0); computeAll(); } /** * Returns the array of tokens underlying this tag-word lattice. * * @return The array of tokens for this lattice. */ public String[] tokens() { return mTokens; } /** * Return the symbol table for tags in this tag-word lattice. * * @return The symbol table for the lattice. */ public SymbolTable tagSymbolTable() { return mTagSymbolTable; } /** * Returns the array of tag-score pairs for the specified token * index as scored objects in order of descending score. The * scores are log (base 2) conditional probabilities of the tag * being assigned to the specified token given the token sequence. * * @param tokenIndex Token index whose tags are returned. 
* @return Array of scored tags for the specified index. */ public ScoredObject<String>[] log2ConditionalTags(int tokenIndex) { double log2Total = log2Total(); SymbolTable st = mTagSymbolTable; int numTags = st.numSymbols(); ScoredObject<String>[] scoredTags = (ScoredObject<String>[]) new ScoredObject[numTags]; for (int tagId = 0; tagId < numTags; ++tagId) { String tag = st.idToSymbol(tagId); double log2P = log2ForwardBackward(tokenIndex,tagId); double condLog2P = log2P - log2Total; if (condLog2P > 0.0) condLog2P = 0.0; else if (Double.isNaN(condLog2P) || Double.isInfinite(condLog2P)) condLog2P = com.aliasi.util.Math.log2(Double.MIN_VALUE); scoredTags[tagId] = new ScoredObject<String>(tag,condLog2P); } Arrays.sort(scoredTags,ScoredObject.REVERSE_SCORE_COMPARATOR); return scoredTags; } /** * Returns the array of tags with the best forward-backward * probabilities for each token position. * * <P><i>Note:</i> This is the independent optimization of each * position and is not guaranteed to yield the sequence of states * that has the highest probability. * * @return Array of tags with the best forward-backward scores. */ public String[] bestForwardBackward() { String[] bestTags = new String[mTokens.length]; int numTags = mTagSymbolTable.numSymbols(); for (int i = 0; i < bestTags.length; ++i) { int bestTagId = 0; double bestFB = forwardBackward(i,0); for (int tagId = 1; tagId < numTags; ++tagId) { double fb = forwardBackward(i,tagId); if (fb > bestFB) { bestFB = fb; bestTagId = tagId; } } bestTags[i] = mTagSymbolTable.idToSymbol(bestTagId); } return bestTags; } /** * Return the probability of the lattice starting with the tag * with the specified identifier and emitting the first input token. * * @param tagId Identifier for the tag in the symbol table. * @return Start probability. * @throws IndexOutOfBoundsException If the tagId is out of bounds. 
*/
    public double start(int tagId) {
        final double startProb = mStarts[tagId];
        return startProb;
    }

    /**
     * Return the log (base 2) of the probability that the lattice
     * starts with the tag with the specified identifier and emits
     * the first input token.  See {@link #start(int)} for more
     * information.
     *
     * @param tagId Identifier for the tag in the symbol table.
     * @return Log start probability.
     * @throws IndexOutOfBoundsException If the tagId is out of bounds.
     */
    public double log2Start(int tagId) {
        final double startProb = start(tagId);
        return com.aliasi.util.Math.log2(startProb);
    }

    /**
     * Return the probability of the lattice ending with the specified
     * tag.  Note that this does not include the probability of
     * emitting the final token.
     *
     * @param tagId Identifier for the tag in the symbol table.
     * @return End probability.
     * @throws IndexOutOfBoundsException If the tag identifier is out
     * of bounds.
     */
    public double end(int tagId) {
        final double endProb = mEnds[tagId];
        return endProb;
    }

    /**
     * Return the log (base 2) of the probability that the lattice
     * ends with the specified tag.  See {@link #end(int)} for more
     * information.
     *
     * @param tagId Identifier for the tag in the symbol table.
     * @return Log end probability.
     * @throws IndexOutOfBoundsException If the tag identifier is out
     * of bounds.
     */
    public double log2End(int tagId) {
        final double endProb = end(tagId);
        return com.aliasi.util.Math.log2(endProb);
    }

    /**
     * Returns the transition probability for the specified token index
     * and source and target tag identifiers.  This transition
     * probability includes the transition from the source tag
     * to the target tag times the probability of the target tag
     * emitting the token at the specified index.
     *
     * <P>Note that the token index cannot be zero here, as it is the
     * index of the target of a transition.
     *
     * @param tokenIndex Index of token.
     * @param sourceTagId Identifier for source tag in symbol table.
     * @param targetTagId Identifier for target tag in symbol table.
     * @return Transition score from source tag to target tag arriving
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -