📄 hmmtree.java
字号:
/**
 * Freezes every entry point stored in the entry point table.
 */
void freeze() {
    Iterator it = entryPoints.values().iterator();
    while (it.hasNext()) {
        EntryPoint entryPoint = (EntryPoint) it.next();
        entryPoint.freeze();
    }
}

/**
 * Dumps the entry point table
 */
void dump() {
    Iterator it = entryPoints.values().iterator();
    while (it.hasNext()) {
        EntryPoint entryPoint = (EntryPoint) it.next();
        entryPoint.dump();
    }
}
} // closes the enclosing table class (its header precedes this excerpt)

/**
 * Manages a single entry point.
 */
class EntryPoint {
    Unit baseUnit;
    Node baseNode;      // second units and beyond start here
    Map unitToEntryPointMap;
    List singleUnitWords;
    int nodeCount = 0;
    Set rcSet;
    float totalProbability;

    /**
     * Creates an entry point for the given unit
     *
     * @param baseUnit the EntryPoint is created for this unit
     */
    EntryPoint(Unit baseUnit) {
        this.baseUnit = baseUnit;
        this.baseNode = new Node(LogMath.getLogZero());
        this.unitToEntryPointMap = new HashMap();
        this.singleUnitWords = new ArrayList();
        this.totalProbability = LogMath.getLogZero();
    }

    /**
     * Looks up the node registered for a left context. That node
     * represents a single set of entry points into this unit.
     *
     * @param leftContext the left context of interest
     *
     * @return the node stored for the left context, or null if the
     * map holds no entry for it
     */
    Node getEntryPointsFromLeftContext(Unit leftContext) {
        Object entry = unitToEntryPointMap.get(leftContext);
        return (Node) entry;
    }

    /**
     * Records a probability for this entry point. Only the largest
     * value seen so far is kept.
     *
     * @param probability a new probability
     */
    void addProbability(float probability) {
        // Written as a negated '>' so that comparison semantics
        // (including NaN handling) stay exactly those of '>'.
        if (!(probability > totalProbability)) {
            return;
        }
        totalProbability = probability;
    }

    /**
     * Returns the probability recorded for this entry point
     *
     * @return the log probability
     */
    float getProbability() {
        return totalProbability;
    }

    /**
     * Once the full entry point has been built, freezes every node
     * in the entry point map and drops the fields that were only
     * needed while building.
     */
    void freeze() {
        Iterator nodes = unitToEntryPointMap.values().iterator();
        while (nodes.hasNext()) {
            Node node = (Node) nodes.next();
            node.freeze();
        }
        singleUnitWords = null;
        rcSet = null;
    }

    /**
     * Gets the base node for this entry point
     *
     * @return the base node
     */
    Node getNode() {
        return baseNode;
    }

    /**
     * Adds a one-unit word to this entry point. Such single unit
     * words need to be dealt with specially.
     *
     * @param p the pronunciation of the single unit word
     */
    void addSingleUnitWord(Pronunciation p) {
        singleUnitWords.add(p);
    }

    /**
     * Gets the set of possible right contexts that we can
     * transition to from this entry point. The set is built lazily
     * from the base units of the base node's successors and then
     * cached in rcSet.
     *
     * @return the set of possible transition points.
     */
    private Collection getEntryPointRC() {
        if (rcSet != null) {
            return rcSet;
        }
        rcSet = new HashSet();
        Iterator successors = baseNode.getSuccessors().iterator();
        while (successors.hasNext()) {
            UnitNode unitNode = (UnitNode) successors.next();
            rcSet.add(unitNode.getBaseUnit());
        }
        return rcSet;
    }

    /**
     * Creates the entry point map for this entry point. The
     * entry point map is represented by the unitToEntryPointMap.
     * It contains a node for each possible left context. The node
     * successors point to the following hmm nodes (usually
     * associated with the next units that can follow from this
     * entry point).
     */
    void createEntryPointMap() {
        Iterator leftContexts = exitPoints.iterator();
        while (leftContexts.hasNext()) {
            Unit lc = (Unit) leftContexts.next();
            Node epNode = new Node(LogMath.getLogZero());

            Iterator rightContexts = getEntryPointRC().iterator();
            while (rightContexts.hasNext()) {
                Unit rc = (Unit) rightContexts.next();
                HMM hmm = getHMM(baseUnit, lc, rc, HMMPosition.BEGIN);
                Node addedNode = epNode.addSuccessor(hmm, getProbability());
                nodeCount++;
                connectEntryPointNode(addedNode, rc);
            }

            connectSingleUnitWords(lc, epNode);
            unitToEntryPointMap.put(lc, epNode);
        }
    }

    /**
     * An alternate version of createEntryPointMap that compresses
     * common hmms across all entry points, not just those sharing
     * the same left context. This really doesn't speed things
     * up in the least bit, so it is not worth the effort.
     */
    void createEntryPointMap_alternateVersion() {
        HashMap map = new HashMap();    // hmm -> node shared by all left contexts

        Iterator leftContexts = exitPoints.iterator();
        while (leftContexts.hasNext()) {
            Unit lc = (Unit) leftContexts.next();
            Node epNode = new Node(LogMath.getLogZero());

            Iterator rightContexts = getEntryPointRC().iterator();
            while (rightContexts.hasNext()) {
                Unit rc = (Unit) rightContexts.next();
                HMM hmm = getHMM(baseUnit, lc, rc, HMMPosition.BEGIN);
                Node addedNode = (Node) map.get(hmm);
                if (addedNode != null) {
                    // reuse the node already created for this hmm
                    epNode.putSuccessor(hmm, addedNode);
                } else {
                    addedNode = epNode.addSuccessor(hmm, getProbability());
                    map.put(hmm, addedNode);
                }
                nodeCount++;
                connectEntryPointNode(addedNode, rc);
            }

            connectSingleUnitWords(lc, epNode);
            unitToEntryPointMap.put(lc, epNode);
        }
    }

    /**
     * Connects the single unit words associated with this entry
     * point. The singleUnitWords list contains all single unit
     * pronunciations that have as their sole unit, the unit
     * associated with this entry point. Entry points for these
     * words are added to the epNode for all possible left (exit)
     * and right (entry) contexts.
     *
     * @param lc the left context
     * @param epNode the entry point node
     */
    private void connectSingleUnitWords(Unit lc, Node epNode) {
        if (singleUnitWords.size() == 0) {
            return;
        }

        Iterator rightContexts = entryPoints.iterator();
        while (rightContexts.hasNext()) {
            Unit rc = (Unit) rightContexts.next();
            HMM hmm = getHMM(baseUnit, lc, rc, HMMPosition.SINGLE);
            HMMNode tailNode =
                (HMMNode) epNode.addSuccessor(hmm, getProbability());
            tailNode.addRC(rc);
            nodeCount++;

            for (int index = 0; index < singleUnitWords.size(); index++) {
                Pronunciation p = (Pronunciation) singleUnitWords.get(index);
                if (p.getWord() != dictionary.getSentenceStartWord()) {
                    float prob = getWordUnigramProbability(p.getWord());
                    WordNode wordNode =
                        (WordNode) tailNode.addSuccessor(p, prob);
                    if (p.getWord() == dictionary.getSentenceEndWord()) {
                        sentenceEndWordNode = wordNode;
                    }
                } else {
                    // the sentence start word gets its own initial node
                    initialNode = new InitialWordNode(p, tailNode);
                }
                nodeCount++;
            }
        }
    }

    /**
     * Connect the entry points that match the given rc to the
     * given epNode
     *
     * @param epNode add matching successors here
     * @param rc the next unit
     */
    private void connectEntryPointNode(Node epNode, Unit rc) {
        Iterator successors = baseNode.getSuccessors().iterator();
        while (successors.hasNext()) {
            UnitNode candidate = (UnitNode) successors.next();
            if (candidate.getBaseUnit() != rc) {
                continue;
            }
            epNode.addSuccessor(candidate);
        }
    }

    /**
     * Dumps the entry point: a header line with the number of right
     * contexts, followed by the padded names of those contexts.
     */
    void dump() {
        System.out.println("EntryPoint " + baseUnit + " RC Followers: "
                + getEntryPointRC().size());
        Collection rcs = getEntryPointRC();
        int printedInRow = 0;
        System.out.print(" ");
        Iterator it = rcs.iterator();
        while (it.hasNext()) {
            Unit rc = (Unit) it.next();
            System.out.print(Utilities.pad(rc.getName(), 4));
            if (printedInRow >= 12) {
                // row is full: start a new output line
                printedInRow = 0;
                System.out.println();
                System.out.print(" ");
            } else {
                printedInRow++;
            }
        }
        System.out.println();
    }
}
} // closes the enclosing outer class (its header precedes this excerpt)

/**
 * Represents a node in the HMM Tree
 */
// For large vocabularies we may create millions of these objects,
// therefore they are extremely space sensitive. So we want to make
// these objects as small as possible.
The requirements for these// objects when building the tree of nodes are very different from once// we have built it. When building, we need to easily add successor// nodes and quickly identify duplicate children nodes. After the tree// is built we just need to quickly identify successors. We want the// flexibility of a map to manage successors at startup, but we don't// want the space penalty (at least 5 32 bit fields per map), instead// we'd like an array. To support this dual mode, we manage the// successors in an Object which can either be a Map or a List// depending upon whether the node has been frozen or not.class Node { private static int nodeCount = 0; private static int successorCount = 0; private static Map wordNodeMap = new HashMap(); private Object successors = null; private float logUnigramProbability; /** * Creates a node * * @param probability the unigram probability for the node */ Node(float probability) { logUnigramProbability = probability; nodeCount++; if (false) { if ((nodeCount % 10000) == 0) { System.out.println("NC " + nodeCount); } } } /** * Returns the unigram probability * * @return the unigram probability */ public float getUnigramProbability() { return logUnigramProbability; } /** * Sets the unigram probability * * @param probability the unigram probability */ public void setUnigramProbability(float probability) { logUnigramProbability = probability; } /** * Given an object get the set of successors for this object * * @param key the object key * * @return the node containing the successors */ private Node getSuccessor(Object key) { Map successors = getSuccessorMap(); return (Node) successors.get(key); } /** * Add the child to the set of successors * * @param key the object key * @param child the child to add */ void putSuccessor(Object key, Node child) { Map successors = getSuccessorMap(); successors.put(key, child); } /** * Gets the successor map for this node * * @return the successor map */ private Map getSuccessorMap() { if 
(successors == null) { successors = new HashMap(4); } assert successors instanceof Map; return (Map) successors; } /** * Freeze the node. Convert the successor map into an array list */ void freeze() { if (successors instanceof Map) { Map map = getSuccessorMap(); List frozenSuccessors = new ArrayList(map.values().size()); successors = null; // avoid recursive death spiral for (Iterator i = map.values().iterator(); i.hasNext();) { Node node = (Node) i.next(); frozenSuccessors.add(node); node.freeze(); } successors = frozenSuccessors; successorCount += frozenSuccessors.size(); } } static void dumpNodeInfo() { System.out.println("Nodes: " + nodeCount + " successors " + successorCount + " avg " + (successorCount / nodeCount)); } /** * Adds a child node holding an hmm to the successor. If a node similar to * the child has already been added, we use the previously * added node, otherwise we add this. Also, we record the base * unit of the child in the set of right context * * @param hmm the hmm to add * @return the node that holds the hmm (new or old) */ Node addSuccessor(HMM hmm, float probability) { Node child = null; Node matchingChild = getSuccessor(hmm); if (matchingChild == null) { child = new HMMNode(hmm, probability); putSuccessor(hmm, child); } else { if (matchingChild.getUnigramProbability() < probability) { matchingChild.setUnigramProbability(probability); } child = matchingChild; } return child; } /** * Adds a child node holding a pronunciation to the successor. * If a node similar to * the child has already been added, we use the previously * added node, otherwise we add this. 
Also, we record the base * unit of the child in the set of right context * * @param pronunciation the pronunciation to add * * @return the node that holds the pronunciation (new or old) */ WordNode addSuccessor(Pronunciation pronunciation, float probability) { WordNode child = null; WordNode matchingChild = (WordNode) getSuccessor(pronunciation); if (matchingChild == null) { child = getWordNode(pronunciation, probability); putSuccessor(pronunciation, child); } else { if (matchingChild.getUnigramProbability() < probability) { matchingChild.setUnigramProbability(probability); } child = matchingChild; } return child; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -