📄 latticeexpand.cc
        assert(oldNode != 0);

        unsigned contextLength = Vocab::length(context);

        makeArray(VocabIndex, newContext, contextLength + 2);
        Vocab::copy(&newContext[1], context);
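        // newContext[0] is left free for this node's word (filled in below);
        // newContext[1..] now hold a copy of the incoming context, including
        // its Vocab_None terminator.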
        TRANSITER_T<NodeIndex,LatticeTransition>
                                transIter(oldNode->outTransitions);
        NodeIndex oldIndex2;
        while (LatticeTransition *oldTrans = transIter.next(oldIndex2)) {
            LatticeNode *oldNode2 = findNode(oldIndex2);
            assert(oldNode2 != 0);

            VocabIndex word = oldNode2->word;

            // determine context used by LM
            unsigned usedLength = 0;
            VocabIndex *usedContext;
            LogP wordProb;

            // The old context is extended by word on this node, unless
            // it is null or pause, which are both ignored by the LM.
            // Non-events are added to the context but aren't evaluated.
            if (ignoreWord(word)) {
                usedContext = &newContext[1];
                wordProb = LogP_One;
            } else if (vocab.isNonEvent(word)) {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = LogP_One;
            } else {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = lm.wordProb(word, context);
            }

            // find all possible following words and determine maximal context
            // needed for wordProb computation
            // skip pause and null nodes up to some depth
            LatticeFollowIter followIter(*this, *oldNode2);
            NodeIndex oldIndex3;
            LatticeNode *oldNode3;
            LogP weight;

            while ((oldNode3 = followIter.next(oldIndex3, weight)) != 0) {
                // if one of the following nodes has null or pause as word then
                // we don't attempt any further look-ahead and use the maximal
                // context from the LM
                VocabIndex nextWord = oldNode3->word;
                if (ignoreWord(nextWord)) {
                    insufficientLookaheadNodes += 1;
                    nextWord = Vocab_None;
                }

                unsigned usedLength2;
                lm.contextID(nextWord, usedContext, usedLength2);

                if (usedLength2 > usedLength) {
                    usedLength = usedLength2;
                }
            }
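            // usedLength now holds the longest context any following word's
            // LM lookup needs, so a single duplicate of oldNode2 carrying
            // that maximal context can serve all of its continuations.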
            if (!expandAddTransition(usedContext, usedLength,
                                     word, wordProb, lm,
                                     oldIndex2, *newIndex, oldTrans,
                                     maxNodes, expandMap))
                return false;
        }
    }

    if (insufficientLookaheadNodes > 0) {
        dout() << "Lattice::expandNodeToLM: insufficient lookahead on "
               << insufficientLookaheadNodes << " nodes\n";
    }

    // old node (and transitions) is fully replaced by expanded nodes
    // (and transitions)
    removeNode(oldIndex);

    // save space in expandMap by deleting entries that are no longer used
    expandMap.remove(oldIndex);

    return true;
}
/*
 * The "compact expansion" version makes two changes:
 *
 * - add outgoing transitions to node duplicates with shorter contexts,
 *   not just the maximal context.
 * - only transition out of an expanded node if the LM uses the full context
 *   associated with that node (*** A *** below).
 *   This is possible because of the first change, and it's
 *   where the savings compared to the general expansion are realized.
 *
 * The resulting algorithm is a generalization of expandToCompactTrigram().
 */
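/*
 * Illustration (not part of the original comments): with a trigram LM a
 * successor node may receive transitions into both a bigram-context and a
 * unigram-context duplicate of itself, but only the duplicate whose full
 * context the LM actually uses is expanded further at *** A ***.
 */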
Boolean
Lattice::expandNodeToCompactLM(VocabIndex oldIndex, LM &lm, unsigned maxNodes,
                        Map2<NodeIndex, VocabContext, NodeIndex> &expandMap)
{
    unsigned insufficientLookaheadNodes = 0;

    Map2Iter2<NodeIndex, VocabContext, NodeIndex>
#ifdef USE_SARRAY_MAP2
        expandIter(expandMap, oldIndex);
#else
        expandIter(expandMap, oldIndex, ngramCompare);
#endif
    NodeIndex *newIndex;
    VocabContext context;

    while ((newIndex = expandIter.next(context)) != 0) {
        // node structure might have been moved as a result of insertions
        LatticeNode *oldNode = findNode(oldIndex);
        assert(oldNode != 0);

        Boolean ignoreOldNodeWord = ignoreWord(oldNode->word);

        unsigned contextLength = Vocab::length(context);

        makeArray(VocabIndex, newContext, contextLength + 2);
        Vocab::copy(&newContext[1], context);

        TRANSITER_T<NodeIndex,LatticeTransition>
                                transIter(oldNode->outTransitions);
        NodeIndex oldIndex2;
        while (LatticeTransition *oldTrans = transIter.next(oldIndex2)) {
            LatticeNode *oldNode2 = findNode(oldIndex2);
            assert(oldNode2 != 0);

            VocabIndex word = oldNode2->word;

            // Find the context length used for LM transition oldNode->oldNode2.
            // Because each LM context gets its own node duplicate
            // in this expansion algorithm, we only need to process
            // transitions where the LM context fully matches the context
            // associated with the specific oldNode duplicate we're working on.
            unsigned usedLength0;
            lm.contextID(word, context, usedLength0);

            // if the node we're coming from has a real word then
            // there is no point in using a null context
            // (so we can collapse null and unigram contexts)
            if (usedLength0 == 0 && !ignoreOldNodeWord) {
                usedLength0 = 1;
            }

            // *** A ***
            if (!ignoreWord(word) &&
                context[0] != vocab.ssIndex() &&
                usedLength0 != contextLength)
            {
                continue;
            }
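            // When the LM uses fewer context words than this duplicate
            // carries, the same transition is (or will be) handled from the
            // duplicate whose context is exactly the length the LM uses,
            // so skipping it here loses nothing.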
            // determine context used by LM
            VocabIndex *usedContext;
            LogP wordProb;

            // The old context is extended by word on this node, unless
            // it is null or pause, which are both ignored by the LM.
            // Non-events are added to the context but aren't evaluated.
            if (ignoreWord(word)) {
                usedContext = &newContext[1];
                wordProb = LogP_One;
            } else if (vocab.isNonEvent(word)) {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = LogP_One;
            } else {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = lm.wordProb(word, context);
            }

            // find all possible following words and compute the LM context
            // used for their prediction,
            // then create duplicate nodes for each context length
            // needed for wordProb computation
            LatticeFollowIter followIter(*this, *oldNode2);
            NodeIndex oldIndex3;
            LatticeNode *oldNode3;
            LogP weight;

            int lastUsedLength = -1;

            while ((oldNode3 = followIter.next(oldIndex3, weight)) != 0) {
                unsigned usedLength;

                // if one of the following nodes has null or pause as word then
                // we have to back off to the null context to make sure
                // further expansion can connect above at *** A ***
                VocabIndex nextWord = oldNode3->word;
                if (ignoreWord(nextWord)) {
                    insufficientLookaheadNodes += 1;
                    usedLength = 0;
                } else {
                    lm.contextID(nextWord, usedContext, usedLength);
                }

                // if the node we're going to has a real word then
                // there is no point in using a null context (same as above)
                if (usedLength == 0 && !ignoreWord(word)) {
                    usedLength = 1;
                }

                // optimization: no need to re-add the transition if usedLength
                // is the same as on the previous pass through
                // (lastUsedLength starts at -1, which never matches, so the
                // first transition is always added)
                if (usedLength == lastUsedLength) {
                    continue;
                } else {
                    lastUsedLength = usedLength;
                }

                if (!expandAddTransition(usedContext, usedLength,
                                         word, wordProb, lm,
                                         oldIndex2, *newIndex, oldTrans,
                                         maxNodes, expandMap))
                    return false;
            }

            // transitions to the final node have to be handled specially
            // because the above loop won't apply to it, since the final node
            // has no outgoing transitions!
            if (oldIndex2 == final) {
                // by convention, the final node always uses a context of length 0
                if (!expandAddTransition(usedContext, 0,
                                         word, wordProb, lm,
                                         oldIndex2, *newIndex, oldTrans,
                                         maxNodes, expandMap))
                    return false;
            }
        }
    }

    if (insufficientLookaheadNodes > 0) {
        dout() << "Lattice::expandNodeToCompactLM: insufficient lookahead on "
               << insufficientLookaheadNodes << " nodes\n";
    }

    // old node (and transitions) is fully replaced by expanded nodes
    // (and transitions)
    removeNode(oldIndex);

    // save space in expandMap by deleting entries that are no longer used
    expandMap.remove(oldIndex);

    return true;
}
/*
 * Helper for expandNodeToLM() and expandNodeToCompactLM()
 */
Boolean
Lattice::expandAddTransition(VocabIndex *usedContext, unsigned usedLength,
                        VocabIndex word, LogP wordProb, LM &lm,
                        NodeIndex oldIndex2, VocabIndex newIndex,
                        LatticeTransition *oldTrans, unsigned maxNodes,
                        Map2<NodeIndex, VocabContext, NodeIndex> &expandMap)
{
    LogP transProb;

    if (ignoreWord(word)) {
        transProb = LogP_One;
    } else {
        transProb = wordProb;
    }

    if (!noBackoffWeights && usedContext[0] != vocab.seIndex()) {
        // back-off weight to truncate the context
        // (needs to be called before context truncation below)
        // Note: we check above that this is not a backoff weight after </s>,
        // which some LMs contain but should be ignored for our purposes
        // since lattices all end implicitly in </s>.
        transProb += lm.contextBOW(usedContext, usedLength);
    }

    // truncate context to what LM uses
    VocabIndex saved = usedContext[usedLength];
    usedContext[usedLength] = Vocab_None;
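    // usedContext, truncated in place to usedLength words, now doubles as
    // the Map2 key identifying the target node duplicate; the overwritten
    // entry is restored below before returning.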
    Boolean found;
    NodeIndex *newIndex2 =
                expandMap.insert(oldIndex2, (VocabContext)usedContext, found);

    if (!found) {
        LatticeNode *oldNode2 = findNode(oldIndex2);
        assert(oldNode2 != 0);

        // create new node copy and store it in map
        *newIndex2 = dupNode(word, oldNode2->flags, oldNode2->htkinfo);

        if (maxNodes > 0 && getNumNodes() > maxNodes) {
            dout() << "Lattice::expandAddTransition: "
                   << "aborting with number of nodes exceeding "
                   << maxNodes << endl;
            return false;
        }
    }
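    // expandMap memoizes (old node, LM context) -> duplicate node, so each
    // distinct context of a node is materialized at most once and later
    // transitions into the same context reuse the existing duplicate.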
    LatticeTransition newTrans(transProb, oldTrans->flags);
    insertTrans(newIndex, *newIndex2, newTrans, 0);

    // restore full context
    usedContext[usedLength] = saved;

    return true;
}
Boolean
Lattice::expandToLM(LM &lm, unsigned maxNodes, Boolean compact)
{
    if (debug(DebugPrintFunctionality)) {
        dout() << "Lattice::expandToLM: "
               << "starting " << (compact ? "compact " : "")
               << "expansion to general LM (maxNodes = " << maxNodes
               << ") ...\n";
    }

    unsigned numNodes = getNumNodes();

    NodeIndex *sortedNodes = new NodeIndex[numNodes];
    assert(sortedNodes != 0);
    unsigned numReachable = sortNodes(sortedNodes);

    if (numReachable != numNodes) {
        if (debug(DebugPrintOutLoop)) {
            dout() << "Lattice::expandToLM: warning: called with unreachable nodes\n";
        }
    }

    Map2<NodeIndex, VocabContext, NodeIndex> expandMap;

    // prime expansion map with initial/final nodes
    LatticeNode *startNode = findNode(initial);
    assert(startNode != 0);

    VocabIndex newStartIndex = dupNode(startNode->word, startNode->flags,
                                       startNode->htkinfo);

    VocabIndex context[2];
    context[1] = Vocab_None;

    context[0] = vocab.ssIndex();
    *expandMap.insert(initial, context) = newStartIndex;

    LatticeNode *endNode = findNode(final);
    assert(endNode != 0);

    VocabIndex newEndIndex = dupNode(endNode->word, endNode->flags,
                                     endNode->htkinfo);

    context[0] = Vocab_None;
    *expandMap.insert(final, context) = newEndIndex;
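    // The initial node is seeded with the <s> context and the final node
    // with an empty context (matching the length-0 convention used for
    // transitions into the final node).  Nodes are then expanded in the
    // (topologically) sorted order produced by sortNodes(), so each node's
    // (node, context) entries have already been added to expandMap by its
    // predecessors by the time the node itself is expanded below.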
    for (unsigned i = 0; i < numReachable; i++) {
        NodeIndex nodeIndex = sortedNodes[i];

        if (nodeIndex == final) {
            removeNode(final);
        } else if (compact) {
            if (!expandNodeToCompactLM(nodeIndex, lm, maxNodes, expandMap)) {
                delete [] sortedNodes;
                return false;
            }
        } else {
            if (!expandNodeToLM(nodeIndex, lm, maxNodes, expandMap)) {
                delete [] sortedNodes;
                return false;
            }
        }
    }

    initial = newStartIndex;
    final = newEndIndex;

    delete [] sortedNodes;
    return true;
}