📄 latticeexpand.cc
字号:
if (!node) {
if (debug(DebugPrintFatalMessages)) {
dout() << "Fatal Error in Lattice::expandNodeToCompactTrigram: "
<< "current node has lost!\n";
}
exit(-1);
}
LatticeTransition * selfLoop = node->inTransitions.find(nodeIndex);
Boolean selfLoopFlag;
if (selfLoop) {
selfLoopFlag = true;
initASelfLoopDB(selfLoopDB, ngram, nodeIndex, node, selfLoop);
} else {
selfLoopFlag = false;
}
TRANSITER_T<NodeIndex,LatticeTransition> inTransIter(node->inTransitions);
TRANSITER_T<NodeIndex,LatticeTransition> outTransIter(node->outTransitions);
VocabIndex wordName = node->word;
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< " processing word name: " << getWord(wordName) << ", Index: "
<< nodeIndex << "\n";
}
// going through all its incoming edges
unsigned numInTrans = node->inTransitions.numEntries();
while (LatticeTransition *inTrans = inTransIter.next(fromNodeIndex)) {
if (nodeIndex == fromNodeIndex) {
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "jump over self loop: "
<< fromNodeIndex << "\n";
}
continue;
}
fromNode = findNode(fromNodeIndex);
if (!fromNode) {
if (debug(DebugPrintFatalMessages)) {
dout() << "Fatal Error in Lattice::expandNodeToCompactTrigram: "
<< "fromNode "
<< fromNodeIndex << " doesn't exist!\n";
}
exit(-1);
}
VocabIndex fromWordName = fromNode->word;
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "processing incoming edge: ("
<< fromNodeIndex
<< ", " << nodeIndex << ")\n"
<< " (" << getWord(fromWordName) << ", "
<< getWord(wordName) << ")\n";
}
// compute in-coming bigram prob
LogP inWeight;
if (fromNodeIndex == getInitial()) {
context[0] = fromWordName;
context[1] = Vocab_None;
// this transition cannot have been processed before.
inWeight = ngram.wordProb(wordName, context);
inTrans->weight = inWeight;
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "processing incoming edge: ("
<< fromNodeIndex
<< ", " << nodeIndex << ")\n"
<< " (" << getWord(fromWordName)
<< ", " << getWord(wordName) << ") = "
<< " " << inWeight << ";\n";
}
} else {
// the in-coming trans has been processed and
// we should preserve this value.
inWeight = inTrans->weight;
}
context[0] = wordName;
context[1] = fromWordName;
context[2] = Vocab_None;
// LogP inWeight = ngram.wordProb(wordName, context);
unsigned inFlag = inTrans->flags;
// initialize it for self loop processing.
if (selfLoopFlag) {
initBSelfLoopDB(selfLoopDB, ngram, fromNodeIndex, fromNode, inTrans); }
// going through all the outgoing edges
outTransIter.init();
while (LatticeTransition *outTrans = outTransIter.next(toNodeIndex)) {
if (nodeIndex == toNodeIndex) {
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "self loop: "
<< toNodeIndex << "\n";
}
continue;
}
LatticeNode * toNode = findNode(toNodeIndex);
if (!toNode) {
if (debug(DebugPrintFatalMessages)) {
dout() << "Fatal Error in Lattice::expandNodeToCompactTrigram: "
<< "toNode "
<< toNode << " doesn't exist!\n";
}
exit(-1);
}
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "the toNodeIndex ("
<< toNodeIndex << " has name "
<< getWord(toNode->word) << ")\n";
}
// initialize selfLoopDB;
if (selfLoopFlag) {
initCSelfLoopDB(selfLoopDB, toNodeIndex, outTrans);
}
// duplicate a node if the trigram exists.
// see class Ngram in file /home/srilm/devel/lm/src/Ngram.h
LogP * triProb;
if (triProb = ngram.findProb(toNode->word, context)) {
LogP logProb = *triProb;
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "tripleIndex ("
<< toNodeIndex << " | " << nodeIndex << ", "
<< fromNodeIndex << ")\n"
<< " trigram prob: ("
<< getWord(toNode->word) << " | " << context << ") found!\n";
}
// create one node and two edges to place trigram prob
PackInput packInput;
packInput.fromWordName = fromWordName;
packInput.wordName = wordName;
packInput.toWordName = toNode->word;
packInput.fromNodeIndex = fromNodeIndex;
packInput.toNodeIndex = toNodeIndex;
packInput.inWeight = inWeight;
packInput.inFlag = inFlag;
packInput.outWeight = logProb;
packInput.outFlag = outTrans->flags;
packInput.nodeIndex = nodeIndex;
packInput.toNodeId = 0;
packInput.lm = 0;
packedNodeList.packNodes(*this, packInput);
// to remove the outGoing edge if all the outgoing nodes have
// trigram probs.
if (numInTrans == 1 &&
!(outTrans->getFlag(reservedTFlag))) {
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "outgoing edge: ("
<< nodeIndex << ", " << toNodeIndex << ") is removed\n";
}
removeTrans(nodeIndex, toNodeIndex);
}
if (maxNodes > 0 && getNumNodes() > maxNodes) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "aborting with number of nodes exceeding "
<< maxNodes << endl;
return false;
}
} else {
// there is no trigram prob for this context
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "no trigram context ("
<< context << ") has been found -- keep "
<< fromNodeIndex << "\n";
}
// note down backoff context and in-coming node for
// preservation, in case explicit trigram does not exist.
bowContext = context;
outTrans->markTrans(reservedTFlag);
backoffNodeIndex = fromNodeIndex;
}
// processing selfLoop
if (selfLoopFlag) {
expandSelfLoop(ngram, selfLoopDB, packedSelfLoopNodeList);
}
} // end of inter-loop
// processing incoming bigram cases.
if (!bowContext) {
// for this context, all the toNodes have trigram probs
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "incoming edge ("
<< fromNodeIndex << ", " << nodeIndex
<< ") is removed\n";
}
removeTrans(fromNodeIndex, nodeIndex);
} else {
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "updating trigram backoffs on edge("
<< fromNodeIndex << ", " << nodeIndex << ")\n";
}
LogP * wordBOW = ngram.findBOW(bowContext);
if (!(wordBOW)) {
if (debug(DebugPrintOutLoop)) {
dout() << "nonFatal Error in Lattice::expandNodeToCompactTrigram: "
<< "language model - BOW ("
<< bowContext << ") missing!\n";
}
static LogP zerobow = 0.0;
wordBOW = &zerobow;
}
LogP logProbW = *wordBOW;
LogP weight = combWeights(inWeight, logProbW);
setWeightTrans(backoffNodeIndex, nodeIndex, weight);
bowContext = 0;
inBOW = 1;
}
numInTrans--;
} // end of out-loop
// if trigram prob exist for all the tri-node paths
if (!inBOW) {
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::expandNodeToCompactTrigram: "
<< "node "
<< getWord(wordName) << " (" << nodeIndex
<< ") has trigram probs for all its contexts\n"
<< " and its bigram lattice node is removed\n";
}
removeNode(nodeIndex);
} else {
node = findNode(nodeIndex);
if (selfLoopFlag) {
node->inTransitions.remove(nodeIndex);
node = findNode(nodeIndex);
selfLoop = node->outTransitions.remove(nodeIndex);
if (!selfLoop) {
if (debug(DebugPrintFatalMessages)) {
dout() << "nonFatal Error in Lattice::expandNodeToCompactTrigram: "
<< "non symetric setting \n";
}
exit(-1);
}
}
// process backoff to bigram weights.
TRANSITER_T<NodeIndex,LatticeTransition>
outTransIter(node->outTransitions);
while (LatticeTransition *outTrans = outTransIter.next(toNodeIndex)) {
LatticeNode * toNode = findNode(toNodeIndex);
context[0] = wordName;
context[1] = Vocab_None;
LogP weight = ngram.wordProb(toNode->word, context);
setWeightTrans(nodeIndex, toNodeIndex, weight);
}
}
return true;
}
/*
 * Expand every interior node of the lattice into its compact trigram form.
 *
 * The nodes are first ordered by sortNodes(); each node in that order,
 * other than the initial and final nodes, is then handed to
 * expandNodeToCompactTrigram().
 *
 *	ngram		language model forwarded to the per-node expansion
 *	maxNodes	node limit forwarded to the per-node expansion
 *
 * Returns false as soon as one per-node expansion reports failure,
 * true if all processed nodes were expanded.
 */
Boolean
Lattice::expandToCompactTrigram(Ngram &ngram, unsigned maxNodes)
{
    if (debug(DebugPrintFunctionality)) {
        dout() << "Lattice::expandToCompactTrigram: "
               << "starting expansion to compact trigram lattice ...\n";
    }

    unsigned totalNodes = getNumNodes();

    // scratch buffer receiving the node order computed by sortNodes()
    NodeIndex *order = new NodeIndex[totalNodes];
    assert(order != 0);

    unsigned reachable = sortNodes(order);

    // only the nodes returned by sortNodes() are processed below
    if (reachable != totalNodes && debug(DebugPrintOutLoop)) {
        dout() << "Lattice::expandToCompactTrigram: warning: called with unreachable nodes\n";
    }

    Boolean success = true;

    for (unsigned pos = 0; success && pos < reachable; pos++) {
        NodeIndex current = order[pos];

        // the initial and final nodes are never expanded
        if (current != initial && current != final) {
            if (!expandNodeToCompactTrigram(current, ngram, maxNodes)) {
                // stop at the first failure; buffer is released below
                success = false;
            }
        }
    }

    delete [] order;
    return success;
}
/*
* Expand lattice to implement general LMs
* Algorithm: replace each node in lattice with copies that are
* associated with specific LM contexts. The mapping
* (original node, context) -> new node
* is constructed incrementally as the lattice is traversed in topological
* order.
*
* expandMap[startNode, <s>] := newStartNode;
* expandMap[endNode, </s>] := newEndNode;
*
* for oldNode in topological order
* for expandMap[oldNode, c] = newNode
* for oldNode2 in successors(oldNode)
* c2 = lmcontext(c + word(oldNode2));
* find or create expandMap[oldNode2, c2] = newNode2;
* word(newNode2) := word(oldNode2);
* prob(newNode->newNode2) := P(word(newNode2) | c);
* delete oldNode;
* delete expandMap[oldNode]; # to save space
*
* As an optimization, we let
*
* lmcontext(c + word(oldNode2)) be the longest context used by the LM
* for predicting words following oldNode2 in the lattice, and
* BOW(c2) be the backoff weight associated with backing off from the
* full LM context (c + word(oldNode2)) to c2
*
* Nodes with NULL or pause are handled by ignoring them in context
* construction, but otherwise handling (i.e., duplicating) them as above.
*/
Boolean
Lattice::expandNodeToLM(VocabIndex oldIndex, LM &lm, unsigned maxNodes,
Map2<NodeIndex, VocabContext, NodeIndex> &expandMap)
{
unsigned insufficientLookaheadNodes = 0;
Map2Iter2<NodeIndex, VocabContext, NodeIndex>
#ifdef USE_SARRAY_MAP2
expandIter(expandMap, oldIndex);
#else
expandIter(expandMap, oldIndex, ngramCompare);
#endif
NodeIndex *newIndex;
VocabContext context;
while (newIndex = expandIter.next(context)) {
// node structure might have been moved as a result of insertions
LatticeNode *oldNode = findNode(oldIndex);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -