📄 latticereduce.cc
字号:
// merging incoming nodes
TRANSITER_T<NodeIndex,LatticeTransition>
inTransIterJ(nodeJ->inTransitions);
while (LatticeTransition *trans = inTransIterJ.next(fromNodeIndex)) {
insertTrans(fromNodeIndex, nodeIndexI, *trans);
}
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::approxMatchOutTrans: "
<< "(" << nodeIndexJ << ") has been merged with "
<< "(" << nodeIndexI << ") and has been removed\n";
}
// delete this redudant node.
removeNode(nodeIndexJ);
return true;
}
/* ********************************************************
this procedure is to pack two nodes if their OutNode sets
overlap beyong certain threshold
input:
1) nodeIndex: the node to be processed;
2) nodeQueue: the current queue.
3) base: overlap base
4) ratio: overlap ratio
function:
going through all the in-nodes of nodeIndex, and tries to merge
as many as possible.
******************************************************** */
Boolean
Lattice::approxRedNodeB(NodeIndex nodeIndex, NodeQueue &nodeQueue,
int base, double ratio)
{
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::approxRedNodeB: "
<< "processing (" << nodeIndex << ") ***********\n";
}
NodeIndex fromNodeIndex, toNodeIndex;
LatticeNode * node = findNode(nodeIndex);
if (!node) { // skip through this node, being merged.
return true;
}
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::approxRedNodeB: "
<< " i.e., (" << getWord(node->word) << ")\n";
}
// going in a backward direction
TRANSITER_T<NodeIndex,LatticeTransition>
inTransIter(node->inTransitions);
unsigned numTransitions = node->inTransitions.numEntries();
makeArray(NodeIndex, list, numTransitions+1);
unsigned position = 0;
// collect all the in-nodes of nodeIndex
while (inTransIter.next(fromNodeIndex)) {
list[position++] = fromNodeIndex;
}
// do a pair-wise comparison for all the in-nodes.
unsigned i, j;
for (i = 0; i< position; i++) {
j = i+1;
if (j >= position) {
break; }
LatticeNode * nodeI = findNode(list[i]);
// ***********************************
// compare nodeI with nodeJ:
// Notice that numInTransI changes because
// merger occurs in the loop. this is different
// from exact match case.
// ***********************************
unsigned merged = 0;
while (j < position) {
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::approxRedNodeB: "
<< "comparing (" << list[i] << ", "
<< list[j] << ")\n";
}
LatticeNode * nodeI = findNode(list[i]);
unsigned numOutTransI = nodeI->outTransitions.numEntries();
LatticeNode * nodeJ = findNode(list[j]);
unsigned numOutTransJ = nodeJ->outTransitions.numEntries();
if (nodeI->word == nodeJ->word) {
unsigned overlap;
if ((!base && (numOutTransI < numOutTransJ)) ||
(base && (numOutTransI > numOutTransJ))) {
overlap = (unsigned ) floor(numOutTransI*ratio + 0.5);
} else {
overlap = (unsigned ) floor(numOutTransJ*ratio + 0.5);
}
overlap = (overlap > 0) ? overlap : 1;
if (approxMatchOutTrans(list[i], list[j], overlap)) {
merged = 1;
list[j] = list[--position];
continue;
}
}
j++;
}
// clear marks on the outNodes, if nodeI matches some other nodes.
if (merged) {
nodeI = findNode(list[i]);
TRANSITER_T<NodeIndex,LatticeTransition>
outTransIter(nodeI->outTransitions);
while (outTransIter.next(toNodeIndex)) {
LatticeNode * toNode = findNode(toNodeIndex);
toNode->unmarkNode(markedFlag);
}
}
}
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::approxRedNodeB: "
<< "adding nodes (";
}
// **************************************************
// preparing next level nodes for merging processing.
// **************************************************
node = findNode(nodeIndex);
if (!node) {
if (debug(DebugPrintFatalMessages)) {
dout() << "warning: (Lattice::approxRedNodeB) "
<< " this node " << nodeIndex << " get deleted!\n";
}
return false;
}
TRANSITER_T<NodeIndex,LatticeTransition>
transIter(node->inTransitions);
while (transIter.next(fromNodeIndex)) {
if (debug(DebugPrintInnerLoop)) {
dout() << "Lattice::approxRedNodeB: "
<< " " << fromNodeIndex;
}
nodeQueue.addToNodeQueueNoSamePrev(fromNodeIndex);
// nodeQueue.addToNodeQueue(fromNodeIndex);
}
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::approxRedNodeB: "
<< ") to the queue\n";
}
// mark that the current node has been processed.
node->markNode(markedFlag);
return true;
}
/* ********************************************************
An approximating algorithm for reducing bigram lattices.
combine nodes when their incoming or outgoing node sets overlap
a significant amount (decided by base and ratio).
******************************************************** */
Boolean
Lattice::approxRedBigramLattice(unsigned iters, int base, double ratio)
{
// keep track of number of node to know when to stop iteration
unsigned numNodes = getNumNodes();
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::approxRedBigramLattice: "
<< "starting reducing...."
<< " (" << numNodes << " nodes)\n";
}
// if it is a fast mode (default), it returns.
if (iters == 0) {
clearMarkOnAllNodes(markedFlag);
NodeQueue nodeQueue;
nodeQueue.addToNodeQueue(final);
// use width first approach to go through the whole lattice.
// mark the first level nodes and put them in the queue.
// going through the queue to process all the nodes in the lattice
// this is in a backward direction.
while (nodeQueue.is_empty() == false) {
NodeIndex nodeIndex = nodeQueue.popNodeQueue();
if (nodeIndex == initial) {
continue;
}
LatticeNode * node = findNode(nodeIndex);
if (!node) {
continue;
}
if (node->getFlag(markedFlag)) {
continue;
}
approxRedNodeB(nodeIndex, nodeQueue, base, ratio);
}
numNodes = getNumNodes();
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::approxRedBigramLattice: "
<< "done with only one B Reduction"
<< " (" << numNodes << " nodes)\n";
}
return true;
}
while (iters-- > 0) {
clearMarkOnAllNodes(markedFlag);
// the queue must be empty. Going Backward
NodeQueue nodeQueue;
nodeQueue.addToNodeQueue(final);
// use width first approach to go through the whole lattice.
// mark the first level nodes and put them in the queue.
// going through the queue to process all the nodes in the lattice
// this is in a backward direction.
while (nodeQueue.is_empty() == false) {
NodeIndex nodeIndex = nodeQueue.popNodeQueue();
if (nodeIndex == initial) {
continue;
}
LatticeNode * node = findNode(nodeIndex);
if (!node) {
continue;
}
if (node->getFlag(markedFlag)) {
continue;
}
approxRedNodeB(nodeIndex, nodeQueue, base, ratio);
}
// finish the Backward reduction
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::approxRedBigramLattice: "
<< "done with B Reduction\n";
}
clearMarkOnAllNodes(markedFlag);
// the queue must be empty. Going Forward
nodeQueue.addToNodeQueue(initial);
// use width first approach to go through the whole lattice.
// mark the first level nodes and put them in the queue.
// going through the queue to process all the nodes in the lattice
// this is in a forward direction.
while (nodeQueue.is_empty() == false) {
NodeIndex nodeIndex = nodeQueue.popNodeQueue();
if (nodeIndex == final) {
continue;
}
LatticeNode * node = findNode(nodeIndex);
if (!node) {
continue;
}
if (node->getFlag(markedFlag)) {
continue;
}
approxRedNodeF(nodeIndex, nodeQueue, base, ratio);
}
unsigned newNumNodes = getNumNodes();
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::approxRedBigramLattice: "
<< "done with F Reduction"
<< " (" << newNumNodes << " nodes)\n";
}
// finish one F-B iteration
// check that lattices got smaller -- if not, stop
if (newNumNodes == numNodes) {
break;
}
numNodes = newNumNodes;
}
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::approxRedBigramLattice: "
<< "done with multiple iteration(s)\n";
}
return true;
}
/*
* Reduce lattice by collapsing all nodes with the same word
* (except those in the exceptions sub-vocabulary)
*/
Boolean
Lattice::collapseSameWordNodes(SubVocab &exceptions)
{
LHash<VocabIndex, NodeIndex> wordToNodeMap;
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::collapseSameWordNodes: "
<< "starting with " << getNumNodes() << " nodes\n";
}
NodeIndex nodeIndex;
LHashIter<NodeIndex, LatticeNode> nodeIter(nodes);
while (LatticeNode *node = nodeIter.next(nodeIndex)) {
if (node->word != Vocab_None &&
!ignoreWord(node->word) &&
!exceptions.getWord(node->word))
{
Boolean foundP;
NodeIndex *oldNode = wordToNodeMap.insert(node->word, foundP);
if (!foundP) {
// word is new, save its node index
*oldNode = nodeIndex;
} else {
// word has been found before -- merge nodes
mergeNodes(*oldNode, nodeIndex);
}
}
}
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::collapseSameWordNodes: "
<< "finished with " << getNumNodes() << " nodes\n";
}
return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -