📄 latticereduce.cc
字号:
/*
* LatticeReduce.cc --
* Lattice compaction algorithms
*
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1997-2006 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lattice/src/RCS/LatticeReduce.cc,v 1.4 2006/01/06 05:35:36 stolcke Exp $";
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "Lattice.h"
#include "SArray.cc"
#include "LHash.cc"
#include "Array.cc"
#define DebugPrintFatalMessages 0
#define DebugPrintFunctionality 1
// for large functionality listed in options of the program
#define DebugPrintOutLoop 2
// for out loop of the large functions or small functions
#define DebugPrintInnerLoop 3
// for inner loop of the large functions or outloop of small functions
/*
* Compare two transition lists for equality
*/
Boolean
compareTransitions(const TRANS_T<NodeIndex,LatticeTransition> &transList1,
const TRANS_T<NodeIndex,LatticeTransition> &transList2)
{
if (transList1.numEntries() != transList2.numEntries()) {
return false;
}
#ifdef USE_SARRAY
// SArray already sorts indices internally
TRANSITER_T<NodeIndex,LatticeTransition> transIter1(transList1);
TRANSITER_T<NodeIndex,LatticeTransition> transIter2(transList2);
NodeIndex node1, node2;
while (transIter1.next(node1)) {
if (!transIter2.next(node2) || node1 != node2) {
return false;
}
}
#else
// assume random access is efficient
TRANSITER_T<NodeIndex,LatticeTransition> transIter1(transList1);
NodeIndex node1;
while (transIter1.next(node1)) {
if (!transList2.find(node1)) {
return false;
}
}
#endif
return true;
}
/*
* Merge two nodes
*/
void
Lattice::mergeNodes(NodeIndex nodeIndex1, NodeIndex nodeIndex2,
LatticeNode *node1, LatticeNode *node2, Boolean maxAdd)
{
if (nodeIndex1 == nodeIndex2) {
return;
}
if (node1 == 0) node1 = findNode(nodeIndex1);
if (node2 == 0) node2 = findNode(nodeIndex2);
assert(node1 != 0 && node2 != 0);
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::mergeNodes: "
<< " i.e., (" << getWord(node1->word)
<< ", " << getWord(node2->word) << ")\n";
}
// add the incoming trans of nodeIndex2 to nodeIndex1
TRANSITER_T<NodeIndex,LatticeTransition>
inTransIter2(node2->inTransitions);
NodeIndex fromNodeIndex;
while (LatticeTransition *trans = inTransIter2.next(fromNodeIndex)) {
insertTrans(fromNodeIndex, nodeIndex1, *trans, maxAdd);
}
// add the outgoing trans of nodeIndex2 to nodeIndex1
TRANSITER_T<NodeIndex,LatticeTransition>
outTransIter2(node2->outTransitions);
NodeIndex toNodeIndex;
while (LatticeTransition *trans = outTransIter2.next(toNodeIndex)) {
insertTrans(nodeIndex1, toNodeIndex, *trans, maxAdd);
}
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::mergeNodes: "
<< "(" << nodeIndex2 << ") has been removed\n";
}
// delete this redudant node.
removeNode(nodeIndex2);
}
/*
* Try merging successors of nodeIndex
*/
void
Lattice::packNodeF(NodeIndex nodeIndex, Boolean maxAdd)
{
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::packNodeF: "
<< "processing (" << nodeIndex << ") ***********\n";
}
LatticeNode *node = findNode(nodeIndex);
// skip nodes that have already beend deleted
if (!node) {
return;
}
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::packNodeF: "
<< " i.e., (" << getWord(node->word) << ")\n";
}
unsigned numTransitions = node->outTransitions.numEntries();
makeArray(NodeIndex, nodeList, numTransitions);
makeArray(VocabIndex, wordList, numTransitions);
// going in a forward direction
// collect all the out-nodes of nodeIndex, because we need to
// be able to delete transitions while iterating over them later
// Also, store the words on each node to allow quick equality checks
// without findNode() later. Note we cannot save the findNode() result
// itself since node objects may more around as a result of deletions.
TRANSITER_T<NodeIndex,LatticeTransition>
outTransIter(node->outTransitions);
unsigned position = 0;
NodeIndex toNodeIndex;
while (outTransIter.next(toNodeIndex)) {
wordList[position] = findNode(toNodeIndex)->word;
nodeList[position] = toNodeIndex;
position ++;
}
// do a pair-wise comparison for all the successor nodes.
for (unsigned i = 0; i < numTransitions; i ++) {
// check if node has been merged
if (Map_noKeyP(nodeList[i])) continue;
for (unsigned j = i + 1; j < numTransitions; j ++) {
LatticeNode *nodeI, *nodeJ;
if (!Map_noKeyP(nodeList[j]) &&
wordList[i] == wordList[j] &&
(nodeI = findNode(nodeList[i]),
nodeJ = findNode(nodeList[j]),
compareTransitions(nodeI->inTransitions,
nodeJ->inTransitions)))
{
mergeNodes(nodeList[i], nodeList[j], nodeI, nodeJ, maxAdd);
// mark node j as merged for check above
Map_noKey(nodeList[j]);
}
}
}
}
/*
* Try merging predecessors of nodeIndex
*/
void
Lattice::packNodeB(NodeIndex nodeIndex, Boolean maxAdd)
{
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::packNodeB: "
<< "processing (" << nodeIndex << ") ***********\n";
}
LatticeNode *node = findNode(nodeIndex);
// skip nodes that have already beend deleted
if (!node) {
return;
}
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::packNodeB: "
<< " i.e., (" << getWord(node->word) << ")\n";
}
unsigned numTransitions = node->inTransitions.numEntries();
makeArray(NodeIndex, nodeList, numTransitions);
makeArray(VocabIndex, wordList, numTransitions);
// going in a reverse direction
// collect all the in-nodes of nodeIndex, because we need to
// be able to delete transitions while iterating over them
// Also, store the words on each node to allow quick equality checks
// without findNode() later. Note we cannot save the findNode() result
// itself since node objects may more around as a result of deletions.
TRANSITER_T<NodeIndex,LatticeTransition>
inTransIter(node->inTransitions);
unsigned position = 0;
NodeIndex fromNodeIndex;
while (inTransIter.next(fromNodeIndex)) {
wordList[position] = findNode(fromNodeIndex)->word;
nodeList[position] = fromNodeIndex;
position ++;
}
// do a pair-wise comparison for all the predecessor nodes.
for (unsigned i = 0; i < numTransitions; i ++) {
// check if node has been merged
if (Map_noKeyP(nodeList[i])) continue;
for (unsigned j = i + 1; j < numTransitions; j ++) {
LatticeNode *nodeI, *nodeJ;
if (!Map_noKeyP(nodeList[j]) &&
wordList[i] == wordList[j] &&
(nodeI = findNode(nodeList[i]),
nodeJ = findNode(nodeList[j]),
compareTransitions(nodeI->outTransitions,
nodeJ->outTransitions)))
{
mergeNodes(nodeList[i], nodeList[j], nodeI, nodeJ, maxAdd);
// mark node j as merged for check above
Map_noKey(nodeList[j]);
}
}
}
}
/* ********************************************************
A straight forward implementation for packing bigram lattices.
combine nodes when their incoming or outgoing node sets are the
same.
******************************************************** */
Boolean
Lattice::simplePackBigramLattice(unsigned iters, Boolean maxAdd)
{
// keep track of number of node to know when to stop iteration
unsigned numNodes = getNumNodes();
Boolean onlyOne = false;
if (iters == 0) {
iters = 1;
onlyOne = true; // do only one backward pass
}
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::simplePackBigramLattice: "
<< "starting packing...."
<< " (" << numNodes << " nodes)\n";
}
NodeIndex *sortedNodes = new NodeIndex[numNodes];
assert(sortedNodes != 0);
while (iters-- > 0) {
unsigned numReachable = sortNodes(sortedNodes, true);
if (numReachable != numNodes) {
dout() << "Lattice::simplePackBigramLattice: "
<< "warning: there are " << (numNodes - numReachable)
<< " unreachable nodes\n";
}
for (unsigned i = 0; i < numReachable; i ++) {
packNodeB(sortedNodes[i], maxAdd);
}
unsigned newNumNodes = getNumNodes();
// finish the Backward reduction
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::simplePackBigramLattice: "
<< "done with B Reduction"
<< " (" << newNumNodes << " nodes)\n";
}
if (onlyOne) {
break;
}
// now sort into forward topological order
numReachable = sortNodes(sortedNodes, false);
if (numReachable != newNumNodes) {
dout() << "Lattice::simplePackBigramLattice: "
<< "warning: there are " << (newNumNodes - numReachable)
<< " unreachable nodes\n";
}
for (unsigned i = 0; i < numReachable; i ++) {
packNodeF(sortedNodes[i], maxAdd);
}
newNumNodes = getNumNodes();
if (debug(DebugPrintFunctionality)) {
dout() << "Lattice::simplePackBigramLattice: "
<< "done with F Reduction"
<< " (" << newNumNodes << " nodes)\n";
}
// finish one F-B iteration
// check that lattices got smaller -- if not, stop
if (newNumNodes == numNodes) {
break;
}
numNodes = newNumNodes;
}
delete [] sortedNodes;
return true;
}
/* ********************************************************
this procedure tries to compare two nodes to see whether their
outgoing node sets overlap more than x.
******************************************************** */
Boolean
Lattice::approxMatchInTrans(NodeIndex nodeIndexI, NodeIndex nodeIndexJ,
int overlap)
{
if (debug(DebugPrintOutLoop)) {
dout() << "Lattice::approxMatchInTrans: "
<< "try to match (" << nodeIndexI
<< ", " << nodeIndexJ << ") with overlap "
<< overlap << "\n";
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -