📄 latticereduce.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 * LatticeReduce.cc --
 *	Lattice compaction algorithms
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 1997-2006 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lattice/src/RCS/LatticeReduce.cc,v 1.4 2006/01/06 05:35:36 stolcke Exp $";
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "Lattice.h"

#include "SArray.cc"
#include "LHash.cc"
#include "Array.cc"

#define DebugPrintFatalMessages         0 

#define DebugPrintFunctionality         1 
// for large functionality listed in options of the program
#define DebugPrintOutLoop               2
// for out loop of the large functions or small functions
#define DebugPrintInnerLoop             3
// for inner loop of the large functions or outloop of small functions


/*
 * Compare two transition lists for equality
 */
Boolean
compareTransitions(const TRANS_T<NodeIndex,LatticeTransition> &transList1,
		   const TRANS_T<NodeIndex,LatticeTransition> &transList2)
{
    if (transList1.numEntries() != transList2.numEntries()) {
	return false;
    }

#ifdef USE_SARRAY
    // SArray already sorts indices internally
    TRANSITER_T<NodeIndex,LatticeTransition> transIter1(transList1);
    TRANSITER_T<NodeIndex,LatticeTransition> transIter2(transList2);

    NodeIndex node1, node2;
    while (transIter1.next(node1)) {
	if (!transIter2.next(node2) || node1 != node2) {
	    return false;
	}
    }
#else
    // assume random access is efficient
    TRANSITER_T<NodeIndex,LatticeTransition> transIter1(transList1);
    NodeIndex node1;
    while (transIter1.next(node1)) {
	if (!transList2.find(node1)) {
	    return false;
	}
    }
#endif

    return true;
}

/*
 * Merge two nodes 
 */
void
Lattice::mergeNodes(NodeIndex nodeIndex1, NodeIndex nodeIndex2,
			LatticeNode *node1, LatticeNode *node2, Boolean maxAdd)
{
    if (nodeIndex1 == nodeIndex2) {
	return;
    }

    if (node1 == 0) node1 = findNode(nodeIndex1);
    if (node2 == 0) node2 = findNode(nodeIndex2);

    assert(node1 != 0 && node2 != 0);

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::mergeNodes: "
	     << "      i.e., (" << getWord(node1->word)
	     << ", " << getWord(node2->word) << ")\n";
    }

    // add the incoming trans of nodeIndex2 to nodeIndex1
    TRANSITER_T<NodeIndex,LatticeTransition> 
      inTransIter2(node2->inTransitions);
    NodeIndex fromNodeIndex;
    while (LatticeTransition *trans = inTransIter2.next(fromNodeIndex)) {
      insertTrans(fromNodeIndex, nodeIndex1, *trans, maxAdd);
    }

    // add the outgoing trans of nodeIndex2 to nodeIndex1
    TRANSITER_T<NodeIndex,LatticeTransition> 
      outTransIter2(node2->outTransitions);
    NodeIndex toNodeIndex;
    while (LatticeTransition *trans = outTransIter2.next(toNodeIndex)) {
      insertTrans(nodeIndex1, toNodeIndex, *trans, maxAdd);
    }

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::mergeNodes: "
	     << "(" << nodeIndex2 << ") has been removed\n";
    }
    // delete this redudant node.
    removeNode(nodeIndex2); 
}

/*
 * Try merging successors of nodeIndex
 */
void
Lattice::packNodeF(NodeIndex nodeIndex, Boolean maxAdd)
{
    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::packNodeF: "
	     << "processing (" << nodeIndex << ") ***********\n"; 
    }

    LatticeNode *node = findNode(nodeIndex); 
    // skip nodes that have already beend deleted
    if (!node) {
      return;
    }

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::packNodeF: "
	     << "      i.e., (" << getWord(node->word) << ")\n";
    }

    unsigned numTransitions = node->outTransitions.numEntries();
    makeArray(NodeIndex, nodeList, numTransitions); 
    makeArray(VocabIndex, wordList, numTransitions);
    
    // going in a forward direction
    // collect all the out-nodes of nodeIndex, because we need to 
    // be able to delete transitions while iterating over them later
    // Also, store the words on each node to allow quick equality checks
    // without findNode() later.  Note we cannot save the findNode() result
    // itself since node objects may more around as a result of deletions.
    TRANSITER_T<NodeIndex,LatticeTransition> 
      outTransIter(node->outTransitions);
    unsigned position = 0; 
    NodeIndex toNodeIndex;
    while (outTransIter.next(toNodeIndex)) {
	wordList[position] = findNode(toNodeIndex)->word;
	nodeList[position] = toNodeIndex; 
	position ++;
    }

    // do a pair-wise comparison for all the successor nodes.
    for (unsigned i = 0; i < numTransitions; i ++) {
	// check if node has been merged
	if (Map_noKeyP(nodeList[i])) continue;

	for (unsigned j = i + 1; j < numTransitions; j ++) {
	    LatticeNode *nodeI, *nodeJ;

	    if (!Map_noKeyP(nodeList[j]) &&
		wordList[i] == wordList[j] &&
		(nodeI = findNode(nodeList[i]),
		 nodeJ = findNode(nodeList[j]),
		 compareTransitions(nodeI->inTransitions,
				    nodeJ->inTransitions)))
	    {
		mergeNodes(nodeList[i], nodeList[j], nodeI, nodeJ, maxAdd);

		// mark node j as merged for check above
		Map_noKey(nodeList[j]);
	    }
	}
    }
}

/*
 * Try merging predecessors of nodeIndex
 */
void
Lattice::packNodeB(NodeIndex nodeIndex, Boolean maxAdd)
{
    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::packNodeB: "
	     << "processing (" << nodeIndex << ") ***********\n"; 
    }

    LatticeNode *node = findNode(nodeIndex); 
    // skip nodes that have already beend deleted
    if (!node) {
      return;
    }

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::packNodeB: "
	     << "      i.e., (" << getWord(node->word) << ")\n";
    }

    unsigned numTransitions = node->inTransitions.numEntries();
    makeArray(NodeIndex, nodeList, numTransitions); 
    makeArray(VocabIndex, wordList, numTransitions);
    
    // going in a reverse direction
    // collect all the in-nodes of nodeIndex, because we need to 
    // be able to delete transitions while iterating over them
    // Also, store the words on each node to allow quick equality checks
    // without findNode() later.  Note we cannot save the findNode() result
    // itself since node objects may more around as a result of deletions.
    TRANSITER_T<NodeIndex,LatticeTransition> 
      inTransIter(node->inTransitions);
    unsigned position = 0; 
    NodeIndex fromNodeIndex;
    while (inTransIter.next(fromNodeIndex)) {
	wordList[position] = findNode(fromNodeIndex)->word;
	nodeList[position] = fromNodeIndex; 
	position ++;
    }

    // do a pair-wise comparison for all the predecessor nodes.
    for (unsigned i = 0; i < numTransitions; i ++) {
	// check if node has been merged
	if (Map_noKeyP(nodeList[i])) continue;

	for (unsigned j = i + 1; j < numTransitions; j ++) {
	    LatticeNode *nodeI, *nodeJ;

	    if (!Map_noKeyP(nodeList[j]) &&
	 	wordList[i] == wordList[j] &&
		(nodeI = findNode(nodeList[i]),
		 nodeJ = findNode(nodeList[j]),
		 compareTransitions(nodeI->outTransitions,
				    nodeJ->outTransitions)))
	    {
		mergeNodes(nodeList[i], nodeList[j], nodeI, nodeJ, maxAdd);

		// mark node j as merged for check above
		Map_noKey(nodeList[j]);
	    }
	}
    }
}

/* ******************************************************** 

   A straight forward implementation for packing bigram lattices.
   combine nodes when their incoming or outgoing node sets are the
   same.

   ******************************************************** */
Boolean 
Lattice::simplePackBigramLattice(unsigned iters, Boolean maxAdd)
{
    // keep track of number of node to know when to stop iteration
    unsigned numNodes = getNumNodes();

    Boolean onlyOne = false;

    if (iters == 0) {
	iters = 1;
	onlyOne = true; // do only one backward pass
    }
	
    if (debug(DebugPrintFunctionality)) {
      dout() << "Lattice::simplePackBigramLattice: "
	     << "starting packing...."
	     << " (" << numNodes  << " nodes)\n";
    }

    NodeIndex *sortedNodes = new NodeIndex[numNodes];
    assert(sortedNodes != 0);

    while (iters-- > 0) { 
      unsigned numReachable = sortNodes(sortedNodes, true);

      if (numReachable != numNodes) {
	dout() << "Lattice::simplePackBigramLattice: "
	       << "warning: there are " << (numNodes - numReachable)
	       << " unreachable nodes\n";
      }

      for (unsigned i = 0; i < numReachable; i ++) {
	packNodeB(sortedNodes[i], maxAdd);
      }

      unsigned newNumNodes = getNumNodes();
      
      // finish the Backward reduction
      if (debug(DebugPrintFunctionality)) {
	dout() << "Lattice::simplePackBigramLattice: "
	       << "done with B Reduction"
	       << " (" << newNumNodes  << " nodes)\n";
      }

      if (onlyOne) {
	break;
      }

      // now sort into forward topological order
      numReachable = sortNodes(sortedNodes, false);

      if (numReachable != newNumNodes) {
	dout() << "Lattice::simplePackBigramLattice: "
	       << "warning: there are " << (newNumNodes - numReachable)
	       << " unreachable nodes\n";
      }

      for (unsigned i = 0; i < numReachable; i ++) {
	packNodeF(sortedNodes[i], maxAdd);
      }

      newNumNodes = getNumNodes();

      if (debug(DebugPrintFunctionality)) {
	dout() << "Lattice::simplePackBigramLattice: "
	       << "done with F Reduction"
	       << " (" << newNumNodes  << " nodes)\n";
      }
      // finish one F-B iteration

      // check that lattices got smaller -- if not, stop
      if (newNumNodes == numNodes) {
	break;
      }
      numNodes = newNumNodes;
    }

    delete [] sortedNodes;
    return true; 
}

/* ********************************************************
   this procedure tries to compare two nodes to see whether their
   outgoing node sets overlap more than x.
   ******************************************************** */

Boolean 
Lattice::approxMatchInTrans(NodeIndex nodeIndexI, NodeIndex nodeIndexJ,
			     int overlap)
{
    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::approxMatchInTrans: "
	     << "try to match (" << nodeIndexI
	     << ", " << nodeIndexJ << ") with overlap "
	     << overlap << "\n"; 
    }
12 3 下一页
💿 文件大小 3034 K
👤 上传用户 wanghaihah
📂 所属分类其他
🏷️ 相关标签

#工具包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -