📄 latticereduce.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 3 页
字号:
上一页 1 23

    // merging incoming nodes
    TRANSITER_T<NodeIndex,LatticeTransition> 
      inTransIterJ(nodeJ->inTransitions);
    while (LatticeTransition *trans = inTransIterJ.next(fromNodeIndex)) {
      insertTrans(fromNodeIndex, nodeIndexI, *trans);
    }
    
    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::approxMatchOutTrans: "
	     << "(" << nodeIndexJ << ") has been merged with "
	     << "(" << nodeIndexI << ") and has been removed\n";
    }

    // delete this redudant node.
    removeNode(nodeIndexJ); 
    
    return true;
}

/* ********************************************************
   this procedure is to pack two nodes if their OutNode sets 
   overlap beyong certain threshold
   
   input: 
   1) nodeIndex: the node to be processed;
   2) nodeQueue: the current queue.
   3) base: overlap base
   4) ratio: overlap ratio

   function:
       going through all the in-nodes of nodeIndex, and tries to merge
       as many as possible.
   ******************************************************** */
Boolean 
Lattice::approxRedNodeB(NodeIndex nodeIndex, NodeQueue &nodeQueue, 
			int base, double ratio)
{

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::approxRedNodeB: "
	     << "processing (" << nodeIndex << ") ***********\n"; 
    }

    NodeIndex fromNodeIndex, toNodeIndex; 
    LatticeNode * node = findNode(nodeIndex); 
    if (!node) { // skip through this node, being merged.
      return true; 
    }

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::approxRedNodeB: "
	     << "      i.e., (" << getWord(node->word) << ")\n";
    }

    // going in a backward direction
    TRANSITER_T<NodeIndex,LatticeTransition> 
      inTransIter(node->inTransitions);
    unsigned numTransitions = node->inTransitions.numEntries();
    makeArray(NodeIndex, list, numTransitions+1); 
    unsigned position = 0; 
    
    // collect all the in-nodes of nodeIndex
    while (inTransIter.next(fromNodeIndex)) {
      list[position++] = fromNodeIndex; 
    }

    // do a pair-wise comparison for all the in-nodes.
    unsigned i, j; 
    for (i = 0; i< position; i++) {
        j = i+1; 

	if (j >= position) { 
	  break; }

	LatticeNode * nodeI = findNode(list[i]);
	// ***********************************
	// compare nodeI with nodeJ:
	//    Notice that numInTransI changes because
	//    merger occurs in the loop. this is different
	//    from exact match case.   
	// ***********************************
	unsigned merged = 0; 
	while (j < position) {
	  if (debug(DebugPrintInnerLoop)) {
	    dout() << "Lattice::approxRedNodeB: "
		   << "comparing (" << list[i] << ", "
		   << list[j] << ")\n"; 
	  }

          LatticeNode * nodeI = findNode(list[i]);
	  unsigned numOutTransI = nodeI->outTransitions.numEntries();

	  LatticeNode * nodeJ = findNode(list[j]);
	  unsigned numOutTransJ = nodeJ->outTransitions.numEntries();
	  if (nodeI->word == nodeJ->word) {
	    unsigned overlap; 
	    if ((!base && (numOutTransI < numOutTransJ)) ||
		(base && (numOutTransI > numOutTransJ))) {
	      overlap = (unsigned ) floor(numOutTransI*ratio + 0.5);
	    } else {
	      overlap = (unsigned ) floor(numOutTransJ*ratio + 0.5);
	    }

	    overlap = (overlap > 0) ? overlap : 1;
	    if (approxMatchOutTrans(list[i], list[j], overlap)) {
	      merged = 1; 
	      list[j] = list[--position];
	      continue;
	    } 
	  } 
	  j++; 
	}

	// clear marks on the outNodes, if nodeI matches some other nodes.
	if (merged) {
	  nodeI = findNode(list[i]);
	  TRANSITER_T<NodeIndex,LatticeTransition> 
	    outTransIter(nodeI->outTransitions);
	  while (outTransIter.next(toNodeIndex)) {
	    LatticeNode * toNode = findNode(toNodeIndex); 
	    toNode->unmarkNode(markedFlag); 
	  }
	}
    }

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::approxRedNodeB: "
	     << "adding nodes ("; 
    }

    // **************************************************
    // preparing next level nodes for merging processing.
    // **************************************************
    node = findNode(nodeIndex); 

    if (!node) { 
      if (debug(DebugPrintFatalMessages)) {
	dout() << "warning: (Lattice::approxRedNodeB) "
	       <<  " this node " << nodeIndex << " get deleted!\n";
      }
      return false;
    }
	
    TRANSITER_T<NodeIndex,LatticeTransition> 
      transIter(node->inTransitions);
    while (transIter.next(fromNodeIndex)) {
      if (debug(DebugPrintInnerLoop)) {
	dout() << "Lattice::approxRedNodeB: "
	       << " " << fromNodeIndex; 
      }
	  
      nodeQueue.addToNodeQueueNoSamePrev(fromNodeIndex); 
      // nodeQueue.addToNodeQueue(fromNodeIndex); 
    }

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::approxRedNodeB: "
	     << ") to the queue\n"; 
    }

    // mark that the current node has been processed.
    node->markNode(markedFlag);
    
    return true;
}



/* ******************************************************** 

   An approximating algorithm for reducing bigram lattices.
   combine nodes when their incoming or outgoing node sets overlap
   a significant amount (decided by base and ratio).

   ******************************************************** */
Boolean 
Lattice::approxRedBigramLattice(unsigned iters, int base, double ratio)
{
    // keep track of number of node to know when to stop iteration
    unsigned numNodes = getNumNodes();

    if (debug(DebugPrintFunctionality)) {
      dout() << "Lattice::approxRedBigramLattice: "
	     << "starting reducing...."
	     << " (" << numNodes  << " nodes)\n";
    }
    // if it is a fast mode (default), it returns.
    if (iters == 0) {
      clearMarkOnAllNodes(markedFlag);

      NodeQueue nodeQueue; 
      nodeQueue.addToNodeQueue(final); 

      // use width first approach to go through the whole lattice.
      // mark the first level nodes and put them in the queue.
      // going through the queue to process all the nodes in the lattice
      // this is in a backward direction.
      while (nodeQueue.is_empty() == false) {
	NodeIndex nodeIndex = nodeQueue.popNodeQueue();

	if (nodeIndex == initial) {
	  continue;
	}
	LatticeNode * node = findNode(nodeIndex);
	if (!node) { 
	  continue; 
	}
	if (node->getFlag(markedFlag)) {
	  continue;
	}
	approxRedNodeB(nodeIndex, nodeQueue, base, ratio); 
      }

      numNodes = getNumNodes();

      if (debug(DebugPrintFunctionality)) {
	dout() << "Lattice::approxRedBigramLattice: "
	       << "done with only one B Reduction"
	       << " (" << numNodes  << " nodes)\n";
      }
      return true;
    }

    while (iters-- > 0) { 
      clearMarkOnAllNodes(markedFlag);

      // the queue must be empty. Going Backward
      NodeQueue nodeQueue; 
      nodeQueue.addToNodeQueue(final); 

      // use width first approach to go through the whole lattice.
      // mark the first level nodes and put them in the queue.
      // going through the queue to process all the nodes in the lattice
      // this is in a backward direction.
      while (nodeQueue.is_empty() == false) {
	NodeIndex nodeIndex = nodeQueue.popNodeQueue();
	
	if (nodeIndex == initial) {
	  continue;
	}
	LatticeNode * node = findNode(nodeIndex);
	if (!node) { 
	  continue; 
	}
	if (node->getFlag(markedFlag)) {
	  continue;
	}

	approxRedNodeB(nodeIndex, nodeQueue, base, ratio); 
      }
      // finish the Backward reduction
      if (debug(DebugPrintFunctionality)) {
	dout() << "Lattice::approxRedBigramLattice: "
	       << "done with B Reduction\n"; 
      }
      clearMarkOnAllNodes(markedFlag);

      // the queue must be empty. Going Forward
      nodeQueue.addToNodeQueue(initial); 

      // use width first approach to go through the whole lattice.
      // mark the first level nodes and put them in the queue.
      // going through the queue to process all the nodes in the lattice
      // this is in a forward direction.
      while (nodeQueue.is_empty() == false) {
	NodeIndex nodeIndex = nodeQueue.popNodeQueue();

	if (nodeIndex == final) {
	  continue;
	}
	LatticeNode * node = findNode(nodeIndex);
	if (!node) { 
	  continue; 
	}
	if (node->getFlag(markedFlag)) {
	  continue;
	}
	
	approxRedNodeF(nodeIndex, nodeQueue, base, ratio); 
      }

      unsigned newNumNodes = getNumNodes();

      if (debug(DebugPrintFunctionality)) {
	dout() << "Lattice::approxRedBigramLattice: "
	       << "done with F Reduction"
	       << " (" << newNumNodes  << " nodes)\n";
      }
      // finish one F-B iteration

      // check that lattices got smaller -- if not, stop
      if (newNumNodes == numNodes) {
	break;
      }
      numNodes = newNumNodes;
    }
    if (debug(DebugPrintFunctionality)) {
      dout() << "Lattice::approxRedBigramLattice: "
	     << "done with multiple iteration(s)\n"; 
    }
    return true; 
}

/*
 * Reduce lattice by collapsing all nodes with the same word
 *	(except those in the exceptions sub-vocabulary)
 */
Boolean
Lattice::collapseSameWordNodes(SubVocab &exceptions)
{
    LHash<VocabIndex, NodeIndex> wordToNodeMap;

    if (debug(DebugPrintFunctionality)) {
      dout() << "Lattice::collapseSameWordNodes: "
	     << "starting with " << getNumNodes() << " nodes\n";
    }

    NodeIndex nodeIndex;
    LHashIter<NodeIndex, LatticeNode> nodeIter(nodes);
    while (LatticeNode *node = nodeIter.next(nodeIndex)) {

	if (node->word != Vocab_None &&
	    !ignoreWord(node->word) &&
	    !exceptions.getWord(node->word))
	{
	    Boolean foundP;

	    NodeIndex *oldNode = wordToNodeMap.insert(node->word, foundP);

	    if (!foundP) {
		// word is new, save its node index
		*oldNode = nodeIndex;
	    } else {
		// word has been found before -- merge nodes
		mergeNodes(*oldNode, nodeIndex);
	    }
	}
    }

    if (debug(DebugPrintFunctionality)) {
      dout() << "Lattice::collapseSameWordNodes: "
	     << "finished with " << getNumNodes() << " nodes\n";
    }

    return true;
}
上一页 1 23
💿 文件大小 3034 K
👤 上传用户 wanghaihah
📂 所属分类其他
🏷️ 相关标签

#工具包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -