⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 latticeexpand.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 4 页
字号:
    if (!node) {
        if (debug(DebugPrintFatalMessages)) {
	  dout() << "Fatal Error in Lattice::expandNodeToCompactTrigram: "
		 << "current node has lost!\n";
	}
	exit(-1); 
    }

    LatticeTransition * selfLoop = node->inTransitions.find(nodeIndex); 
    Boolean selfLoopFlag; 
    if (selfLoop) { 
      selfLoopFlag = true; 
      initASelfLoopDB(selfLoopDB, ngram, nodeIndex, node, selfLoop); 
    } else {
      selfLoopFlag = false; 
    }

    TRANSITER_T<NodeIndex,LatticeTransition> inTransIter(node->inTransitions);
    TRANSITER_T<NodeIndex,LatticeTransition> outTransIter(node->outTransitions);

    VocabIndex wordName = node->word; 

    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::expandNodeToCompactTrigram: "
	     << " processing word name: " << getWord(wordName) << ", Index: " 
	     << nodeIndex << "\n";
    }

    // going through all its incoming edges
    unsigned numInTrans = node->inTransitions.numEntries(); 

    while (LatticeTransition *inTrans = inTransIter.next(fromNodeIndex)) {

      if (nodeIndex == fromNodeIndex) {
	if (debug(DebugPrintInnerLoop)) {
	  dout() << "Lattice::expandNodeToCompactTrigram: "
		 << "jump over self loop: " 
		 << fromNodeIndex << "\n"; 
	}
	continue; 
      }

      fromNode = findNode(fromNodeIndex); 
      if (!fromNode) {
	if (debug(DebugPrintFatalMessages)) {
	  dout() << "Fatal Error in Lattice::expandNodeToCompactTrigram: "
		 << "fromNode " 
		 << fromNodeIndex << " doesn't exist!\n";
	}
	exit(-1); 
      }
      VocabIndex fromWordName = fromNode->word; 

      if (debug(DebugPrintInnerLoop)) {
	dout() << "Lattice::expandNodeToCompactTrigram: "
	       << "processing incoming edge: (" 
	       << fromNodeIndex 
	       << ", " << nodeIndex << ")\n"
	       << "      (" << getWord(fromWordName) << ", "
	       << getWord(wordName) << ")\n"; 
      }

      // compute in-coming bigram prob
      LogP inWeight; 
      if (fromNodeIndex == getInitial()) {
	context[0] = fromWordName; 
	context[1] = Vocab_None; 
	// this transition can have never been processed.
	inWeight = ngram.wordProb(wordName, context);
	inTrans->weight = inWeight; 

	if (debug(DebugPrintInnerLoop)) {
	  dout() << "Lattice::expandNodeToCompactTrigram: "
		 << "processing incoming edge: (" 
		 << fromNodeIndex 
		 << ", " << nodeIndex << ")\n"
		 << "      (" << getWord(fromWordName)
		 << ", " << getWord(wordName) << ") = "
		 << "  " << inWeight << ";\n"; 
	}
      } else { 
	// the in-coming trans has been processed and 
	// we should preserve this value.
	inWeight = inTrans->weight;
      }

      context[0] = wordName; 
      context[1] = fromWordName; 
      context[2] = Vocab_None; 

      // LogP inWeight = ngram.wordProb(wordName, context);
      unsigned inFlag = inTrans->flags; 

      // initialize it for self loop processing.
      if (selfLoopFlag) {
	initBSelfLoopDB(selfLoopDB, ngram, fromNodeIndex, fromNode, inTrans); }

      // going through all the outgoing edges
      outTransIter.init(); 
      while (LatticeTransition *outTrans = outTransIter.next(toNodeIndex)) {
	
	if (nodeIndex == toNodeIndex) {
	  if (debug(DebugPrintInnerLoop)) {
	    dout() << "Lattice::expandNodeToCompactTrigram: "
		   << "self loop: " 
		   << toNodeIndex << "\n"; 
	  }
	  continue; 
	}

	LatticeNode * toNode = findNode(toNodeIndex); 
	if (!toNode) {
	  if (debug(DebugPrintFatalMessages)) {
	    dout() << "Fatal Error in Lattice::expandNodeToCompactTrigram: "
		   << "toNode " 
		   << toNode << " doesn't exist!\n";
	  }
	  exit(-1); 
	}

	if (debug(DebugPrintInnerLoop)) {
	  dout() << "Lattice::expandNodeToCompactTrigram: "
		 << "the toNodeIndex (" 
		 << toNodeIndex << " has name "
		 << getWord(toNode->word) << ")\n"; 
	}

	// initialize selfLoopDB;
	if (selfLoopFlag) { 
	  initCSelfLoopDB(selfLoopDB, toNodeIndex, outTrans); 
	}

	// duplicate a node if the trigram exists.
	// see class Ngram in file /home/srilm/devel/lm/src/Ngram.h
	
	LogP * triProb; 

	if (triProb = ngram.findProb(toNode->word, context)) {
	  LogP logProb = *triProb; 
	  
	  if (debug(DebugPrintInnerLoop)) {
	    dout() << "Lattice::expandNodeToCompactTrigram: "
		   << "tripleIndex (" 
		   << toNodeIndex << " | " << nodeIndex << ", "
		   << fromNodeIndex << ")\n"
		   << "      trigram prob: (" 
		   << getWord(toNode->word) << " | " << context << ") found!\n"; 
	  }

	  // create one node and two edges to place trigram prob
	  PackInput packInput;
	  packInput.fromWordName = fromWordName;
	  packInput.wordName = wordName;
	  packInput.toWordName = toNode->word;
	  packInput.fromNodeIndex = fromNodeIndex;
	  packInput.toNodeIndex = toNodeIndex; 
	  packInput.inWeight = inWeight; 
	  packInput.inFlag = inFlag; 
	  packInput.outWeight = logProb; 
	  packInput.outFlag = outTrans->flags; 
	  packInput.nodeIndex = nodeIndex; 
	  packInput.toNodeId = 0;
	  packInput.lm = 0;
	  
	  packedNodeList.packNodes(*this, packInput); 

	  // to remove the outGoing edge if all the outgoing nodes have
	  // trigram probs.
	  if (numInTrans == 1 && 
	      !(outTrans->getFlag(reservedTFlag))) {

	    if (debug(DebugPrintInnerLoop)) {
	      dout() << "Lattice::expandNodeToCompactTrigram: "
		     << "outgoing edge: (" 
		     << nodeIndex << ", " << toNodeIndex << ") is removed\n"; 
	    }
	    removeTrans(nodeIndex, toNodeIndex); 
	  }

	  if (maxNodes > 0 && getNumNodes() > maxNodes) {
	    dout() << "Lattice::expandNodeToCompactTrigram: "
		   << "aborting with number of nodes exceeding "
		   << maxNodes << endl;
	    return false;
	  }
	} else {
	  // there is no trigram prob for this context

	  if (debug(DebugPrintInnerLoop)) {
	    dout() << "Lattice::expandNodeToCompactTrigram: "
		   << "no trigram context (" 
		   << context << ") has been found -- keep " 
		   << fromNodeIndex << "\n"; 
	  }

	  // note down backoff context and in-coming node for 
	  // preservation, in case explicit trigram does not exist.
	  bowContext = context; 
	  outTrans->markTrans(reservedTFlag); 
	  backoffNodeIndex = fromNodeIndex;
	}
	
	// processing selfLoop
	if (selfLoopFlag) { 
	  expandSelfLoop(ngram, selfLoopDB, packedSelfLoopNodeList); 
	}
      }	  // end of inter-loop

      // processing incoming bigram cases.
      if (!bowContext) {
  	  // for this context, all the toNodes have trigram probs

	  if (debug(DebugPrintInnerLoop)) {
	    dout() << "Lattice::expandNodeToCompactTrigram: "
		   << "incoming edge ("
		   << fromNodeIndex << ", " << nodeIndex
		   << ") is removed\n"; 
	  }

  	  removeTrans(fromNodeIndex, nodeIndex); 
      } else {
	  if (debug(DebugPrintInnerLoop)) {
	      dout() << "Lattice::expandNodeToCompactTrigram: "
		     << "updating trigram backoffs on edge("
		     << fromNodeIndex << ", " << nodeIndex << ")\n"; 
	  }

 	  LogP * wordBOW = ngram.findBOW(bowContext);
	  if (!(wordBOW)) {
  	      if (debug(DebugPrintOutLoop)) {
		dout() << "nonFatal Error in Lattice::expandNodeToCompactTrigram: "
		       << "language model - BOW (" 
		       << bowContext << ") missing!\n";
	      }

	      static LogP zerobow = 0.0; 
	      wordBOW = &zerobow; 
	  }

	  LogP logProbW = *wordBOW; 
	  LogP weight = combWeights(inWeight, logProbW); 

	  setWeightTrans(backoffNodeIndex, nodeIndex, weight); 

	  bowContext = 0;
	  inBOW = 1; 
      }

      numInTrans--;
    } // end of out-loop

    // if trigram prob exist for all the tri-node paths
    if (!inBOW) {

        if (debug(DebugPrintInnerLoop)) {
	  dout() << "Lattice::expandNodeToCompactTrigram: "
		 << "node "
		 << getWord(wordName) << " (" << nodeIndex
		 << ") has trigram probs for all its contexts\n"
		 << " and its bigram lattice node is removed\n"; 
	}

        removeNode(nodeIndex); 
    } else {
        node = findNode(nodeIndex);
        if (selfLoopFlag) { 
	  node->inTransitions.remove(nodeIndex);
	  node = findNode(nodeIndex);
	  selfLoop = node->outTransitions.remove(nodeIndex);
	  if (!selfLoop) {

 	      if (debug(DebugPrintFatalMessages)) {
		dout() << "nonFatal Error in Lattice::expandNodeToCompactTrigram: "
		       << "non symetric setting \n";
	      }
	      exit(-1); 
	  }
	} 

	// process backoff to bigram weights. 
	TRANSITER_T<NodeIndex,LatticeTransition> 
	  outTransIter(node->outTransitions);
	while (LatticeTransition *outTrans = outTransIter.next(toNodeIndex)) {
	  
	  LatticeNode * toNode = findNode(toNodeIndex);
	  context[0] = wordName; 
	  context[1] = Vocab_None; 
	  LogP weight = ngram.wordProb(toNode->word, context); 

	  setWeightTrans(nodeIndex, toNodeIndex, weight); 
	}
    }

    return true; 
}

/*
 * Expand the entire lattice into a compact trigram lattice.
 *
 * The node ordering is obtained from sortNodes(); only the entries it
 * reports as reachable are processed.  Every such node except the
 * initial and final ones is passed to expandNodeToCompactTrigram().
 *
 *	ngram		language model forwarded to the per-node expansion
 *	maxNodes	node limit forwarded to the per-node expansion
 *
 * Returns false as soon as one per-node expansion reports failure,
 * true if all of them succeed.  The temporary ordering buffer is
 * released on both paths.
 */
Boolean
Lattice::expandToCompactTrigram(Ngram &ngram, unsigned maxNodes)
{
    if (debug(DebugPrintFunctionality)) {
	dout() << "Lattice::expandToCompactTrigram: "
	       << "starting expansion to compact trigram lattice ...\n";
    }

    unsigned totalNodes = getNumNodes();

    // scratch buffer that receives the node ordering
    NodeIndex *sortedNodes = new NodeIndex[totalNodes];
    assert(sortedNodes != 0);

    unsigned reachable = sortNodes(sortedNodes);

    // fewer sorted entries than nodes: some nodes were not reached
    if (reachable != totalNodes && debug(DebugPrintOutLoop)) {
	dout() << "Lattice::expandToCompactTrigram: warning: called with unreachable nodes\n";
    }

    Boolean completed = true;

    // stop at the first node whose expansion fails
    for (unsigned pos = 0; completed && pos < reachable; pos++) {
	NodeIndex current = sortedNodes[pos];

	// the initial and final nodes are never expanded
	if (current != initial && current != final) {
	    if (!expandNodeToCompactTrigram(current, ngram, maxNodes)) {
		completed = false;
	    }
	}
    }

    delete [] sortedNodes;
    return completed;
}

/*
 * Expand lattice to implement general LMs
 * Algorithm: replace each node in lattice with copies that are 
 * associated with specific LM contexts. The mapping 
 *	(original node, context) -> new node
 * is constructed incrementally as the lattice is traversed in topological
 * order.
 *
 *	expandMap[startNode, <s>] := newStartNode;
 *	expandMap[endNode, </s>] := newEndNode;
 * 
 *	for oldNode in topological order
 *	    for expandMap[oldNode, c] = newNode
 *		for oldNode2 in successors(oldNode)
 *		    c2 = lmcontext(c + word(oldNode2));
 *		    find or create expandMap[oldNode2, c2] = newNode2;
 *		    word(newNode2) := word(oldNode2);
 *		    prob(newNode->newNode2) := P(word(newNode2) | c);
 *	    delete oldNode;
 *	    delete expandMap[oldNode]; # to save space
 *
 * As an optimization, we let
 *
 *	lmcontext(c + word(oldNode2)) be the longest context used by the LM
 *		for predicting words following oldNode2 in the lattice, and
 *	BOW(c2) be the backoff weight associated with backing off from the 
 *		full LM context (c + word(oldNode2)) to c2
 *
 * Nodes with NULL or pause are handled by ignoring them in context
 * construction, but otherwise handling (i.e., duplicating) them as above.
 */
Boolean
Lattice::expandNodeToLM(VocabIndex oldIndex, LM &lm, unsigned maxNodes,
			Map2<NodeIndex, VocabContext, NodeIndex> &expandMap)
{
    unsigned insufficientLookaheadNodes = 0;

    Map2Iter2<NodeIndex, VocabContext, NodeIndex>
#ifdef USE_SARRAY_MAP2
				expandIter(expandMap, oldIndex);
#else
				expandIter(expandMap, oldIndex, ngramCompare);
#endif
    NodeIndex *newIndex;
    VocabContext context;

    while (newIndex = expandIter.next(context)) {

	// node structure might have been moved as a result of insertions
	LatticeNode *oldNode = findNode(oldIndex);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -