⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 latticeexpand.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 4 页
字号:
    context[2] = Vocab_None; 

    LatticeNode *toNode = findNode(toNodeIndex); 
    VocabIndex toWordName = toNode->word; 

    LogP triProb = lm.wordProb(toWordName, context);

    unsigned usedContextLength;
    lm.contextID(context, usedContextLength);

    context[1] = Vocab_None;
    LogP biProb = lm.wordProb(toWordName, context);

    LogP postToProb; 

    if (usedContextLength > 1) {

      // get trigram prob for (post, to) edge: p(c|a, a)
      postToProb = triProb; 

      // create post node and loop if it doesn't exist;
      if (!selfLoopDB.postNodeIndex3) {
	selfLoopDB.postNodeIndex3 = 
	  postNodeIndex = dupNode(wordName, markedFlag);
	
	// create the loop, put trigram prob p(a|a,a) on the loop
	LatticeTransition t(selfLoopDB.loopProb, selfLoopDB.selfTransFlags);
	insertTrans(postNodeIndex, postNodeIndex, t); 
	// end of creating of loop
      }

      postNodeIndex = selfLoopDB.postNodeIndex3; 
      id = 3; 

    } else {
      
      // get an adjusted weight for the link between preNode to postNode
      LogP wordBOW = triProb - biProb;

      prePostProb = combWeights(prePostProb, wordBOW); 

      // get existing weight of (node, toNode) as the weight for (post, to).
      LatticeNode *node = findNode(selfLoopDB.nodeIndex); 
      if (!node) {
	if (debug(DebugPrintFatalMessages)) {
	  dout() << "Fatal Error in Lattice::expandSelfLoop: "
		 << "can't find node " << selfLoopDB.nodeIndex << "\n"; 
	}
	exit(-1);
      }

      // compute postToProb
      postToProb = biProb; 

      // create post node and loop if it doesn't exist;
      if (!selfLoopDB.postNodeIndex2) {
	selfLoopDB.postNodeIndex2 = 
	  postNodeIndex = dupNode(wordName, markedFlag);

	// create the loop, put trigram prob p(a|a,a) on the loop
	LatticeTransition t(selfLoopDB.loopProb, selfLoopDB.selfTransFlags);
	insertTrans(postNodeIndex, postNodeIndex, t); 
	// end of creating loop
      }
      postNodeIndex = selfLoopDB.postNodeIndex2; 
      id = 2; 
    }

    // create link from postNode to toNode if (postNode, toNode) doesn't exist;
    toNode = findNode(toNodeIndex); 
    LatticeTransition *postToTrans = toNode->inTransitions.find(postNodeIndex);
    if (!postToTrans) {
      // create link from postNode to toNode;
      LatticeTransition t(postToProb, selfLoopDB.selfToTransFlags); 
      insertTrans(postNodeIndex, toNodeIndex, t); 
    }
    // done with first part of the network. 

    // create the part of the network from fromNode to postNode.
    // create preNode and (from, pre) edge.
    NodeIndex preNodeIndex = selfLoopDB.preNodeIndex; 

    PackInput packSelfLoop;
    packSelfLoop.wordName = wordName; 
    packSelfLoop.fromWordName = selfLoopDB.fromWordName;
    packSelfLoop.toWordName = toNode->word; 
    packSelfLoop.fromNodeIndex = selfLoopDB.fromNodeIndex; 
    packSelfLoop.toNodeIndex = postNodeIndex; 
    packSelfLoop.inWeight = selfLoopDB.fromPreProb;
    packSelfLoop.inFlag = selfLoopDB.fromSelfTransFlags; 
    packSelfLoop.outWeight = prePostProb; 
    packSelfLoop.toNodeId = id; 
    packSelfLoop.lm = 0; 

    packedSelfLoopNodeList.packNodes(*this, packSelfLoop); 

    return true;
}

/*
 * Expand a single node of a bigram lattice for trigram probabilities.
 *
 * Every pair of an incoming transition (from, node) and an outgoing
 * transition (node, to) -- self loops excluded -- is described by a
 * PackInput and handed to PackedNodeList::packNodes(); according to the
 * notes kept below, the outgoing trigram weight is computed there on
 * demand from lm.  If the node carries a self loop, every such pair is
 * additionally passed to expandSelfLoop(), and the self loop itself is
 * removed once all pairs have been processed.  Finally the original node
 * is removed from the lattice.
 *
 *   nodeIndex  index of the node to expand; the process exits if the node
 *              (or any neighbour reached through its transitions) cannot
 *              be found.
 *   lm         language model used to obtain probabilities.
 *   maxNodes   when non-zero, expansion is aborted as soon as the lattice
 *              holds more than this many nodes.
 *
 * Returns true on success; false if the maxNodes limit was exceeded, in
 * which case the original node has not been removed.
 */
Boolean 
Lattice::expandNodeToTrigram(NodeIndex nodeIndex, LM &lm, unsigned maxNodes)
{
    SelfLoopDB selfLoopDB; 

    PackedNodeList packedNodeList, 
      packedSelfLoopNodeList; 

    NodeIndex fromNodeIndex;
    NodeIndex toNodeIndex;
    LatticeTransition *inTrans;
    LatticeNode *fromNode; 
    VocabIndex context[3];
    LatticeNode *node = findNode(nodeIndex); 
    if (!node) {
        if (debug(DebugPrintFatalMessages)) {
            dout() << "Lattice::expandNodeToTrigram: "
                   << "Fatal Error: current node doesn't exist!\n";
        }
        exit(-1); 
    }

    // A transition from the node to itself marks a self loop, which needs
    // the separate treatment implemented in expandSelfLoop().
    LatticeTransition * selfLoop = node->inTransitions.find(nodeIndex); 
    Boolean selfLoopFlag; 
    if (selfLoop) { 
        selfLoopFlag = true; 
        initASelfLoopDB(selfLoopDB, lm, nodeIndex, node, selfLoop); 
    } else {
        selfLoopFlag = false; 
    }

    TRANSITER_T<NodeIndex,LatticeTransition> inTransIter(node->inTransitions);
    TRANSITER_T<NodeIndex,LatticeTransition> outTransIter(node->outTransitions);

    VocabIndex wordName = node->word; 

    if (debug(DebugPrintOutLoop)) {
        dout() << "Lattice::expandNodeToTrigram: "
               << "processing word name: " << getWord(wordName) << ", Index: " 
               << nodeIndex << "\n";
    }

    // going through all its incoming edges
    while ((inTrans = inTransIter.next(fromNodeIndex)) != 0) {

        // the self loop is handled together with the regular edges below
        if (nodeIndex == fromNodeIndex) {

            if (debug(DebugPrintOutLoop)) {
                dout() << "Lattice::expandNodeToTrigram: jump over self loop: " 
                       << fromNodeIndex << "\n"; 
            }

            continue; 
        }

        fromNode = findNode(fromNodeIndex); 
        if (!fromNode) {
            if (debug(DebugPrintFatalMessages)) {
                dout() << "Lattice::expandNodeToTrigram: "
                       << "Fatal Error: fromNode " 
                       << fromNodeIndex << " doesn't exist!\n";
            }
            exit(-1); 
        }
        VocabIndex fromWordName = fromNode->word; 

        if (debug(DebugPrintOutLoop)) {
            dout() << "Lattice::expandNodeToTrigram: processing incoming edge: (" 
                   << fromNodeIndex << ", " << nodeIndex << ")\n" 
                   << "      (" << getWord(fromWordName)
                   << ", " << getWord(wordName) << ")\n"; 
        }

        // compute in bigram prob: taken from the language model when the
        // edge leaves the initial node, otherwise the existing edge weight
        LogP inWeight; 
        if (fromNodeIndex == getInitial()) {
            context[0] = fromWordName; 
            context[1] = Vocab_None; 
            inWeight = lm.wordProb(wordName, context);
        } else { 
            inWeight = inTrans->weight;
        }

        // trigram context, most recent word first
        context[0] = wordName; 
        context[1] = fromWordName; 
        context[2] = Vocab_None; 

        unsigned inFlag = inTrans->flags; 

        // initialize it for self loop processing.
        if (selfLoopFlag) {
            initBSelfLoopDB(selfLoopDB, lm, fromNodeIndex, fromNode, inTrans);
        }

        // going through all the outgoing edges
        //       node = findNode(nodeIndex); 

        outTransIter.init(); 
        while (LatticeTransition * outTrans = outTransIter.next(toNodeIndex)) {

            if (nodeIndex == toNodeIndex) {
                dout() << " In expandNodeToTrigram: self loop: " 
                       << toNodeIndex << "\n"; 

                continue; 
            }

            LatticeNode * toNode = findNode(toNodeIndex); 
            if (!toNode) {
                if (debug(DebugPrintFatalMessages)) {
                    // report the index that failed to resolve; toNode
                    // itself is a null pointer here
                    dout() << "Lattice::expandNodeToTrigram: "
                           << "Fatal Error: toNode " 
                           << toNodeIndex << " doesn't exist!\n";
                }
                exit(-1); 
            }

            if (debug(DebugPrintInnerLoop)) {
                dout() << "Lattice::expandNodeToTrigram: the toNodeIndex (" 
                       << toNodeIndex << " has name "
                       << getWord(toNode->word) << ")\n"; 
            }

            // initialize selfLoopDB;
            if (selfLoopFlag) { 
                initCSelfLoopDB(selfLoopDB, toNodeIndex, outTrans); 
            }

            // duplicate a node if the trigram exists.

            // computed on demand in packNodes(), saving work for cached transitions
            // LogP logProb = lm.wordProb(toNode->word, context);

            if (debug(DebugPrintInnerLoop)) {
                dout() << "Lattice::expandNodeToTrigram: tripleIndex (" 
                       << toNodeIndex << " | " << nodeIndex << ", "
                       << fromNodeIndex << ")\n"
                       << "      trigram prob: (" 
                       << getWord(toNode->word) << " | "
                       << context << ") found!!!!!!!!\n"; 
            }

            // create one node and two edges to place trigram prob
            // I need to do packing nodes here.

            PackInput packInput;
            packInput.fromWordName = fromWordName;
            packInput.wordName = wordName;
            packInput.toWordName = toNode->word;
            packInput.fromNodeIndex = fromNodeIndex;
            packInput.toNodeIndex = toNodeIndex; 
            packInput.inWeight = inWeight; 
            // computed on demand in packNodes()
            //packInput.outWeight = logProb; 
            packInput.lm = &lm;
            packInput.inFlag = inFlag; 
            packInput.outFlag = outTrans->flags; 
            packInput.nodeIndex = nodeIndex; 
            packInput.toNodeId = 0;

            if (debug(DebugPrintInnerLoop)) {
                dout() << "Lattice::expandNodeToTrigram: "
                       << "outgoing edge for first incoming edge: (" 
                       << nodeIndex << ", " << toNodeIndex << ") is reused\n"; 
            }

            packedNodeList.packNodes(*this, packInput); 

            // give up once the lattice has grown beyond the caller's limit
            if (maxNodes > 0 && getNumNodes() > maxNodes) {
                dout() << "Lattice::expandNodeToTrigram: "
                       << "aborting with number of nodes exceeding "
                       << maxNodes << endl;
                return false;
            }

            // processing selfLoop
            if (selfLoopFlag) { 
                expandSelfLoop(lm, selfLoopDB, packedSelfLoopNodeList); 
            }
        }   // end of inter-loop
    } // end of out-loop

    // processing selfLoop case: drop the loop from both transition tables.
    // The node is looked up again before each removal rather than reusing
    // the pointer obtained at the top of the function.
    if (selfLoopFlag) { 
        node = findNode(nodeIndex);
        node->inTransitions.remove(nodeIndex);
        node = findNode(nodeIndex);
        selfLoop = node->outTransitions.remove(nodeIndex);
        if (!selfLoop) {
            dout() << "Lattice::expandNodeToTrigram: "
                   << "nonFatal Error: non symetric setting\n";
            exit(-1); 
        }
    }

    // remove bigram transitions along with the old node
    removeNode(nodeIndex); 
    return true; 
}

/*
 * Expand the whole lattice for trigram probabilities.
 *
 * The nodes are ordered with sortNodes() and each one, except the initial
 * and final nodes, is expanded in that order by expandNodeToTrigram().
 *
 *   lm        language model forwarded to expandNodeToTrigram().
 *   maxNodes  node limit forwarded to expandNodeToTrigram().
 *
 * Returns true if every node was expanded, false as soon as one
 * expansion fails; the remaining nodes are then left untouched.
 */
Boolean 
Lattice::expandToTrigram(LM &lm, unsigned maxNodes)
{
    if (debug(DebugPrintFunctionality)) {
        dout() << "Lattice::expandToTrigram: "
               << "starting expansion to conventional trigram lattice ...\n";
    }

    // Snapshot of the node count taken before any expansion happens; it
    // sizes the buffer that sortNodes() fills.
    unsigned totalNodes = getNumNodes(); 

    NodeIndex *order = new NodeIndex[totalNodes];
    assert(order != 0);
    unsigned reachable = sortNodes(order);

    if (reachable != totalNodes && debug(DebugPrintOutLoop)) {
        dout() << "Lattice::expandToTrigram: warning: called with unreachable nodes\n";
    }

    // Single exit point: the loop stops at the first failed expansion and
    // the buffer is released exactly once below.
    Boolean succeeded = true;
    for (unsigned pos = 0; succeeded && pos < reachable; pos++) {
        NodeIndex current = order[pos];

        // the initial and final nodes are never expanded
        if (current != initial && current != final) {
            if (!expandNodeToTrigram(current, lm, maxNodes)) {
                succeeded = false;
            }
        }
    }

    delete [] order;
    return succeeded; 
}

/*
 * Expand bigram lattice to trigram, with bigram packing 
 *   (just like in nodearray.)
 *
 *   BASIC ALGORITHM: 
 *      1) foreach node u connecting with the initial NULL node, 
 *              let W be the set of nodes that have an edge going into
 *              node u.
 *              a) get the set of outgoing edges e(u) of node u whose 
 *                      far-end nodes are not marked as processed.
 *
 *              b) for each edge e = (u, v) in e(u): 
 *                      for each node w in W do:
 *                        i)  if p(v | u, w) exists, 
 *                              duplicate u to get u' ( word name ), 
 *                              and edge (w, u') and (u', v)
 *                              with all the attributes.
 *                              place p(v | u, w) on edge (u', v)
 *
 *                       ii)  if p(v | u, w) does not exist,
 *                              add p(v | u) on edge (u, v)
 *                              multiply bo(w,u) to p(u | w) on (w, u)
 * 
 *  reservedFlag: to indicate that not all the outGoing nodes from the 
 *      current node have trigram probs and this bigram edge needs to be 
 *      preserved for bigram prob.
 */
Boolean 
Lattice::expandNodeToCompactTrigram(NodeIndex nodeIndex, Ngram &ngram,
							unsigned maxNodes)
{
    if (debug(DebugPrintOutLoop)) {
      dout() << "Lattice::expandNodeToCompactTrigram: \n";
    }

    SelfLoopDB selfLoopDB; 
    PackedNodeList packedNodeList, 
      packedSelfLoopNodeList; 
    LatticeTransition *outTrans;
    NodeIndex fromNodeIndex, backoffNodeIndex;
    NodeIndex toNodeIndex;
    LatticeNode *fromNode; 
    VocabIndex * bowContext = 0; 
    int inBOW = 0; 
    VocabIndex context[3];
    LatticeNode *node = findNode(nodeIndex); 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -