📄 latticeexpand.cc
        assert(oldNode != 0);

        unsigned contextLength = Vocab::length(context);

        makeArray(VocabIndex, newContext, contextLength + 2);
        Vocab::copy(&newContext[1], context);
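        // newContext[0] is left free for this node's word (filled in below);
        // newContext[1..] now hold a copy of the incoming context, including
        // its Vocab_None terminator.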
        TRANSITER_T<NodeIndex,LatticeTransition>
                                transIter(oldNode->outTransitions);
        NodeIndex oldIndex2;
        while (LatticeTransition *oldTrans = transIter.next(oldIndex2)) {
            LatticeNode *oldNode2 = findNode(oldIndex2);
            assert(oldNode2 != 0);

            VocabIndex word = oldNode2->word;

            // determine context used by LM
            unsigned usedLength = 0;
            VocabIndex *usedContext;
            LogP wordProb;

            // The old context is extended by word on this node, unless
            // it is null or pause, which are both ignored by the LM.
            // Non-events are added to the context but aren't evaluated.
            if (ignoreWord(word)) {
                usedContext = &newContext[1];
                wordProb = LogP_One;
            } else if (vocab.isNonEvent(word)) {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = LogP_One;
            } else {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = lm.wordProb(word, context);
            }

            // find all possible following words and determine maximal context
            // needed for wordProb computation
            // skip pause and null nodes up to some depth
            LatticeFollowIter followIter(*this, *oldNode2);
            NodeIndex oldIndex3;
            LatticeNode *oldNode3;
            LogP weight;

            while ((oldNode3 = followIter.next(oldIndex3, weight)) != 0) {
                // if one of the following nodes has null or pause as word then
                // we don't attempt any further look-ahead and use the maximal
                // context from the LM
                VocabIndex nextWord = oldNode3->word;
                if (ignoreWord(nextWord)) {
                    insufficientLookaheadNodes += 1;
                    nextWord = Vocab_None;
                }

                unsigned usedLength2;
                lm.contextID(nextWord, usedContext, usedLength2);

                if (usedLength2 > usedLength) {
                    usedLength = usedLength2;
                }
            }
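            // usedLength now holds the longest context any following word's
            // LM lookup needs, so a single duplicate of oldNode2 carrying
            // that maximal context can serve all of its continuations.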
            if (!expandAddTransition(usedContext, usedLength,
                                     word, wordProb, lm,
                                     oldIndex2, *newIndex, oldTrans,
                                     maxNodes, expandMap))
                return false;
        }
    }

    if (insufficientLookaheadNodes > 0) {
        dout() << "Lattice::expandNodeToLM: insufficient lookahead on "
               << insufficientLookaheadNodes << " nodes\n";
    }

    // old node (and transitions) is fully replaced by expanded nodes
    // (and transitions)
    removeNode(oldIndex);

    // save space in expandMap by deleting entries that are no longer used
    expandMap.remove(oldIndex);

    return true;
}
/*
 * The "compact expansion" version makes two changes:
 *
 * - add outgoing transitions to node duplicates with shorter contexts,
 *   not just the maximal context.
 * - only transition out of an expanded node if the LM uses the full context
 *   associated with that node (*** A *** below).
 *   This is possible because of the first change, and it's
 *   where the savings compared to the general expansion are realized.
 *
 * The resulting algorithm is a generalization of expandToCompactTrigram().
 */
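/*
 * Illustration (not part of the original comments): with a trigram LM a
 * successor node may receive transitions into both a bigram-context and a
 * unigram-context duplicate of itself, but only the duplicate whose full
 * context the LM actually uses is expanded further at *** A ***.
 */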
Boolean
Lattice::expandNodeToCompactLM(VocabIndex oldIndex, LM &lm, unsigned maxNodes,
                        Map2<NodeIndex, VocabContext, NodeIndex> &expandMap)
{
    unsigned insufficientLookaheadNodes = 0;

    Map2Iter2<NodeIndex, VocabContext, NodeIndex>
#ifdef USE_SARRAY_MAP2
        expandIter(expandMap, oldIndex);
#else
        expandIter(expandMap, oldIndex, ngramCompare);
#endif
    NodeIndex *newIndex;
    VocabContext context;

    while ((newIndex = expandIter.next(context)) != 0) {
        // node structure might have been moved as a result of insertions
        LatticeNode *oldNode = findNode(oldIndex);
        assert(oldNode != 0);

        Boolean ignoreOldNodeWord = ignoreWord(oldNode->word);

        unsigned contextLength = Vocab::length(context);

        makeArray(VocabIndex, newContext, contextLength + 2);
        Vocab::copy(&newContext[1], context);

        TRANSITER_T<NodeIndex,LatticeTransition>
                                transIter(oldNode->outTransitions);
        NodeIndex oldIndex2;
        while (LatticeTransition *oldTrans = transIter.next(oldIndex2)) {
            LatticeNode *oldNode2 = findNode(oldIndex2);
            assert(oldNode2 != 0);

            VocabIndex word = oldNode2->word;

            // Find the context length used for LM transition oldNode->oldNode2.
            // Because each LM context gets its own node duplicate
            // in this expansion algorithm, we only need to process
            // transitions where the LM context fully matches the context
            // associated with the specific oldNode duplicate we're working on.
            unsigned usedLength0;
            lm.contextID(word, context, usedLength0);

            // if the node we're coming from has a real word then
            // there is no point in using a null context
            // (so we can collapse null and unigram contexts)
            if (usedLength0 == 0 && !ignoreOldNodeWord) {
                usedLength0 = 1;
            }

            // *** A ***
            if (!ignoreWord(word) &&
                context[0] != vocab.ssIndex() &&
                usedLength0 != contextLength)
            {
                continue;
            }
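            // When the LM uses fewer context words than this duplicate
            // carries, the same transition is (or will be) handled from the
            // duplicate whose context is exactly the length the LM uses,
            // so skipping it here loses nothing.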
            // determine context used by LM
            VocabIndex *usedContext;
            LogP wordProb;

            // The old context is extended by word on this node, unless
            // it is null or pause, which are both ignored by the LM.
            // Non-events are added to the context but aren't evaluated.
            if (ignoreWord(word)) {
                usedContext = &newContext[1];
                wordProb = LogP_One;
            } else if (vocab.isNonEvent(word)) {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = LogP_One;
            } else {
                newContext[0] = word;
                usedContext = newContext;
                wordProb = lm.wordProb(word, context);
            }

            // find all possible following words and compute the LM context
            // used for their prediction,
            // then create duplicate nodes for each context length
            // needed for wordProb computation
            LatticeFollowIter followIter(*this, *oldNode2);
            NodeIndex oldIndex3;
            LatticeNode *oldNode3;
            LogP weight;

            int lastUsedLength = -1;

            while ((oldNode3 = followIter.next(oldIndex3, weight)) != 0) {
                unsigned usedLength;

                // if one of the following nodes has null or pause as word then
                // we have to back off to the null context to make sure
                // further expansion can connect above at *** A ***
                VocabIndex nextWord = oldNode3->word;
                if (ignoreWord(nextWord)) {
                    insufficientLookaheadNodes += 1;
                    usedLength = 0;
                } else {
                    lm.contextID(nextWord, usedContext, usedLength);
                }

                // if the node we're going to has a real word then
                // there is no point in using a null context (same as above)
                if (usedLength == 0 && !ignoreWord(word)) {
                    usedLength = 1;
                }

                // optimization: no need to re-add the transition if usedLength
                // is the same as on the previous pass through
                // (lastUsedLength starts at -1, which never matches, so the
                // first transition is always added)
                if (usedLength == lastUsedLength) {
                    continue;
                } else {
                    lastUsedLength = usedLength;
                }

                if (!expandAddTransition(usedContext, usedLength,
                                         word, wordProb, lm,
                                         oldIndex2, *newIndex, oldTrans,
                                         maxNodes, expandMap))
                    return false;
            }

            // transitions to the final node have to be handled specially
            // because the above loop won't apply to it, since the final node
            // has no outgoing transitions!
            if (oldIndex2 == final) {
                // by convention, the final node always uses a context of length 0
                if (!expandAddTransition(usedContext, 0,
                                         word, wordProb, lm,
                                         oldIndex2, *newIndex, oldTrans,
                                         maxNodes, expandMap))
                    return false;
            }
        }
    }

    if (insufficientLookaheadNodes > 0) {
        dout() << "Lattice::expandNodeToCompactLM: insufficient lookahead on "
               << insufficientLookaheadNodes << " nodes\n";
    }

    // old node (and transitions) is fully replaced by expanded nodes
    // (and transitions)
    removeNode(oldIndex);

    // save space in expandMap by deleting entries that are no longer used
    expandMap.remove(oldIndex);

    return true;
}
/*
 * Helper for expandNodeToLM() and expandNodeToCompactLM()
 */
Boolean
Lattice::expandAddTransition(VocabIndex *usedContext, unsigned usedLength,
                        VocabIndex word, LogP wordProb, LM &lm,
                        NodeIndex oldIndex2, VocabIndex newIndex,
                        LatticeTransition *oldTrans, unsigned maxNodes,
                        Map2<NodeIndex, VocabContext, NodeIndex> &expandMap)
{
    LogP transProb;

    if (ignoreWord(word)) {
        transProb = LogP_One;
    } else {
        transProb = wordProb;
    }

    if (!noBackoffWeights && usedContext[0] != vocab.seIndex()) {
        // back-off weight to truncate the context
        // (needs to be called before context truncation below)
        // Note: we check above that this is not a backoff weight after </s>,
        // which some LMs contain but should be ignored for our purposes
        // since lattices all end implicitly in </s>.
        transProb += lm.contextBOW(usedContext, usedLength);
    }

    // truncate context to what LM uses
    VocabIndex saved = usedContext[usedLength];
    usedContext[usedLength] = Vocab_None;
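    // usedContext, truncated in place to usedLength words, now doubles as
    // the Map2 key identifying the target node duplicate; the overwritten
    // entry is restored below before returning.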
    Boolean found;
    NodeIndex *newIndex2 =
                expandMap.insert(oldIndex2, (VocabContext)usedContext, found);

    if (!found) {
        LatticeNode *oldNode2 = findNode(oldIndex2);
        assert(oldNode2 != 0);

        // create new node copy and store it in map
        *newIndex2 = dupNode(word, oldNode2->flags, oldNode2->htkinfo);

        if (maxNodes > 0 && getNumNodes() > maxNodes) {
            dout() << "Lattice::expandAddTransition: "
                   << "aborting with number of nodes exceeding "
                   << maxNodes << endl;
            return false;
        }
    }
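    // expandMap memoizes (old node, LM context) -> duplicate node, so each
    // distinct context of a node is materialized at most once and later
    // transitions into the same context reuse the existing duplicate.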
    LatticeTransition newTrans(transProb, oldTrans->flags);
    insertTrans(newIndex, *newIndex2, newTrans, 0);

    // restore full context
    usedContext[usedLength] = saved;

    return true;
}
Boolean
Lattice::expandToLM(LM &lm, unsigned maxNodes, Boolean compact)
{
    if (debug(DebugPrintFunctionality)) {
        dout() << "Lattice::expandToLM: "
               << "starting " << (compact ? "compact " : "")
               << "expansion to general LM (maxNodes = " << maxNodes
               << ") ...\n";
    }

    unsigned numNodes = getNumNodes();

    NodeIndex *sortedNodes = new NodeIndex[numNodes];
    assert(sortedNodes != 0);
    unsigned numReachable = sortNodes(sortedNodes);

    if (numReachable != numNodes) {
        if (debug(DebugPrintOutLoop)) {
            dout() << "Lattice::expandToLM: warning: called with unreachable nodes\n";
        }
    }

    Map2<NodeIndex, VocabContext, NodeIndex> expandMap;

    // prime expansion map with initial/final nodes
    LatticeNode *startNode = findNode(initial);
    assert(startNode != 0);

    VocabIndex newStartIndex = dupNode(startNode->word, startNode->flags,
                                       startNode->htkinfo);

    VocabIndex context[2];
    context[1] = Vocab_None;

    context[0] = vocab.ssIndex();
    *expandMap.insert(initial, context) = newStartIndex;

    LatticeNode *endNode = findNode(final);
    assert(endNode != 0);

    VocabIndex newEndIndex = dupNode(endNode->word, endNode->flags,
                                     endNode->htkinfo);

    context[0] = Vocab_None;
    *expandMap.insert(final, context) = newEndIndex;
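    // The initial node is seeded with the <s> context and the final node
    // with an empty context (matching the length-0 convention used for
    // transitions into the final node).  Nodes are then expanded in the
    // (topologically) sorted order produced by sortNodes(), so each node's
    // (node, context) entries have already been added to expandMap by its
    // predecessors by the time the node itself is expanded below.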
    for (unsigned i = 0; i < numReachable; i++) {
        NodeIndex nodeIndex = sortedNodes[i];

        if (nodeIndex == final) {
            removeNode(final);
        } else if (compact) {
            if (!expandNodeToCompactLM(nodeIndex, lm, maxNodes, expandMap)) {
                delete [] sortedNodes;
                return false;
            }
        } else {
            if (!expandNodeToLM(nodeIndex, lm, maxNodes, expandMap)) {
                delete [] sortedNodes;
                return false;
            }
        }
    }

    initial = newStartIndex;
    final = newEndIndex;

    delete [] sortedNodes;
    return true;
}