/*
* segment-nbest --
* Apply a hidden segment boundary model to a sequence of N-best lists,
* across utterance boundaries.
*
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2006 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Id: segment-nbest.cc,v 1.25 2006/01/05 20:21:27 stolcke Exp $";
#endif
#include <iostream>
using namespace std;
#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
#include "option.h"
#include "version.h"
#include "File.h"
#include "Vocab.h"
#include "NBest.h"
#include "VocabMap.h"
#include "Ngram.h"
#include "DecipherNgram.h"
#include "BayesMix.h"
#include "Trellis.cc"
#include "Array.cc"
#define DEBUG_PROGRESS 1
#define DEBUG_TRANSITIONS 2
static int version = 0;
static unsigned order = 3;
static unsigned debug = 0;
static char *lmFile = 0;
static const char *sTag = 0;
static const char *startTag = 0;
static const char *endTag = 0;
static double bias = 1.0;
static int toLower = 0;
static char *nbestFiles = 0;
static unsigned maxNbest = 0;
static unsigned maxRescore = 0;
static char *decipherLM = 0;
static double decipherLMW = 8.0;
static double decipherWTW = 0.0;
static double rescoreLMW = 8.0;
static double rescoreWTW = 0.0;
static int noReorder = 0;
static int fbRescore = 0;
static char *writeNbestDir = 0;
static char *noiseTag = 0;
static char *noiseVocabFile = 0;
static char *mixFile = 0;
static int bayesLength = -1;
static double bayesScale = 1.0;
static double mixLambda = 0.5;
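/* stand-in for log(0), so Viterbi paths stay comparable (see Z() below) */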
const LogP LogP_PseudoZero = -100;
static int contextLength = 0;
static Option options[] = {
{ OPT_TRUE, "version", &version, "print version information" },
{ OPT_STRING, "lm", &lmFile, "hidden token sequence model" },
{ OPT_UINT, "order", &order, "ngram order to use for lm" },
{ OPT_UINT, "debug", &debug, "debugging level for lm" },
{ OPT_STRING, "stag", &sTag, "segment tag to use in output" },
{ OPT_STRING, "start-tag", &startTag, "tag to insert in front of N-best hyps" },
{ OPT_STRING, "end-tag", &endTag, "tag to insert at end of N-best hyps" },
{ OPT_FLOAT, "bias", &bias, "bias for segment model" },
{ OPT_TRUE, "tolower", &toLower, "map vocabulary to lowercase" },
{ OPT_STRING, "nbest-files", &nbestFiles, "list of n-best filenames" },
{ OPT_UINT, "max-nbest", &maxNbest, "maximum number of hyps to consider" },
{ OPT_UINT, "max-rescore", &maxRescore, "maximum number of hyps to rescore" },
{ OPT_TRUE, "no-reorder", &noReorder, "don't reorder N-best hyps before rescoring" },
{ OPT_STRING, "decipher-lm", &decipherLM, "DECIPHER(TM) LM for nbest list generation" },
{ OPT_FLOAT, "decipher-lmw", &decipherLMW, "DECIPHER(TM) LM weight" },
{ OPT_FLOAT, "decipher-wtw", &decipherWTW, "DECIPHER(TM) word transition weight" },
{ OPT_FLOAT, "rescore-lmw", &rescoreLMW, "rescoring LM weight" },
{ OPT_FLOAT, "rescore-wtw", &rescoreWTW, "rescoring word transition weight" },
{ OPT_STRING, "noise", &noiseTag, "noise tag to skip" },
{ OPT_STRING, "noise-vocab", &noiseVocabFile, "noise vocabulary to skip" },
{ OPT_TRUE, "fb-rescore", &fbRescore, "rescore N-best lists with forward-backward algorithm" },
{ OPT_STRING, "write-nbest-dir", &writeNbestDir, "output directory for rescored N-best lists" },
{ OPT_UINT, "bayes", &bayesLength, "context length for Bayes mixture LM" },
{ OPT_FLOAT, "bayes-scale", &bayesScale, "log likelihood scale for -bayes" },
{ OPT_STRING, "mix-lm", &mixFile, "LM to mix in" },
{ OPT_FLOAT, "lambda", &mixLambda, "mixture weight for -mix-lm" },
{ OPT_DOC, 0, 0, "plus any number of additional n-best list filenames" },
};
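/*
* Example invocation (file and tag names are hypothetical):
* segment-nbest -lm hidden.ngram -order 3 -stag "<SEG>" -nbest-files lists.txt
*/
/*
* Hidden segmentation states: S marks a word that starts a new
* linguistic segment, NOS marks one that does not; NOSTATE is the
* null value used by the trellis key functions below.
*/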
typedef enum {
NOS, S, NOSTATE
} SegmentState;
const char *stateNames[] = {
"NOS", "S"
};
/*
* Define null key for SegmentState trellis
*/
inline void
Map_noKey(SegmentState &state)
{
state = NOSTATE;
}
inline Boolean
Map_noKeyP(SegmentState &state)
{
return state == NOSTATE;
}
/*
* Segment a sentence by finding the SegmentState sequence
* that has the highest probability.
* NOSscore and Sscore return the trellis scores for segmentations
* ending in the NOS and S states, respectively; if lastState is not
* NOSTATE, the best state sequence is traced back from it and the
* segmented hyp is printed.
*/
void
segmentHyp(NBestHyp &hyp, const VocabIndex *leftContext, LM &lm,
LogP &NOSscore, LogP &Sscore, SegmentState lastState)
{
unsigned len = Vocab::length(hyp.words);
LogP logBias = ProbToLogP(bias);
if (len == 0) {
NOSscore = Sscore = 0.0;
if (lastState != NOSTATE) {
cout << endl;
}
return;
}
Trellis<SegmentState> trellis(len);
Vocab &vocab = lm.vocab;
VocabIndex sContext[2];
sContext[0] = vocab.ssIndex();
sContext[1] = Vocab_None;
VocabIndex *noContext = &sContext[1];
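/* noContext points at the Vocab_None terminator, i.e., an empty context */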
/*
* Replace zero probs with a small non-zero prob, so the Viterbi
* can still distinguish between paths of different probabilities.
*/
#define Z(p) (isZero ? LogP_PseudoZero : (p))
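/* note: Z(p) refers to a local isZero flag computed at each position */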
/*
* Prime the trellis as follows using the context passed to us.
*/
{
Boolean isZero = (lm.wordProb(hyp.words[0], leftContext) == LogP_Zero);
if (bias > 0.0) {
LogP probS = lm.wordProb(vocab.seIndex(), leftContext) +
Z(lm.wordProb(hyp.words[0], sContext));
trellis.setProb(S, probS + logBias);
}
LogP probNOS = Z(lm.wordProb(hyp.words[0], leftContext));
trellis.setProb(NOS, probNOS);
if (debug >= DEBUG_TRANSITIONS) {
cerr << 0 << ": p(NOS) = " << trellis.getProb(NOS)
<< ", P(S) = " << trellis.getProb(S) << endl;
}
}
unsigned pos = 1;
while (hyp.words[pos] != Vocab_None) {
trellis.step();
Boolean isZero = (lm.wordProb(hyp.words[pos], noContext) == LogP_Zero);
/*
* Set up the context for the next word prob
*/
makeArray(VocabIndex, context, contextLength + 1);
unsigned i;
for (i = 0; i < contextLength && i < pos; i ++) {
context[i] = hyp.words[pos - i - 1];
}
for (unsigned k = 0; i < contextLength; i ++, k ++) {
context[i] = leftContext[k];
}
context[i] = Vocab_None;
/*
* Iterate over all combinations of hidden tags for the previous and
* the current word
*/
trellis.update(NOS, NOS, Z(lm.wordProb(hyp.words[pos], context)));
if (bias > 0.0) {
trellis.update(NOS, S, lm.wordProb(vocab.seIndex(), context) +
Z(lm.wordProb(hyp.words[pos], sContext)));
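/*
* Truncate the context to the previous word preceded by <s>,
* for the case where the previous word started a segment
* (this assumes contextLength >= 2).
*/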
context[1] = vocab.ssIndex();
context[2] = Vocab_None;
trellis.update(S, NOS,
Z(lm.wordProb(hyp.words[pos], context)) + logBias);
trellis.update(S, S, lm.wordProb(vocab.seIndex(), context) +
Z(lm.wordProb(hyp.words[pos], sContext)) + logBias);
}
if (debug >= DEBUG_TRANSITIONS) {
cerr << pos << ": p(NOS) = " << trellis.getProb(NOS)
<< ", P(S) = " << trellis.getProb(S) << endl;
}
pos ++;
}
NOSscore = trellis.getLogP(NOS);
Sscore = trellis.getLogP(S);
/*
* If the caller gave a final state to backtrace from, do so and print
* the hyp with its segmentation.
*/
if (lastState != NOSTATE) {
makeArray(SegmentState, segs, len);
if (trellis.viterbi(segs, len, lastState) != len) {
cerr << "trellis.viterbi failed\n";
} else {
for (unsigned i = 0; i < len; i++) {
if (segs[i] == S) {
cout << sTag << " ";
}
cout << vocab.getWord(hyp.words[i]);
if (i != len - 1) {
cout << " ";
}
}
cout << endl;
}
}
}
/*
* Segment a list of nbest lists, i.e., find the sequence of
* hyps that has the highest joint probability.
* Algorithm:
* - Do forward dynamic programming on a trellis representing
* the possible hyp sequences (the `nbest trellis'). For each
* nbest segment, there are two states for each hyp.
* The first one represents the hyp ending in a NOS state, the
* second one represents the hyp ending in an S state (i.e.,
* the last word of the hyp is the first word of a linguistic segment).
* - To compute the local cost for each hyp, we do a forward
* dynamic programming on a second trellis (the `hyp trellis')
* representing the possible segmentations of that hyp.
* - Do a Viterbi traceback through the nbest trellis to find the
* best sequence of hyps overall.
* - For each hyp in that sequence, do a Viterbi traceback on the
* hyp trellis to recover the best segmentation for it.
*/
/*
* The following macros construct and access aggregate states for the nbest
* trellis, encoding a hyp number and the segmentation info for the last
* word.
*/
#define NBEST_STATE(j,seg) (((j) << 1) | (seg))
#define NBEST_SEG(state) ((SegmentState)((state) & 1))
#define NBEST_HYP(state) ((state) >> 1)
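/*
* For example, hyp number 3 yields NBEST_STATE(3, NOS) == 6 and
* NBEST_STATE(3, S) == 7; conversely, NBEST_HYP(7) == 3 and
* NBEST_SEG(7) == S.
*/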
/*
* Forward probability computation on the nbest trellis
*/
Trellis<unsigned> *
forwardNbest(Array<NBestList *> &nbestLists, unsigned numLists,
LM &lm, double lmw, double wtw)
{
Vocab &vocab = lm.vocab;
assert(numLists > 0);
Trellis<unsigned> *nbestTrellis = new Trellis<unsigned>(numLists + 1);
assert(nbestTrellis != 0);
if (debug >= DEBUG_PROGRESS) {
cerr << "Forward pass...0";
}
/*
* Initialize the trellis with the scores from the first nbest list.
* The first context is a sentence start.
*/
{
VocabIndex context[3]; /* sentence-start context for the first nbest list */
context[0] = vocab.ssIndex();
context[1] = Vocab_None;
for (unsigned j = 0;
j < nbestLists[0]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[0]->getHyp(j);
LogP NOSscore, Sscore;
segmentHyp(hyp, context, lm, NOSscore, Sscore, NOSTATE);
nbestTrellis->setProb(NBEST_STATE(j, NOS),
hyp.acousticScore + lmw * NOSscore + wtw * hyp.numWords);
if (bias > 0.0) {
nbestTrellis->setProb(NBEST_STATE(j, S),
hyp.acousticScore + lmw * Sscore + wtw * hyp.numWords);
}
//cerr << "FORW:list " << 0 << " hyp " << j
// << " forw " << nbestTrellis->getLogP(NBEST_STATE(j, NOS))
// << " " << nbestTrellis->getLogP(NBEST_STATE(j, S))
// << endl;
}
}
for (unsigned i = 1; i < numLists; i ++) {
makeArray(VocabIndex, context, contextLength + 1);
/* word context from preceding nbest hyp */
if (debug >= DEBUG_PROGRESS) {
cerr << "..." << i;
}
nbestTrellis->step();
for (unsigned j = 0;
j < nbestLists[i]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[i]->getHyp(j);
for (unsigned k = 0;
k < nbestLists[i-1]->numHyps() &&
(maxRescore <= 0 || k < maxRescore);
k ++)
{
NBestHyp &prevHyp = nbestLists[i-1]->getHyp(k);
unsigned prevLen = Vocab::length(prevHyp.words);
LogP NOSscore, Sscore;
/*
* Set up the last words from the previous hyp
* as context for this one.
*/
{
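/* this local k shadows the hyp index k of the enclosing loop */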
unsigned k;
for (k = 0; k < contextLength && k < prevLen; k ++) {
context[k] = prevHyp.words[prevLen - 1 - k];
}
context[k] = Vocab_None;
}
segmentHyp(hyp, context, lm, NOSscore, Sscore, NOSTATE);
nbestTrellis->update(NBEST_STATE(k, NOS), NBEST_STATE(j, NOS),
hyp.acousticScore + lmw * NOSscore + wtw * hyp.numWords);
if (bias > 0.0) {
nbestTrellis->update(NBEST_STATE(k, NOS), NBEST_STATE(j, S),
hyp.acousticScore + lmw * Sscore + wtw * hyp.numWords);
/*
* Now the same for the case where the previous hyp's
* last word starts a sentence.
*/
if (prevLen == 0) {
context[0] = Vocab_None;
} else {
context[0] = prevHyp.words[prevLen - 1];
context[1] = vocab.ssIndex();
context[2] = Vocab_None;
}
segmentHyp(hyp, context, lm, NOSscore, Sscore, NOSTATE);
nbestTrellis->update(NBEST_STATE(k, S), NBEST_STATE(j, NOS),
hyp.acousticScore + lmw * NOSscore + wtw * hyp.numWords);
nbestTrellis->update(NBEST_STATE(k, S), NBEST_STATE(j, S),
hyp.acousticScore + lmw * Sscore + wtw * hyp.numWords);
}
}
//cerr << "FORW:list " << i << " hyp " << j
// << " forw " << nbestTrellis->getLogP(NBEST_STATE(j, NOS))
// << " " << nbestTrellis->getLogP(NBEST_STATE(j, S))
// << endl;
}
}
if (debug >= DEBUG_PROGRESS) {
cerr << endl;
}
return nbestTrellis;
}
/*
* Forward-backward on nbest trellis
*/
void
forwardBackNbest(Array<NBestList *> &nbestLists, Array<char *> &nbestNames,
unsigned numLists, LM &lm, double lmw, double wtw)
{
Vocab &vocab = lm.vocab;
Trellis<unsigned> *nbestTrellis =
forwardNbest(nbestLists, numLists, lm, lmw, wtw);
if (debug >= DEBUG_PROGRESS) {
cerr << "Backward pass..." << numLists - 1;
}
nbestTrellis->initBack();
/*
* Initialize the backward probabilities for the last nbest list
*/
{
VocabIndex context[3]; /* sentence-start context for the last nbest list */
context[0] = vocab.ssIndex();
context[1] = Vocab_None;
for (unsigned j = 0;
j < nbestLists[numLists - 1]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[numLists - 1]->getHyp(j);
/*
* We don't assume that the final utterance is a complete
* sentence, so initialize the beta's to 1.
* If we did, this should be p(</s> | hyp) .
*/
nbestTrellis->setBackProb(NBEST_STATE(j, NOS), LogP_One);
if (bias > 0.0) {
nbestTrellis->setBackProb(NBEST_STATE(j, S), LogP_One);
}
}
}
for (int i = numLists - 2; i >= 0; i --) {
makeArray(VocabIndex, context, contextLength + 1);
/* word context from preceding nbest hyp */
if (debug >= DEBUG_PROGRESS) {
cerr << "..." << i;
}
nbestTrellis->stepBack();
for (unsigned j = 0;
j < nbestLists[i]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[i]->getHyp(j);
unsigned hypLen = Vocab::length(hyp.words);
for (unsigned k = 0;
k < nbestLists[i+1]->numHyps() &&
(maxRescore <= 0 || k < maxRescore);
k ++)
{
NBestHyp &nextHyp = nbestLists[i+1]->getHyp(k);
LogP NOSscore, Sscore;
/*
* Set up the last words of this hyp as context
* for the hyp in the following nbest list.
*/
{
unsigned k;
for (k = 0; k < contextLength && k < hypLen; k ++) {
context[k] = hyp.words[hypLen - 1 - k];
}
context[k] = Vocab_None;
}