/*
* segment-nbest --
* Apply a hidden segment boundary model to a sequence of N-best lists,
* across utterance boundaries.
*
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2006 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Id: segment-nbest.cc,v 1.25 2006/01/05 20:21:27 stolcke Exp $";
#endif
#include <iostream>
using namespace std;
#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
#include "option.h"
#include "version.h"
#include "File.h"
#include "Vocab.h"
#include "NBest.h"
#include "VocabMap.h"
#include "Ngram.h"
#include "DecipherNgram.h"
#include "BayesMix.h"
#include "Trellis.cc"
#include "Array.cc"
#define DEBUG_PROGRESS 1
#define DEBUG_TRANSITIONS 2
static int version = 0;
static unsigned order = 3;
static unsigned debug = 0;
static char *lmFile = 0;
static const char *sTag = 0;
static const char *startTag = 0;
static const char *endTag = 0;
static double bias = 1.0;
static int toLower = 0;
static char *nbestFiles = 0;
static unsigned maxNbest = 0;
static unsigned maxRescore = 0;
static char *decipherLM = 0;
static double decipherLMW = 8.0;
static double decipherWTW = 0.0;
static double rescoreLMW = 8.0;
static double rescoreWTW = 0.0;
static int noReorder = 0;
static int fbRescore = 0;
static char *writeNbestDir = 0;
static char *noiseTag = 0;
static char *noiseVocabFile = 0;
static char *mixFile = 0;
static int bayesLength = -1;
static double bayesScale = 1.0;
static double mixLambda = 0.5;
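/* stand-in for log(0), so Viterbi paths stay comparable (see Z() below) */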
const LogP LogP_PseudoZero = -100;
static int contextLength = 0;
static Option options[] = {
{ OPT_TRUE, "version", &version, "print version information" },
{ OPT_STRING, "lm", &lmFile, "hidden token sequence model" },
{ OPT_UINT, "order", &order, "ngram order to use for lm" },
{ OPT_UINT, "debug", &debug, "debugging level for lm" },
{ OPT_STRING, "stag", &sTag, "segment tag to use in output" },
{ OPT_STRING, "start-tag", &startTag, "tag to insert in front of N-best hyps" },
{ OPT_STRING, "end-tag", &endTag, "tag to insert at end of N-best hyps" },
{ OPT_FLOAT, "bias", &bias, "bias for segment model" },
{ OPT_TRUE, "tolower", &toLower, "map vocabulary to lowercase" },
{ OPT_STRING, "nbest-files", &nbestFiles, "list of n-best filenames" },
{ OPT_UINT, "max-nbest", &maxNbest, "maximum number of hyps to consider" },
{ OPT_UINT, "max-rescore", &maxRescore, "maximum number of hyps to rescore" },
{ OPT_TRUE, "no-reorder", &noReorder, "don't reorder N-best hyps before rescoring" },
{ OPT_STRING, "decipher-lm", &decipherLM, "DECIPHER(TM) LM for nbest list generation" },
{ OPT_FLOAT, "decipher-lmw", &decipherLMW, "DECIPHER(TM) LM weight" },
{ OPT_FLOAT, "decipher-wtw", &decipherWTW, "DECIPHER(TM) word transition weight" },
{ OPT_FLOAT, "rescore-lmw", &rescoreLMW, "rescoring LM weight" },
{ OPT_FLOAT, "rescore-wtw", &rescoreWTW, "rescoring word transition weight" },
{ OPT_STRING, "noise", &noiseTag, "noise tag to skip" },
{ OPT_STRING, "noise-vocab", &noiseVocabFile, "noise vocabulary to skip" },
{ OPT_TRUE, "fb-rescore", &fbRescore, "rescore N-best lists with forward-backward algorithm" },
{ OPT_STRING, "write-nbest-dir", &writeNbestDir, "output directory for rescored N-best lists" },
{ OPT_UINT, "bayes", &bayesLength, "context length for Bayes mixture LM" },
{ OPT_FLOAT, "bayes-scale", &bayesScale, "log likelihood scale for -bayes" },
{ OPT_STRING, "mix-lm", &mixFile, "LM to mix in" },
{ OPT_FLOAT, "lambda", &mixLambda, "mixture weight for -mix-lm" },
{ OPT_DOC, 0, 0, "plus any number of additional n-best list filenames" },
};
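/*
* Example invocation (file and tag names are hypothetical):
* segment-nbest -lm hidden.ngram -order 3 -stag "<SEG>" -nbest-files lists.txt
*/
/*
* Hidden segmentation states: S marks a word that starts a new
* linguistic segment, NOS marks one that does not; NOSTATE is the
* null value used by the trellis key functions below.
*/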
typedef enum {
NOS, S, NOSTATE
} SegmentState;
const char *stateNames[] = {
"NOS", "S"
};
/*
* Define null key for SegmentState trellis
*/
inline void
Map_noKey(SegmentState &state)
{
state = NOSTATE;
}
inline Boolean
Map_noKeyP(SegmentState &state)
{
return state == NOSTATE;
}
/*
* Segment a sentence by finding the SegmentState sequence
* that has the highest probability.
* NOSscore and Sscore return the trellis scores for segmentations
* ending in the NOS and S states, respectively; if lastState is not
* NOSTATE, the best state sequence is traced back from it and the
* segmented hyp is printed.
*/
void
segmentHyp(NBestHyp &hyp, const VocabIndex *leftContext, LM &lm,
LogP &NOSscore, LogP &Sscore, SegmentState lastState)
{
unsigned len = Vocab::length(hyp.words);
LogP logBias = ProbToLogP(bias);
if (len == 0) {
NOSscore = Sscore = 0.0;
if (lastState != NOSTATE) {
cout << endl;
}
return;
}
Trellis<SegmentState> trellis(len);
Vocab &vocab = lm.vocab;
VocabIndex sContext[2];
sContext[0] = vocab.ssIndex();
sContext[1] = Vocab_None;
VocabIndex *noContext = &sContext[1];
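/* noContext points at the Vocab_None terminator, i.e., an empty context */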
/*
* Replace zero probs with a small non-zero prob, so the Viterbi
* can still distinguish between paths of different probabilities.
*/
#define Z(p) (isZero ? LogP_PseudoZero : (p))
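/* note: Z(p) refers to a local isZero flag computed at each position */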
/*
* Prime the trellis as follows using the context passed to us.
*/
{
Boolean isZero = (lm.wordProb(hyp.words[0], leftContext) == LogP_Zero);
if (bias > 0.0) {
LogP probS = lm.wordProb(vocab.seIndex(), leftContext) +
Z(lm.wordProb(hyp.words[0], sContext));
trellis.setProb(S, probS + logBias);
}
LogP probNOS = Z(lm.wordProb(hyp.words[0], leftContext));
trellis.setProb(NOS, probNOS);
if (debug >= DEBUG_TRANSITIONS) {
cerr << 0 << ": p(NOS) = " << trellis.getProb(NOS)
<< ", P(S) = " << trellis.getProb(S) << endl;
}
}
unsigned pos = 1;
while (hyp.words[pos] != Vocab_None) {
trellis.step();
Boolean isZero = (lm.wordProb(hyp.words[pos], noContext) == LogP_Zero);
/*
* Set up the context for the next word prob
*/
makeArray(VocabIndex, context, contextLength + 1);
unsigned i;
for (i = 0; i < contextLength && i < pos; i ++) {
context[i] = hyp.words[pos - i - 1];
}
for (unsigned k = 0; i < contextLength; i ++, k ++) {
context[i] = leftContext[k];
}
context[i] = Vocab_None;
/*
* Iterate over all combinations of hidden tags for the previous and
* the current word
*/
trellis.update(NOS, NOS, Z(lm.wordProb(hyp.words[pos], context)));
if (bias > 0.0) {
trellis.update(NOS, S, lm.wordProb(vocab.seIndex(), context) +
Z(lm.wordProb(hyp.words[pos], sContext)));
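/*
* Truncate the context to the previous word preceded by <s>,
* for the case where the previous word started a segment
* (this assumes contextLength >= 2).
*/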
context[1] = vocab.ssIndex();
context[2] = Vocab_None;
trellis.update(S, NOS,
Z(lm.wordProb(hyp.words[pos], context)) + logBias);
trellis.update(S, S, lm.wordProb(vocab.seIndex(), context) +
Z(lm.wordProb(hyp.words[pos], sContext)) + logBias);
}
if (debug >= DEBUG_TRANSITIONS) {
cerr << pos << ": p(NOS) = " << trellis.getProb(NOS)
<< ", P(S) = " << trellis.getProb(S) << endl;
}
pos ++;
}
NOSscore = trellis.getLogP(NOS);
Sscore = trellis.getLogP(S);
/*
* If the caller gave a final state to backtrace from, do so and print
* the hyp with its segmentation.
*/
if (lastState != NOSTATE) {
makeArray(SegmentState, segs, len);
if (trellis.viterbi(segs, len, lastState) != len) {
cerr << "trellis.viterbi failed\n";
} else {
for (unsigned i = 0; i < len; i++) {
if (segs[i] == S) {
cout << sTag << " ";
}
cout << vocab.getWord(hyp.words[i]);
if (i != len - 1) {
cout << " ";
}
}
cout << endl;
}
}
}
/*
* Segment a list of nbest lists, i.e., find the sequence of
* hyps that has the highest joint probability.
* Algorithm:
* - Do forward dynamic programming on a trellis representing
* the possible hyp sequences (the `nbest trellis'). For each
* nbest segment, there are two states for each hyp.
* The first one represents the hyp ending in a NOS state, the
* second one represents the hyp ending in an S state (i.e.,
* the last word of the hyp is the first word of a linguistic segment).
* - To compute the local cost for each hyp, we do a forward
* dynamic programming on a second trellis (the `hyp trellis')
* representing the possible segmentations of that hyp.
* - Do a Viterbi traceback through the nbest trellis to find the
* best sequence of hyps overall.
* - For each hyp in that sequence, do a Viterbi traceback on the
* hyp trellis to recover the best segmentation for it.
*/
/*
* The following macros construct and access aggregate states for the nbest
* trellis, encoding a hyp number and the segmentation info for the last
* word.
*/
#define NBEST_STATE(j,seg) (((j) << 1) | (seg))
#define NBEST_SEG(state) ((SegmentState)((state) & 1))
#define NBEST_HYP(state) ((state) >> 1)
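/*
* For example, hyp number 3 yields NBEST_STATE(3, NOS) == 6 and
* NBEST_STATE(3, S) == 7; conversely, NBEST_HYP(7) == 3 and
* NBEST_SEG(7) == S.
*/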
/*
* Forward probability computation on the nbest trellis
*/
Trellis<unsigned> *
forwardNbest(Array<NBestList *> &nbestLists, unsigned numLists,
LM &lm, double lmw, double wtw)
{
Vocab &vocab = lm.vocab;
assert(numLists > 0);
Trellis<unsigned> *nbestTrellis = new Trellis<unsigned>(numLists + 1);
assert(nbestTrellis != 0);
if (debug >= DEBUG_PROGRESS) {
cerr << "Forward pass...0";
}
/*
* Initialize the trellis with the scores from the first nbest list.
* The first context is a sentence start.
*/
{
VocabIndex context[3]; /* sentence-start context for the first nbest list */
context[0] = vocab.ssIndex();
context[1] = Vocab_None;
for (unsigned j = 0;
j < nbestLists[0]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[0]->getHyp(j);
LogP NOSscore, Sscore;
segmentHyp(hyp, context, lm, NOSscore, Sscore, NOSTATE);
nbestTrellis->setProb(NBEST_STATE(j, NOS),
hyp.acousticScore + lmw * NOSscore + wtw * hyp.numWords);
if (bias > 0.0) {
nbestTrellis->setProb(NBEST_STATE(j, S),
hyp.acousticScore + lmw * Sscore + wtw * hyp.numWords);
}
//cerr << "FORW:list " << 0 << " hyp " << j
// << " forw " << nbestTrellis->getLogP(NBEST_STATE(j, NOS))
// << " " << nbestTrellis->getLogP(NBEST_STATE(j, S))
// << endl;
}
}
for (unsigned i = 1; i < numLists; i ++) {
makeArray(VocabIndex, context, contextLength + 1);
/* word context from preceding nbest hyp */
if (debug >= DEBUG_PROGRESS) {
cerr << "..." << i;
}
nbestTrellis->step();
for (unsigned j = 0;
j < nbestLists[i]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[i]->getHyp(j);
for (unsigned k = 0;
k < nbestLists[i-1]->numHyps() &&
(maxRescore <= 0 || k < maxRescore);
k ++)
{
NBestHyp &prevHyp = nbestLists[i-1]->getHyp(k);
unsigned prevLen = Vocab::length(prevHyp.words);
LogP NOSscore, Sscore;
/*
* Set up the last words from the previous hyp
* as context for this one.
*/
{
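/* this local k shadows the hyp index k of the enclosing loop */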
unsigned k;
for (k = 0; k < contextLength && k < prevLen; k ++) {
context[k] = prevHyp.words[prevLen - 1 - k];
}
context[k] = Vocab_None;
}
segmentHyp(hyp, context, lm, NOSscore, Sscore, NOSTATE);
nbestTrellis->update(NBEST_STATE(k, NOS), NBEST_STATE(j, NOS),
hyp.acousticScore + lmw * NOSscore + wtw * hyp.numWords);
if (bias > 0.0) {
nbestTrellis->update(NBEST_STATE(k, NOS), NBEST_STATE(j, S),
hyp.acousticScore + lmw * Sscore + wtw * hyp.numWords);
/*
* Now the same for the case where the previous hyp's
* last word starts a sentence.
*/
if (prevLen == 0) {
context[0] = Vocab_None;
} else {
context[0] = prevHyp.words[prevLen - 1];
context[1] = vocab.ssIndex();
context[2] = Vocab_None;
}
segmentHyp(hyp, context, lm, NOSscore, Sscore, NOSTATE);
nbestTrellis->update(NBEST_STATE(k, S), NBEST_STATE(j, NOS),
hyp.acousticScore + lmw * NOSscore + wtw * hyp.numWords);
nbestTrellis->update(NBEST_STATE(k, S), NBEST_STATE(j, S),
hyp.acousticScore + lmw * Sscore + wtw * hyp.numWords);
}
}
//cerr << "FORW:list " << i << " hyp " << j
// << " forw " << nbestTrellis->getLogP(NBEST_STATE(j, NOS))
// << " " << nbestTrellis->getLogP(NBEST_STATE(j, S))
// << endl;
}
}
if (debug >= DEBUG_PROGRESS) {
cerr << endl;
}
return nbestTrellis;
}
/*
* Forward-backward on nbest trellis
*/
void
forwardBackNbest(Array<NBestList *> &nbestLists, Array<char *> &nbestNames,
unsigned numLists, LM &lm, double lmw, double wtw)
{
Vocab &vocab = lm.vocab;
Trellis<unsigned> *nbestTrellis =
forwardNbest(nbestLists, numLists, lm, lmw, wtw);
if (debug >= DEBUG_PROGRESS) {
cerr << "Backward pass..." << numLists - 1;
}
nbestTrellis->initBack();
/*
* Initialize the backward probabilities for the last nbest list
*/
{
VocabIndex context[3]; /* sentence-start context for the last nbest list */
context[0] = vocab.ssIndex();
context[1] = Vocab_None;
for (unsigned j = 0;
j < nbestLists[numLists - 1]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[numLists - 1]->getHyp(j);
/*
* We don't assume that the final utterance is a complete
* sentence, so initialize the beta's to 1.
* If we did, this should be p(</s> | hyp) .
*/
nbestTrellis->setBackProb(NBEST_STATE(j, NOS), LogP_One);
if (bias > 0.0) {
nbestTrellis->setBackProb(NBEST_STATE(j, S), LogP_One);
}
}
}
for (int i = numLists - 2; i >= 0; i --) {
makeArray(VocabIndex, context, contextLength + 1);
/* word context from preceding nbest hyp */
if (debug >= DEBUG_PROGRESS) {
cerr << "..." << i;
}
nbestTrellis->stepBack();
for (unsigned j = 0;
j < nbestLists[i]->numHyps() &&
(maxRescore <= 0 || j < maxRescore);
j ++)
{
NBestHyp &hyp = nbestLists[i]->getHyp(j);
unsigned hypLen = Vocab::length(hyp.words);
for (unsigned k = 0;
k < nbestLists[i+1]->numHyps() &&
(maxRescore <= 0 || k < maxRescore);
k ++)
{
NBestHyp &nextHyp = nbestLists[i+1]->getHyp(k);
LogP NOSscore, Sscore;
/*
* Set up the last words of this hyp as context
* for the hyp in the following nbest list.
*/
{
unsigned k;
for (k = 0; k < contextLength && k < hypLen; k ++) {
context[k] = hyp.words[hypLen - 1 - k];
}
context[k] = Vocab_None;
}