/*
 * nbest.cc --
 *	N-best hypothesis and N-best list handling.
 */
vocab.addWords(justWords, words, actualNumWords + 1);
}
if (decipherFormat == 2 && backtrace) {
delete [] wordInfo;
wordInfo = new NBestWordInfo[actualNumWords + 1];
for (unsigned j = 0; j < actualNumWords; j ++) {
wordInfo[j] = backtraceInfo[j];
wordInfo[j].word = words[j];
}
wordInfo[actualNumWords].word = Vocab_None;
} else {
wordInfo = 0;
}
} else {
unsigned i = 0;
for (unsigned j = 0; justWords[j] != 0; j ++) {
char *start = (char *)justWords[j];
char *cp;
while (cp = strchr(start, multiwordSeparator)) {
*cp = '\0';
words[i++] =
unkIsWord ? vocab.getIndex(start, vocab.unkIndex())
: vocab.addWord(start);
*cp = multiwordSeparator;
start = cp + 1;
}
words[i++] =
unkIsWord ? vocab.getIndex(start, vocab.unkIndex())
: vocab.addWord(start);
}
words[i] = Vocab_None;
}
return true;
}
/*
 * Write a single N-best hypothesis to file, either in Decipher format
 * (bytelog total score, optionally with per-word backtrace fields) or
 * in the plain "acoustic-score lm-score num-words words..." format.
 */
void
NBestHyp::write(File &file, Vocab &vocab, Boolean decipherFormat,
						LogP acousticOffset)
{
    /*
     * Score prefix: Decipher wants a single bytelog-encoded total
     * score; the plain format lists the score components separately.
     */
    if (decipherFormat) {
	fprintf(file, "(%d)", (int)LogPtoBytelog(totalScore + acousticOffset));
    } else {
	fprintf(file, "%g %g %d", acousticScore + acousticOffset,
					languageScore, numWords);
    }

    /*
     * Backtrace information is written only if known and Decipher
     * format is desired.
     */
    Boolean withBacktrace = decipherFormat && wordInfo != 0;

    for (unsigned idx = 0; words[idx] != Vocab_None; idx++) {
	if (withBacktrace) {
	    fprintf(file, " %s ( st: %.2f et: %.2f g: %d a: %d )",
			    vocab.getWord(wordInfo[idx].word),
			    wordInfo[idx].start,
			    wordInfo[idx].start+wordInfo[idx].duration - frameLength,
			    (int)LogPtoBytelog(wordInfo[idx].languageScore),
			    (int)LogPtoBytelog(wordInfo[idx].acousticScore));
	} else {
	    fprintf(file, " %s", vocab.getWord(words[idx]));
	}
    }

    fprintf(file, "\n");
}
/*
 * Recompute the hyp's LM score using a new language model and combine
 * it with the (unchanged) acoustic score and a word transition penalty
 * into a new total score.
 */
void
NBestHyp::rescore(LM &lm, double lmScale, double wtScale)
{
    TextStats stats;

    /*
     * LM score is recomputed,
     * numWords is set to take non-word tokens into account
     */
    languageScore = weightLogP(lmScale, lm.sentenceProb(words, stats));
    numWords = stats.numWords;

    /*
     * Note: In the face of zero probability words we do NOT
     * set the LM probability to zero. These cases typically
     * reflect a vocabulary mismatch between the rescoring LM
     * and the recognizer, and it is more useful to rescore based on
     * the known words alone. The warning hopefully will cause
     * someone to assess the problem.
     */
    if (stats.zeroProbs > 0) {
	cerr << "warning: hyp contains zero prob words: "
	     << (lm.vocab.use(), words) << endl;
    }
    if (stats.numOOVs > 0) {
	cerr << "warning: hyp contains OOV words: "
	     << (lm.vocab.use(), words) << endl;
    }

    /* languageScore is already scaled above, so it enters unweighted here */
    totalScore = (LogP)(acousticScore +
			languageScore +
			wtScale * numWords);
}
void
NBestHyp::reweight(double lmScale, double wtScale, double amScale)
{
totalScore = weightLogP(amScale, acousticScore) +
weightLogP(lmScale, languageScore) +
(LogP)wtScale * numWords;
}
/*
 * Reconstruct the acoustic score of a Decipher-generated hyp by
 * recomputing the recognizer LM score and word transition penalties
 * and subtracting them from the stored total score.
 */
void
NBestHyp::decipherFix(LM &lm, double lmScale, double wtScale)
{
    TextStats stats;

    /*
     * LM score is recomputed,
     * numWords is set to take non-word tokens into account
     */
    languageScore = weightLogP(lmScale, lm.sentenceProb(words, stats));
    numWords = stats.numWords;

    /*
     * Arguably a bug, but Decipher actually applies WTW to pauses.
     * So we have to do the same when subtracting the non-acoustic
     * scores below.
     */
    unsigned numAllWords = Vocab::length(words);

    /*
     * Unlike rescore(), zero-prob/OOV cases force the LM score to
     * LogP_Zero here, since we are reconstructing the recognizer's
     * own score components.
     */
    if (stats.zeroProbs > 0) {
	cerr << "warning: hyp contains zero prob words: "
	     << (lm.vocab.use(), words) << endl;
	languageScore = LogP_Zero;
    }
    if (stats.numOOVs > 0) {
	cerr << "warning: hyp contains OOV words: "
	     << (lm.vocab.use(), words) << endl;
	languageScore = LogP_Zero;
    }

    acousticScore = totalScore -
		    languageScore -
		    (LogP)wtScale * numAllWords;
}
/*
* N-Best lists
*/
/* default initial capacity of the hypList array */
unsigned NBestList::initialSize = 100;
/*
 * Construct an (initially empty) N-best list.
 * maxSize limits how many hyps read() will keep (0 = unlimited);
 * multiwords and backtrace control how hyps are parsed.
 */
NBestList::NBestList(Vocab &vocab, unsigned maxSize,
		     Boolean multiwords, Boolean backtrace)
    : vocab(vocab), _numHyps(0),
      hypList(0, initialSize), maxSize(maxSize), multiwords(multiwords),
      backtrace(backtrace), acousticOffset(0.0)
{
}
/*
* Compute memory usage
*/
/*
 * Compute memory usage: the list object itself, the hyp array, and
 * the word (and optional backtrace) arrays owned by each hypothesis.
 */
void
NBestList::memStats(MemStats &stats)
{
    stats.total += sizeof(*this) - sizeof(hypList);
    hypList.memStats(stats);

    /*
     * Add space taken up by the per-hyp word strings
     */
    for (unsigned hypIdx = 0; hypIdx < _numHyps; hypIdx++) {
	NBestHyp &hyp = hypList[hypIdx];
	unsigned arrayLen = Vocab::length(hyp.words) + 1;

	stats.total += arrayLen * sizeof(VocabIndex);
	if (hyp.wordInfo) {
	    stats.total += arrayLen * sizeof(NBestWordInfo);
	}
    }
}
/*
 * qsort() comparison function: orders hyps by descending totalScore.
 * Uses static_cast to const pointers instead of C-style casts, which
 * silently cast away the const qualifier of the qsort() arguments.
 */
static int
compareHyps(const void *h1, const void *h2)
{
    LogP score1 = static_cast<const NBestHyp *>(h1)->totalScore;
    LogP score2 = static_cast<const NBestHyp *>(h2)->totalScore;

    /* higher scores sort first */
    return score1 > score2 ? -1 :
	   score1 < score2 ? 1 : 0;
}
/*
 * Sort hyps in place, in order of descending total scores.
 * NOTE(review): qsort() relocates elements with raw byte copies; this
 * assumes NBestHyp tolerates bitwise moves -- confirm before adding
 * non-trivially-copyable members to NBestHyp.
 */
void
NBestList::sortHyps()
{
    /*
     * Sort the underlying array in place, in order of descending scores
     */
    qsort(hypList.data(), _numHyps, sizeof(NBestHyp), compareHyps);
}
/*
 * Read an N-best list from file.  A leading magic line switches
 * parsing into the corresponding Decipher format (1 or 2); otherwise
 * each line is parsed in the simple "scores + words" format.
 * Reading stops at EOF or after maxSize hyps (0 = no limit).
 * Returns false on the first malformed hyp.
 */
Boolean
NBestList::read(File &file)
{
    char *line = file.getline();
    unsigned decipherFormat = 0;

    /*
     * If the first line contains the Decipher magic string
     * we enforce Decipher format for the entire N-best list.
     */
    if (line != 0) {
	if (strncmp(line, nbest1Magic, sizeof(nbest1Magic) - 1) == 0) {
	    decipherFormat = 1;
	    line = file.getline();
	} else if (strncmp(line, nbest2Magic, sizeof(nbest2Magic) - 1) == 0) {
	    decipherFormat = 2;
	    line = file.getline();
	}
    }

    unsigned int howmany = 0;

    while (line && (maxSize == 0 || howmany < maxSize)) {
	if (! hypList[howmany].parse(line, vocab, decipherFormat,
				     acousticOffset, multiwords, backtrace))
	{
	    file.position() << "bad n-best hyp\n";
	    return false;
	}

	/* remember the hyp's original position in the list */
	hypList[howmany].rank = howmany;

	howmany ++;
	line = file.getline();
    }

    _numHyps = howmany;

    return true;
}
/*
 * Write the N-best list to file, optionally in Decipher format with
 * the appropriate magic header.  numHyps == 0 means "write all hyps";
 * otherwise output is capped at numHyps.  Always returns true.
 */
Boolean
NBestList::write(File &file, Boolean decipherFormat, unsigned numHyps)
{
    if (decipherFormat) {
	fprintf(file, "%s\n", backtrace ? nbest2Magic : nbest1Magic);
    }

    /* compute the number of hyps to emit up front */
    unsigned limit = (numHyps == 0 || numHyps > _numHyps) ? _numHyps : numHyps;

    for (unsigned h = 0; h < limit; h++) {
	hypList[h].write(file, vocab, decipherFormat, acousticOffset);
    }

    return true;
}
/*
* Recompute total scores by recomputing LM scores and adding them to the
* acoustic scores including a word transition penalty.
*/
/*
 * Recompute total scores by recomputing LM scores and adding them to
 * the acoustic scores plus a word transition penalty
 * (see NBestHyp::rescore()).
 */
void
NBestList::rescoreHyps(LM &lm, double lmScale, double wtScale)
{
    for (unsigned hypIdx = 0; hypIdx < _numHyps; hypIdx++) {
	NBestHyp &hyp = hypList[hypIdx];

	hyp.rescore(lm, lmScale, wtScale);
    }
}
/*
* Recompute total hyp scores using new scaling constants.
*/
/*
 * Recompute total hyp scores using new scaling constants
 * (see NBestHyp::reweight()).
 */
void
NBestList::reweightHyps(double lmScale, double wtScale, double amScale)
{
    unsigned hypIdx = 0;

    while (hypIdx < _numHyps) {
	hypList[hypIdx].reweight(lmScale, wtScale, amScale);
	hypIdx++;
    }
}
/*
* Compute posterior probabilities
*/
/*
 * Compute posterior probabilities for all hyps from their component
 * scores, normalized over the whole list.
 * postScale controls the peakedness of the distribution; as a special
 * case, if all three weights are zero the stored aggregate totalScore
 * is used directly.
 */
void
NBestList::computePosteriors(double lmScale, double wtScale,
			     double postScale, double amScale)
{
    /*
     * First compute the numerators for the posteriors
     */
    LogP2 totalNumerator = LogP_Zero;
    LogP scoreOffset;		/* assigned on the first iteration (h == 0) */
    unsigned h;

    for (h = 0; h < _numHyps; h++) {
	NBestHyp &hyp = hypList[h];

	/*
	 * This way of computing the total score differs from
	 * hyp.reweight() in that we're scaling back the acoustic
	 * scores, rather than scaling up the LM scores.
	 *
	 * Store the score back into the nbest list so we can
	 * sort on it later.
	 *
	 * The posterior weight is a parameter that controls the
	 * peakedness of the posterior distribution.
	 *
	 * As a special case, if all weights are zero, we compute the
	 * posteriors directly from the stored aggregate scores.
	 */
	LogP totalScore;

	if (amScale == 0.0 && lmScale == 0.0 && wtScale == 0.0) {
	    totalScore = (LogP)(hyp.totalScore / postScale);
	} else {
	    totalScore = (LogP)((weightLogP(amScale, hyp.acousticScore) +
				 weightLogP(lmScale, hyp.languageScore) +
				 (LogP)wtScale * hyp.numWords) /
				postScale);
	}

	/*
	 * To prevent underflow when converting LogP's to Prob's, we
	 * subtract off the LogP of the first hyp.
	 * This is equivalent to a constant factor on all Prob's, which
	 * cancels in the normalization.
	 */
	if (h == 0) {
	    scoreOffset = totalScore;
	    totalScore = 0.0;
	} else {
	    totalScore -= scoreOffset;
	}

	/*
	 * temporarily store unnormalized log posterior in hyp
	 */
	hyp.posterior = totalScore;

	totalNumerator = AddLogP(totalNumerator, hyp.posterior);
    }

    /*
     * Normalize posteriors (convert back to linear probabilities)
     */
    for (h = 0; h < _numHyps; h++) {
	NBestHyp &hyp = hypList[h];

	hyp.posterior = LogPtoProb(hyp.posterior - totalNumerator);
    }
}
/*
* Recompute acoustic scores by subtracting recognizer LM scores
* from totals.
*/
/*
 * Recompute acoustic scores for all hyps by subtracting the
 * recognizer LM scores from the stored totals
 * (see NBestHyp::decipherFix()).
 */
void
NBestList::decipherFix(LM &lm, double lmScale, double wtScale)
{
    unsigned hypIdx = 0;

    while (hypIdx < _numHyps) {
	hypList[hypIdx].decipherFix(lm, lmScale, wtScale);
	hypIdx++;
    }
}
/*
* Remove noise and pause words from hyps
*/
/*
 * Remove noise and pause words from hyps.
 * The word strings are filtered by the LM; the parallel wordInfo
 * arrays (if present) are compacted in place so they stay in sync
 * with the filtered word strings.
 */
void
NBestList::removeNoise(LM &lm)
{
    /* sentinel entry used to terminate compacted wordInfo arrays */
    NBestWordInfo endOfHyp;
    endOfHyp.word = Vocab_None;

    for (unsigned h = 0; h < _numHyps; h++) {
	lm.removeNoise(hypList[h].words);

	NBestWordInfo *wordInfo = hypList[h].wordInfo;

	// remove corresponding tokens from wordInfo array
	if (wordInfo) {
	    unsigned from, to;

	    /*
	     * Keep only tokens that are neither pause nor noise words.
	     * NOTE(review): this filter is assumed to match what
	     * lm.removeNoise() did to the word string above -- verify
	     * if the LM's noise handling changes.
	     */
	    for (from = 0, to = 0; wordInfo[from].word != Vocab_None; from ++) {
		if (wordInfo[from].word != vocab.pauseIndex() &&
		    !lm.noiseVocab.getWord(wordInfo[from].word))
		{
		    wordInfo[to++] = wordInfo[from];
		}
	    }
	    wordInfo[to] = endOfHyp;
	}
    }
}
/*
* Normalize acoustic scores so that maximum is 0
*/
/*
 * Normalize acoustic scores so that the maximum is 0, recording the
 * offset in acousticOffset so scores can later be restored by
 * acousticDenorm().
 */
void
NBestList::acousticNorm()
{
    /*
     * BUG FIX: with an empty list the old code assigned an
     * uninitialized maxScore to acousticOffset.  Handle that case
     * explicitly.
     */
    if (_numHyps == 0) {
	acousticOffset = 0.0;
	return;
    }

    /*
     * Find maximum acoustic score
     */
    LogP maxScore = hypList[0].acousticScore;

    for (unsigned h = 1; h < _numHyps; h++) {
	if (hypList[h].acousticScore > maxScore) {
	    maxScore = hypList[h].acousticScore;
	}
    }

    /*
     * Normalize all scores
     */
    for (unsigned h = 0; h < _numHyps; h++) {
	hypList[h].acousticScore -= maxScore;
	hypList[h].totalScore -= maxScore;
    }

    acousticOffset = maxScore;
}
/*
* Restore acoustic scores to their un-normalized values
*/
/*
 * Restore acoustic scores to their un-normalized values, undoing
 * acousticNorm().
 */
void
NBestList::acousticDenorm()
{
    for (unsigned h = 0; h < _numHyps; h++) {
	hypList[h].acousticScore += acousticOffset;
	/*
	 * BUG FIX: acousticNorm() subtracts the offset from BOTH
	 * acousticScore and totalScore, so both must be restored by
	 * adding it back.  The previous code subtracted it from
	 * totalScore a second time, corrupting total scores on every
	 * norm/denorm round trip.
	 */
	hypList[h].totalScore += acousticOffset;
    }

    acousticOffset = 0.0;
}
/*
* compute minimal word error of all hyps in the list
* (and set hyp error counts)
*/
/*
 * Compute minimal word error of all hyps in the list
 * (and set per-hyp error counts).
 * words is the reference word string; sub/ins/del return the error
 * breakdown of the best-matching hyp.  Returns the minimal error count.
 */
unsigned
NBestList::wordError(const VocabIndex *words,
		     unsigned &sub, unsigned &ins, unsigned &del)
{
    unsigned minErr = (unsigned)(-1);	/* sentinel: larger than any real count */

    for (unsigned h = 0; h < _numHyps; h++) {
	unsigned s, i, d;
	unsigned werr = ::wordError(hypList[h].words, words, s, i, d);

	if (h == 0 || werr < minErr) {
	    minErr = werr;
	    sub = s;
	    ins = i;
	    del = d;
	}

	/* record the per-hyp error count for later use */
	hypList[h].numErrors = werr;
    }

    if (_numHyps == 0) {
	/*
	 * If the n-best list is empty we count all reference words as deleted.
	 */
	minErr = del = Vocab::length(words);
	sub = 0;
	ins = 0;
    }

    return minErr;
}
/*
* Return hyp with minimum expected word error
*/
/*
 * Return (in words[], up to length entries) the hyp with minimum
 * expected word error under the current posterior distribution, and
 * its expected substitution/insertion/deletion counts in subs/inss/dels.
 *
 * maxRescore > 0 limits how many of the top hyps are considered as
 * candidates (0 = all); postPrune > 0 stops accumulating error from
 * competing hyps once their cumulative posterior mass exceeds
 * 1 - postPrune.  Returns the expected word error of the chosen hyp
 * (0.0 for an empty candidate set).
 */
double
NBestList::minimizeWordError(VocabIndex *words, unsigned length,
			     double &subs, double &inss, double &dels,
			     unsigned maxRescore, Prob postPrune)
{
    /*
     * BUG FIX: bestError and bestHyp were uninitialized; with an
     * empty candidate set and DEBUG_PRINT_RANK enabled, the debug
     * output below indexed getHyp() with an uninitialized bestHyp.
     */
    double bestError = 0.0;
    unsigned bestHyp = 0;

    unsigned howmany = (maxRescore > 0) ? maxRescore : _numHyps;
    if (howmany > _numHyps) {
	howmany = _numHyps;
    }

    /*
     * Compute expected word errors for each candidate hyp
     */
    for (unsigned i = 0; i < howmany; i ++) {
	NBestHyp &hyp = getHyp(i);

	double totalErrors = 0.0;
	double totalSubs = 0.0;
	double totalInss = 0.0;
	double totalDels = 0.0;
	Prob totalPost = 0.0;

	for (unsigned j = 0; j < _numHyps; j ++) {
	    NBestHyp &otherHyp = getHyp(j);

	    if (i != j) {
		unsigned sub, ins, del;
		totalErrors += otherHyp.posterior *
				::wordError(hyp.words, otherHyp.words,
							sub, ins, del);
		totalSubs += otherHyp.posterior * sub;
		totalInss += otherHyp.posterior * ins;
		totalDels += otherHyp.posterior * del;
	    }

	    /*
	     * Optimization: if the partial accumulated error exceeds the
	     * current best error then this cannot be a new best.
	     */
	    if (i > 0 && totalErrors > bestError) {
		break;
	    }

	    /*
	     * Ignore hyps whose cumulative posterior mass is below threshold
	     */
	    totalPost += otherHyp.posterior;
	    if (postPrune > 0.0 && totalPost > 1.0 - postPrune) {
		break;
	    }
	}

	if (i == 0 || totalErrors < bestError) {
	    bestHyp = i;
	    bestError = totalErrors;
	    subs = totalSubs;
	    inss = totalInss;
	    dels = totalDels;
	}
    }

    /*
     * Only report a best hyp if at least one candidate was evaluated.
     */
    if (howmany > 0 && debug(DEBUG_PRINT_RANK)) {
	cerr << "best hyp = " << bestHyp
	     << " post = " << getHyp(bestHyp).posterior
	     << " wer = " << bestError << endl;
    }

    if (howmany > 0) {
	/*
	 * Copy the winning hyp into the output buffer, truncating at
	 * length words.
	 */
	for (unsigned j = 0; j < length; j ++) {
	    words[j] = getHyp(bestHyp).words[j];
	    if (words[j] == Vocab_None) break;
	}

	return bestError;
    } else {
	if (length > 0) {
	    words[0] = Vocab_None;
	}
	return 0.0;
    }
}
/* end of nbest.cc */