/*
 * nbest.cc --
 *	N-best hypothesis and N-best list handling.
 */
vocab.addWords(justWords, words, actualNumWords + 1);
}
if (decipherFormat == 2 && backtrace) {
delete [] wordInfo;
wordInfo = new NBestWordInfo[actualNumWords + 1];
for (unsigned j = 0; j < actualNumWords; j ++) {
wordInfo[j] = backtraceInfo[j];
wordInfo[j].word = words[j];
}
wordInfo[actualNumWords].word = Vocab_None;
} else {
wordInfo = 0;
}
} else {
unsigned i = 0;
for (unsigned j = 0; justWords[j] != 0; j ++) {
char *start = (char *)justWords[j];
char *cp;
while (cp = strchr(start, multiwordSeparator)) {
*cp = '\0';
words[i++] =
unkIsWord ? vocab.getIndex(start, vocab.unkIndex())
: vocab.addWord(start);
*cp = multiwordSeparator;
start = cp + 1;
}
words[i++] =
unkIsWord ? vocab.getIndex(start, vocab.unkIndex())
: vocab.addWord(start);
}
words[i] = Vocab_None;
}
return true;
}
/*
 * Write a single N-best hypothesis to file, either in Decipher format
 * (bytelog total score, optionally with per-word backtrace fields) or
 * in the plain "acoustic-score lm-score num-words words..." format.
 */
void
NBestHyp::write(File &file, Vocab &vocab, Boolean decipherFormat,
						LogP acousticOffset)
{
    /*
     * Score prefix: Decipher wants a single bytelog-encoded total
     * score; the plain format lists the score components separately.
     */
    if (decipherFormat) {
	fprintf(file, "(%d)", (int)LogPtoBytelog(totalScore + acousticOffset));
    } else {
	fprintf(file, "%g %g %d", acousticScore + acousticOffset,
					languageScore, numWords);
    }

    /*
     * Backtrace information is written only if known and Decipher
     * format is desired.
     */
    Boolean withBacktrace = decipherFormat && wordInfo != 0;

    for (unsigned idx = 0; words[idx] != Vocab_None; idx++) {
	if (withBacktrace) {
	    fprintf(file, " %s ( st: %.2f et: %.2f g: %d a: %d )",
			    vocab.getWord(wordInfo[idx].word),
			    wordInfo[idx].start,
			    wordInfo[idx].start+wordInfo[idx].duration - frameLength,
			    (int)LogPtoBytelog(wordInfo[idx].languageScore),
			    (int)LogPtoBytelog(wordInfo[idx].acousticScore));
	} else {
	    fprintf(file, " %s", vocab.getWord(words[idx]));
	}
    }

    fprintf(file, "\n");
}
/*
 * Recompute the hyp's LM score using a new language model and combine
 * it with the (unchanged) acoustic score and a word transition penalty
 * into a new total score.
 */
void
NBestHyp::rescore(LM &lm, double lmScale, double wtScale)
{
    TextStats stats;

    /*
     * LM score is recomputed,
     * numWords is set to take non-word tokens into account
     */
    languageScore = weightLogP(lmScale, lm.sentenceProb(words, stats));
    numWords = stats.numWords;

    /*
     * Note: In the face of zero probability words we do NOT
     * set the LM probability to zero. These cases typically
     * reflect a vocabulary mismatch between the rescoring LM
     * and the recognizer, and it is more useful to rescore based on
     * the known words alone. The warning hopefully will cause
     * someone to assess the problem.
     */
    if (stats.zeroProbs > 0) {
	cerr << "warning: hyp contains zero prob words: "
	     << (lm.vocab.use(), words) << endl;
    }
    if (stats.numOOVs > 0) {
	cerr << "warning: hyp contains OOV words: "
	     << (lm.vocab.use(), words) << endl;
    }

    /* languageScore is already scaled above, so it enters unweighted here */
    totalScore = (LogP)(acousticScore +
			languageScore +
			wtScale * numWords);
}
void
NBestHyp::reweight(double lmScale, double wtScale, double amScale)
{
totalScore = weightLogP(amScale, acousticScore) +
weightLogP(lmScale, languageScore) +
(LogP)wtScale * numWords;
}
/*
 * Reconstruct the acoustic score of a Decipher-generated hyp by
 * recomputing the recognizer LM score and word transition penalties
 * and subtracting them from the stored total score.
 */
void
NBestHyp::decipherFix(LM &lm, double lmScale, double wtScale)
{
    TextStats stats;

    /*
     * LM score is recomputed,
     * numWords is set to take non-word tokens into account
     */
    languageScore = weightLogP(lmScale, lm.sentenceProb(words, stats));
    numWords = stats.numWords;

    /*
     * Arguably a bug, but Decipher actually applies WTW to pauses.
     * So we have to do the same when subtracting the non-acoustic
     * scores below.
     */
    unsigned numAllWords = Vocab::length(words);

    /*
     * Unlike rescore(), zero-prob/OOV cases force the LM score to
     * LogP_Zero here, since we are reconstructing the recognizer's
     * own score components.
     */
    if (stats.zeroProbs > 0) {
	cerr << "warning: hyp contains zero prob words: "
	     << (lm.vocab.use(), words) << endl;
	languageScore = LogP_Zero;
    }
    if (stats.numOOVs > 0) {
	cerr << "warning: hyp contains OOV words: "
	     << (lm.vocab.use(), words) << endl;
	languageScore = LogP_Zero;
    }

    acousticScore = totalScore -
		    languageScore -
		    (LogP)wtScale * numAllWords;
}
/*
* N-Best lists
*/
/* default initial capacity of the hypList array */
unsigned NBestList::initialSize = 100;
/*
 * Construct an (initially empty) N-best list.
 * maxSize limits how many hyps read() will keep (0 = unlimited);
 * multiwords and backtrace control how hyps are parsed.
 */
NBestList::NBestList(Vocab &vocab, unsigned maxSize,
		     Boolean multiwords, Boolean backtrace)
    : vocab(vocab), _numHyps(0),
      hypList(0, initialSize), maxSize(maxSize), multiwords(multiwords),
      backtrace(backtrace), acousticOffset(0.0)
{
}
/*
* Compute memory usage
*/
/*
 * Compute memory usage: the list object itself, the hyp array, and
 * the word (and optional backtrace) arrays owned by each hypothesis.
 */
void
NBestList::memStats(MemStats &stats)
{
    stats.total += sizeof(*this) - sizeof(hypList);
    hypList.memStats(stats);

    /*
     * Add space taken up by the per-hyp word strings
     */
    for (unsigned hypIdx = 0; hypIdx < _numHyps; hypIdx++) {
	NBestHyp &hyp = hypList[hypIdx];
	unsigned arrayLen = Vocab::length(hyp.words) + 1;

	stats.total += arrayLen * sizeof(VocabIndex);
	if (hyp.wordInfo) {
	    stats.total += arrayLen * sizeof(NBestWordInfo);
	}
    }
}
/*
 * qsort() comparison function: orders hyps by descending totalScore.
 * Uses static_cast to const pointers instead of C-style casts, which
 * silently cast away the const qualifier of the qsort() arguments.
 */
static int
compareHyps(const void *h1, const void *h2)
{
    LogP score1 = static_cast<const NBestHyp *>(h1)->totalScore;
    LogP score2 = static_cast<const NBestHyp *>(h2)->totalScore;

    /* higher scores sort first */
    return score1 > score2 ? -1 :
	   score1 < score2 ? 1 : 0;
}
/*
 * Sort hyps in place, in order of descending total scores.
 * NOTE(review): qsort() relocates elements with raw byte copies; this
 * assumes NBestHyp tolerates bitwise moves -- confirm before adding
 * non-trivially-copyable members to NBestHyp.
 */
void
NBestList::sortHyps()
{
    /*
     * Sort the underlying array in place, in order of descending scores
     */
    qsort(hypList.data(), _numHyps, sizeof(NBestHyp), compareHyps);
}
/*
 * Read an N-best list from file.  A leading magic line switches
 * parsing into the corresponding Decipher format (1 or 2); otherwise
 * each line is parsed in the simple "scores + words" format.
 * Reading stops at EOF or after maxSize hyps (0 = no limit).
 * Returns false on the first malformed hyp.
 */
Boolean
NBestList::read(File &file)
{
    char *line = file.getline();
    unsigned decipherFormat = 0;

    /*
     * If the first line contains the Decipher magic string
     * we enforce Decipher format for the entire N-best list.
     */
    if (line != 0) {
	if (strncmp(line, nbest1Magic, sizeof(nbest1Magic) - 1) == 0) {
	    decipherFormat = 1;
	    line = file.getline();
	} else if (strncmp(line, nbest2Magic, sizeof(nbest2Magic) - 1) == 0) {
	    decipherFormat = 2;
	    line = file.getline();
	}
    }

    unsigned int howmany = 0;

    while (line && (maxSize == 0 || howmany < maxSize)) {
	if (! hypList[howmany].parse(line, vocab, decipherFormat,
				     acousticOffset, multiwords, backtrace))
	{
	    file.position() << "bad n-best hyp\n";
	    return false;
	}

	/* remember the hyp's original position in the list */
	hypList[howmany].rank = howmany;

	howmany ++;
	line = file.getline();
    }

    _numHyps = howmany;

    return true;
}
/*
 * Write the N-best list to file, optionally in Decipher format with
 * the appropriate magic header.  numHyps == 0 means "write all hyps";
 * otherwise output is capped at numHyps.  Always returns true.
 */
Boolean
NBestList::write(File &file, Boolean decipherFormat, unsigned numHyps)
{
    if (decipherFormat) {
	fprintf(file, "%s\n", backtrace ? nbest2Magic : nbest1Magic);
    }

    /* compute the number of hyps to emit up front */
    unsigned limit = (numHyps == 0 || numHyps > _numHyps) ? _numHyps : numHyps;

    for (unsigned h = 0; h < limit; h++) {
	hypList[h].write(file, vocab, decipherFormat, acousticOffset);
    }

    return true;
}
/*
* Recompute total scores by recomputing LM scores and adding them to the
* acoustic scores including a word transition penalty.
*/
/*
 * Recompute total scores by recomputing LM scores and adding them to
 * the acoustic scores plus a word transition penalty
 * (see NBestHyp::rescore()).
 */
void
NBestList::rescoreHyps(LM &lm, double lmScale, double wtScale)
{
    for (unsigned hypIdx = 0; hypIdx < _numHyps; hypIdx++) {
	NBestHyp &hyp = hypList[hypIdx];

	hyp.rescore(lm, lmScale, wtScale);
    }
}
/*
* Recompute total hyp scores using new scaling constants.
*/
/*
 * Recompute total hyp scores using new scaling constants
 * (see NBestHyp::reweight()).
 */
void
NBestList::reweightHyps(double lmScale, double wtScale, double amScale)
{
    unsigned hypIdx = 0;

    while (hypIdx < _numHyps) {
	hypList[hypIdx].reweight(lmScale, wtScale, amScale);
	hypIdx++;
    }
}
/*
* Compute posterior probabilities
*/
/*
 * Compute posterior probabilities for all hyps from their component
 * scores, normalized over the whole list.
 * postScale controls the peakedness of the distribution; as a special
 * case, if all three weights are zero the stored aggregate totalScore
 * is used directly.
 */
void
NBestList::computePosteriors(double lmScale, double wtScale,
			     double postScale, double amScale)
{
    /*
     * First compute the numerators for the posteriors
     */
    LogP2 totalNumerator = LogP_Zero;
    LogP scoreOffset;		/* assigned on the first iteration (h == 0) */
    unsigned h;

    for (h = 0; h < _numHyps; h++) {
	NBestHyp &hyp = hypList[h];

	/*
	 * This way of computing the total score differs from
	 * hyp.reweight() in that we're scaling back the acoustic
	 * scores, rather than scaling up the LM scores.
	 *
	 * Store the score back into the nbest list so we can
	 * sort on it later.
	 *
	 * The posterior weight is a parameter that controls the
	 * peakedness of the posterior distribution.
	 *
	 * As a special case, if all weights are zero, we compute the
	 * posteriors directly from the stored aggregate scores.
	 */
	LogP totalScore;

	if (amScale == 0.0 && lmScale == 0.0 && wtScale == 0.0) {
	    totalScore = (LogP)(hyp.totalScore / postScale);
	} else {
	    totalScore = (LogP)((weightLogP(amScale, hyp.acousticScore) +
				 weightLogP(lmScale, hyp.languageScore) +
				 (LogP)wtScale * hyp.numWords) /
				postScale);
	}

	/*
	 * To prevent underflow when converting LogP's to Prob's, we
	 * subtract off the LogP of the first hyp.
	 * This is equivalent to a constant factor on all Prob's, which
	 * cancels in the normalization.
	 */
	if (h == 0) {
	    scoreOffset = totalScore;
	    totalScore = 0.0;
	} else {
	    totalScore -= scoreOffset;
	}

	/*
	 * temporarily store unnormalized log posterior in hyp
	 */
	hyp.posterior = totalScore;

	totalNumerator = AddLogP(totalNumerator, hyp.posterior);
    }

    /*
     * Normalize posteriors (convert back to linear probabilities)
     */
    for (h = 0; h < _numHyps; h++) {
	NBestHyp &hyp = hypList[h];

	hyp.posterior = LogPtoProb(hyp.posterior - totalNumerator);
    }
}
/*
* Recompute acoustic scores by subtracting recognizer LM scores
* from totals.
*/
/*
 * Recompute acoustic scores for all hyps by subtracting the
 * recognizer LM scores from the stored totals
 * (see NBestHyp::decipherFix()).
 */
void
NBestList::decipherFix(LM &lm, double lmScale, double wtScale)
{
    unsigned hypIdx = 0;

    while (hypIdx < _numHyps) {
	hypList[hypIdx].decipherFix(lm, lmScale, wtScale);
	hypIdx++;
    }
}
/*
* Remove noise and pause words from hyps
*/
/*
 * Remove noise and pause words from hyps.
 * The word strings are filtered by the LM; the parallel wordInfo
 * arrays (if present) are compacted in place so they stay in sync
 * with the filtered word strings.
 */
void
NBestList::removeNoise(LM &lm)
{
    /* sentinel entry used to terminate compacted wordInfo arrays */
    NBestWordInfo endOfHyp;
    endOfHyp.word = Vocab_None;

    for (unsigned h = 0; h < _numHyps; h++) {
	lm.removeNoise(hypList[h].words);

	NBestWordInfo *wordInfo = hypList[h].wordInfo;

	// remove corresponding tokens from wordInfo array
	if (wordInfo) {
	    unsigned from, to;

	    /*
	     * Keep only tokens that are neither pause nor noise words.
	     * NOTE(review): this filter is assumed to match what
	     * lm.removeNoise() did to the word string above -- verify
	     * if the LM's noise handling changes.
	     */
	    for (from = 0, to = 0; wordInfo[from].word != Vocab_None; from ++) {
		if (wordInfo[from].word != vocab.pauseIndex() &&
		    !lm.noiseVocab.getWord(wordInfo[from].word))
		{
		    wordInfo[to++] = wordInfo[from];
		}
	    }
	    wordInfo[to] = endOfHyp;
	}
    }
}
/*
* Normalize acoustic scores so that maximum is 0
*/
/*
 * Normalize acoustic scores so that the maximum is 0, recording the
 * offset in acousticOffset so scores can later be restored by
 * acousticDenorm().
 */
void
NBestList::acousticNorm()
{
    /*
     * BUG FIX: with an empty list the old code assigned an
     * uninitialized maxScore to acousticOffset.  Handle that case
     * explicitly.
     */
    if (_numHyps == 0) {
	acousticOffset = 0.0;
	return;
    }

    /*
     * Find maximum acoustic score
     */
    LogP maxScore = hypList[0].acousticScore;

    for (unsigned h = 1; h < _numHyps; h++) {
	if (hypList[h].acousticScore > maxScore) {
	    maxScore = hypList[h].acousticScore;
	}
    }

    /*
     * Normalize all scores
     */
    for (unsigned h = 0; h < _numHyps; h++) {
	hypList[h].acousticScore -= maxScore;
	hypList[h].totalScore -= maxScore;
    }

    acousticOffset = maxScore;
}
/*
* Restore acoustic scores to their un-normalized values
*/
/*
 * Restore acoustic scores to their un-normalized values, undoing
 * acousticNorm().
 */
void
NBestList::acousticDenorm()
{
    for (unsigned h = 0; h < _numHyps; h++) {
	hypList[h].acousticScore += acousticOffset;
	/*
	 * BUG FIX: acousticNorm() subtracts the offset from BOTH
	 * acousticScore and totalScore, so both must be restored by
	 * adding it back.  The previous code subtracted it from
	 * totalScore a second time, corrupting total scores on every
	 * norm/denorm round trip.
	 */
	hypList[h].totalScore += acousticOffset;
    }

    acousticOffset = 0.0;
}
/*
* compute minimal word error of all hyps in the list
* (and set hyp error counts)
*/
/*
 * Compute minimal word error of all hyps in the list
 * (and set per-hyp error counts).
 * words is the reference word string; sub/ins/del return the error
 * breakdown of the best-matching hyp.  Returns the minimal error count.
 */
unsigned
NBestList::wordError(const VocabIndex *words,
		     unsigned &sub, unsigned &ins, unsigned &del)
{
    unsigned minErr = (unsigned)(-1);	/* sentinel: larger than any real count */

    for (unsigned h = 0; h < _numHyps; h++) {
	unsigned s, i, d;
	unsigned werr = ::wordError(hypList[h].words, words, s, i, d);

	if (h == 0 || werr < minErr) {
	    minErr = werr;
	    sub = s;
	    ins = i;
	    del = d;
	}

	/* record the per-hyp error count for later use */
	hypList[h].numErrors = werr;
    }

    if (_numHyps == 0) {
	/*
	 * If the n-best list is empty we count all reference words as deleted.
	 */
	minErr = del = Vocab::length(words);
	sub = 0;
	ins = 0;
    }

    return minErr;
}
/*
* Return hyp with minimum expected word error
*/
/*
 * Return (in words[], up to length entries) the hyp with minimum
 * expected word error under the current posterior distribution, and
 * its expected substitution/insertion/deletion counts in subs/inss/dels.
 *
 * maxRescore > 0 limits how many of the top hyps are considered as
 * candidates (0 = all); postPrune > 0 stops accumulating error from
 * competing hyps once their cumulative posterior mass exceeds
 * 1 - postPrune.  Returns the expected word error of the chosen hyp
 * (0.0 for an empty candidate set).
 */
double
NBestList::minimizeWordError(VocabIndex *words, unsigned length,
			     double &subs, double &inss, double &dels,
			     unsigned maxRescore, Prob postPrune)
{
    /*
     * BUG FIX: bestError and bestHyp were uninitialized; with an
     * empty candidate set and DEBUG_PRINT_RANK enabled, the debug
     * output below indexed getHyp() with an uninitialized bestHyp.
     */
    double bestError = 0.0;
    unsigned bestHyp = 0;

    unsigned howmany = (maxRescore > 0) ? maxRescore : _numHyps;
    if (howmany > _numHyps) {
	howmany = _numHyps;
    }

    /*
     * Compute expected word errors for each candidate hyp
     */
    for (unsigned i = 0; i < howmany; i ++) {
	NBestHyp &hyp = getHyp(i);

	double totalErrors = 0.0;
	double totalSubs = 0.0;
	double totalInss = 0.0;
	double totalDels = 0.0;
	Prob totalPost = 0.0;

	for (unsigned j = 0; j < _numHyps; j ++) {
	    NBestHyp &otherHyp = getHyp(j);

	    if (i != j) {
		unsigned sub, ins, del;
		totalErrors += otherHyp.posterior *
				::wordError(hyp.words, otherHyp.words,
							sub, ins, del);
		totalSubs += otherHyp.posterior * sub;
		totalInss += otherHyp.posterior * ins;
		totalDels += otherHyp.posterior * del;
	    }

	    /*
	     * Optimization: if the partial accumulated error exceeds the
	     * current best error then this cannot be a new best.
	     */
	    if (i > 0 && totalErrors > bestError) {
		break;
	    }

	    /*
	     * Ignore hyps whose cumulative posterior mass is below threshold
	     */
	    totalPost += otherHyp.posterior;
	    if (postPrune > 0.0 && totalPost > 1.0 - postPrune) {
		break;
	    }
	}

	if (i == 0 || totalErrors < bestError) {
	    bestHyp = i;
	    bestError = totalErrors;
	    subs = totalSubs;
	    inss = totalInss;
	    dels = totalDels;
	}
    }

    /*
     * Only report a best hyp if at least one candidate was evaluated.
     */
    if (howmany > 0 && debug(DEBUG_PRINT_RANK)) {
	cerr << "best hyp = " << bestHyp
	     << " post = " << getHyp(bestHyp).posterior
	     << " wer = " << bestError << endl;
    }

    if (howmany > 0) {
	/*
	 * Copy the winning hyp into the output buffer, truncating at
	 * length words.
	 */
	for (unsigned j = 0; j < length; j ++) {
	    words[j] = getHyp(bestHyp).words[j];
	    if (words[j] == Vocab_None) break;
	}

	return bestError;
    } else {
	if (length > 0) {
	    words[0] = Vocab_None;
	}
	return 0.0;
    }
}
/* end of nbest.cc */