📄 nbest-pron-score.cc
字号:
/*
* nbest-pron-score --
* Score pronunciations and pauses in N-best hypotheses
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 2002-2006 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Id: nbest-pron-score.cc,v 1.8 2006/01/05 08:44:25 stolcke Exp $";
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <locale.h>
#include <assert.h>
#include "option.h"
#include "version.h"
#include "File.h"
#include "zio.h"
#include "Array.cc"
#include "Prob.h"
#include "MultiwordVocab.h"
#include "NBest.h"
#include "Ngram.h"
#include "VocabMultiMap.h"
#include "RefList.h"
#define DEBUG_SCORES 2
static int version = 0;
static unsigned debug = 0;
static int toLower = 0;
static int multiwords = 0;
static char *rescoreFile = 0;
static char *nbestFiles = 0;
static char *pauseLMFile = 0;
static char *dictFile = 0;
static char *pronScoreDir = 0;
static char *pauseScoreDir = 0;
static double pauseScoreWeight = 0.0;
static unsigned maxNbest = 0;
static int intlogs = 0;
static char *noPauseTag = (char *)"<nopause>";
static char *shortPauseTag = (char *)"<shortpause>";
static char *longPauseTag = (char *)"<longpause>";
static double minPauseDur = 0.06;
static double longPauseDur = 0.6;
static VocabIndex noPauseIndex;
static VocabIndex shortPauseIndex;
static VocabIndex longPauseIndex;
static Option options[] = {
{ OPT_TRUE, "version", &version, "print version information" },
{ OPT_UINT, "debug", &debug, "debugging level" },
{ OPT_TRUE, "tolower", &toLower, "map vocabulary to lowercase" },
{ OPT_TRUE, "multiwords", &multiwords, "split multiwords in N-best hyps" },
{ OPT_STRING, "rescore", &rescoreFile, "hyp stream input file to rescore" },
{ OPT_STRING, "nbest", &rescoreFile, "same as -rescore" },
{ OPT_STRING, "nbest-files", &nbestFiles, "list of n-best filenames" },
{ OPT_UINT, "max-nbest", &maxNbest, "maximum number of hyps to consider" },
{ OPT_STRING, "dictionary", &dictFile, "pronunciation dictionary" },
{ OPT_STRING, "pause-lm", &pauseLMFile, "pause language model" },
{ OPT_STRING, "pron-score-dir", &pronScoreDir, "pronunciation score directory" },
{ OPT_STRING, "pause-score-dir", &pauseScoreDir, "pause score directory" },
{ OPT_FLOAT, "pause-score-weight", &pauseScoreWeight, "pause score weight for adding with pron scores" },
{ OPT_TRUE, "intlogs", &intlogs, "dictionary uses intlog probabilities" },
{ OPT_STRING, "no-pause", &noPauseTag, "no pause tag" },
{ OPT_STRING, "short-pause", &shortPauseTag, "short pause tag" },
{ OPT_STRING, "long-pause", &longPauseTag, "long pause tag" },
{ OPT_FLOAT, "min-pause-dur", &minPauseDur, "minumum pause duration" },
{ OPT_FLOAT, "long-pause-dur", &longPauseDur, "long pause duration" },
};
VocabIndex
getPauseTag(NBestTimestamp pauseLength)
{
if (pauseLength < minPauseDur) {
return noPauseIndex;
} else if (pauseLength >= longPauseDur) {
return longPauseIndex;
} else {
return shortPauseIndex;
}
}
void
writeScores(LogP *scores, unsigned numScores, const char *filename)
{
File file(filename, "w");
for (unsigned i = 0; i < numScores; i ++) {
fprintf(file, "%g\n", scores[i]);
}
}
/*
* Rescore one N-best list with pronunciation and pause models
*/
void
processNbest(const char *nbestFile, MultiwordVocab &vocab,
VocabMultiMap &dictionary, Ngram &pauseLM,
const char *pronScoreFile, const char *pauseScoreFile,
double pauseScoreWeight)
{
Vocab &phoneVocab = dictionary.vocab2;
NBestList nbest(vocab, maxNbest, false, true);
{
File file(nbestFile, "r");
if (!nbest.read(file)) {
cerr << "error reading nbest file\n";
return;
}
}
unsigned numHyps = nbest.numHyps();
if (numHyps == 0) {
cerr << "warning: N-best list " << nbestFile << " is empty\n";
return;
}
makeArray(LogP, pronScores, numHyps);
makeArray(LogP, pauseScores, numHyps);
Boolean warning = false;
for (unsigned h = 0; h < numHyps; h ++) {
NBestHyp &hyp = nbest.getHyp(h);
if (hyp.wordInfo == 0) {
if (!warning) {
cerr << "warning: N-best hyp " << h << " in "
<< nbestFile << " does not contain backtrace info\n";
warning = true;
}
continue;
}
/*
* compute pronunciation score:
* sum of pronunciation log probabilites of all words in hyp
*/
if (pronScoreFile) {
LogP pronScore = LogP_One;
for (unsigned i = 0; hyp.words[i] != Vocab_None; i ++) {
/*
* If pronunciation info is missing there is nothing we
* can score. Assume pronunciation prob = 1.
*/
if (hyp.wordInfo[i].phones == 0) {
continue;
}
/*
* copy phone string to buffer for parsing
*/
makeArray(char, phoneString,
strlen(hyp.wordInfo[i].phones) + 1);
strcpy(phoneString, hyp.wordInfo[i].phones);
/*
* convert phone string to index string
*/
Array<VocabIndex> phones;
unsigned numPhones = 0;
for (char *s = strtok(phoneString, phoneSeparator);
s != 0;
s = strtok(NULL, phoneSeparator), numPhones ++)
{
phones[numPhones] = phoneVocab.addWord(s);
}
phones[numPhones] = Vocab_None;
/*
* find pronunciations prob
*/
Prob p = dictionary.get(hyp.words[i], phones.data());
if (debug >= DEBUG_SCORES) {
cerr << "WORD " << vocab.getWord(hyp.words[i])
<< " PRON " << (phoneVocab.use(), phones.data())
<< " PROB " << p << endl;
}
if (p != 0.0) {
if (intlogs) {
pronScore += IntlogToLogP(p);
} else {
pronScore += ProbToLogP(p);
}
}
}
pronScores[h] = pronScore;
}
/*
* compute pause score:
* sum of pause LM log probabilites of all pauses in hyp
*/
if (pauseScoreFile || pauseScoreWeight != 0) {
LogP pauseScore = LogP_One;
VocabIndex lastWord = Vocab_None;
NBestTimestamp pauseLength = 0;
for (unsigned i = 0; hyp.words[i] != Vocab_None; i ++) {
if (hyp.words[i] == vocab.pauseIndex()) {
pauseLength += hyp.wordInfo[i].duration;
} else {
VocabIndex context[3];
context[0] = Vocab_None;
VocabIndex firstPart, lastPart;
if (!multiwords ||
pauseLM.findProb(hyp.words[i], context) != 0)
{
firstPart = lastPart = hyp.words[i];
} else {
context[0] = hyp.words[i];
context[1] = Vocab_None;
VocabIndex expanded[maxWordsPerLine + 1];
unsigned n = vocab.expandMultiwords(context, expanded,
maxWordsPerLine);
firstPart = expanded[0];
lastPart = expanded[n - 1];
}
if (lastWord != Vocab_None) {
context[0] = lastWord;
context[1] = firstPart;
context[2] = Vocab_None;
VocabIndex pauseTag = getPauseTag(pauseLength);
LogP pauseProb = pauseLM.wordProb(pauseTag, context);
if (debug >= DEBUG_SCORES) {
cerr << "PAUSE " << vocab.getWord(pauseTag)
<< " DUR " << pauseLength
<< " CONTEXT " << (vocab.use(), context)
<< " PROB " << pauseProb << endl;
}
pauseScore += pauseProb;
}
pauseLength = 0.0;
lastWord = lastPart;
}
}
if (pauseScoreFile) {
pauseScores[h] = pauseScore;
}
if (pauseScoreWeight != 0.0) {
pronScores[h] += pauseScoreWeight * pauseScore;
}
}
}
if (pronScoreFile) {
writeScores(pronScores, numHyps, pronScoreFile);
}
if (pauseScoreFile) {
writeScores(pauseScores, numHyps, pauseScoreFile);
}
}
int
main (int argc, char *argv[])
{
setlocale(LC_CTYPE, "");
setlocale(LC_COLLATE, "");
Opt_Parse(argc, argv, options, Opt_Number(options), 0);
if (version) {
printVersion(RcsId);
exit(0);
}
MultiwordVocab vocab;
vocab.toLower() = toLower ? true : false;
noPauseIndex = vocab.addWord(noPauseTag);
shortPauseIndex = vocab.addWord(shortPauseTag);
longPauseIndex = vocab.addWord(longPauseTag);
Vocab dictVocab;
VocabMultiMap dictionary(vocab, dictVocab, intlogs);
/*
* Read optional dictionary to help in word alignment
*/
if (dictFile) {
File file(dictFile, "r");
if (!dictionary.read(file)) {
cerr << "format error in dictionary file\n";
exit(1);
}
}
Ngram pauseLM(vocab, 3);
pauseLM.debugme(debug);
if (pauseLMFile) {
File file(pauseLMFile, "r");
if (!pauseLM.read(file)) {
cerr << "format error in pause LM\n";
exit(1);
}
}
/*
* Process single nbest file
*/
if (rescoreFile) {
processNbest(rescoreFile, vocab, dictionary, pauseLM,
dictFile ? "-" : 0, pauseLMFile ? "-" : 0,
pauseScoreWeight);
}
/*
* Read list of nbest filenames
*/
if (nbestFiles) {
File file(nbestFiles, "r");
char *line;
while (line = file.getline()) {
char *fname = strtok(line, wordSeparators);
if (!fname) continue;
RefString sentid = idFromFilename(fname);
makeArray(char, pronScoreFile,
(pronScoreDir ? strlen(pronScoreDir) : 0) + 1
+ strlen(sentid) + strlen(GZIP_SUFFIX) + 1);
if (pronScoreDir) {
sprintf(pronScoreFile, "%s/%s%s", pronScoreDir, sentid,
GZIP_SUFFIX);
}
makeArray(char, pauseScoreFile,
(pauseScoreDir ? strlen(pauseScoreDir) : 0) + 1
+ strlen(sentid) + strlen(GZIP_SUFFIX) + 1);
if (pauseScoreDir) {
sprintf(pauseScoreFile, "%s/%s%s", pauseScoreDir, sentid,
GZIP_SUFFIX);
}
processNbest(fname, vocab, dictionary, pauseLM,
pronScoreDir ? (char *)pronScoreFile : 0,
pauseScoreDir ? (char *)pauseScoreFile : 0,
pauseScoreWeight);
}
}
exit(0);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -