📄 nbest-lattice.cc
字号:
if (debug >= DEBUG_ERRORS) {
if (sentid) cerr << sentid << " ";
cerr << "err " << errors
<< " sub " << subs
<< " ins " << inss
<< " del " << dels << endl;
}
}
}
void
computeWordErrors(const char *sentid, NBestList &nbestList,
const VocabIndex *reference)
{
unsigned numHyps = nbestList.numHyps();
unsigned howmany = (maxRescore > 0) ? maxRescore : numHyps;
if (howmany > numHyps) {
howmany = numHyps;
}
for (unsigned i = 0; i < howmany; i ++) {
unsigned sub, ins, del;
makeArray(WordAlignType, alignment,
Vocab::length(nbestList.getHyp(i).words) +
Vocab::length(reference) + 1);
unsigned numErrors = wordError(reference, nbestList.getHyp(i).words,
sub, ins, del, alignment);
if (sentid) cout << sentid << ":" << i << " ";
cout << numErrors;
for (unsigned j = 0; alignment[j] != END_ALIGN; j ++) {
cout << " " << ((alignment[j] == INS_ALIGN) ? "INS" :
(alignment[j] == DEL_ALIGN) ? "DEL" :
(alignment[j] == SUB_ALIGN) ? "SUB" : "CORR");
}
cout << endl;
}
}
/*
 * Align a list of lattices into an existing alignment.
 *	Each line read from `file' is split on wordSeparators: the first
 *	field is a lattice filename and the optional second field is a
 *	weight (default 1.0).  Lines with no fields are skipped.
 *	Every lattice that reads successfully is merged into `lat' with
 *	its weight; a lattice that fails to read is reported on stderr and
 *	skipped.
 */
void
alignLattices(MultiAlign &lat, File &file)
{
    char *line;

    while ((line = file.getline()) != 0) {
	char *lname = strtok(line, wordSeparators);
	if (!lname) continue;

	/*
	 * Optional weight; if the field is missing or sscanf() cannot
	 * parse it, the default of 1.0 is kept.
	 */
	double weight = 1.0;
	char *wstring = strtok(0, wordSeparators);
	if (wstring) {
	    sscanf(wstring, "%lf", &weight);
	}

	File lFile(lname, "r");

	/*
	 * Temporary alignment of the same kind as selected by useMesh
	 */
	MultiAlign *newLat;
	if (useMesh) {
	    newLat = new WordMesh(lat.vocab);
	} else {
	    newLat = new WordLattice(lat.vocab);
	}
	assert(newLat != 0);

	if (!newLat->read(lFile)) {
	    cerr << "format error in lattice file\n";
	} else {
	    lat.alignAlignment(*newLat, weight);
	}

	/*
	 * Release the temporary lattice on both the success and the
	 * error path (the error path used to skip this and leak it).
	 */
	delete newLat;
    }
}
/*
 * Process a single N-best list
 *	Builds a word mesh or word lattice (per useMesh), optionally
 *	preloads it from readFile and merges the lattices listed in
 *	latticeFiles.  If nbestFile is given, the N-best list is read and
 *	then either scored against the reference, word-error rescored, or
 *	used for lattice building, depending on the option flags.
 *	Finally the lattice error is reported if requested, the reference
 *	is recorded in the alignment, and the lattice is written to
 *	outLattice if that is non-null.
 *	sentid, nbestFile, reference, outLattice and outNbest may each be 0.
 */
void
processNbest(NullLM &nullLM, const char *sentid, const char *nbestFile,
	     VocabMultiMap &dictionary, SubVocab &hiddenVocab,
	     const VocabIndex *reference,
	     const char *outLattice, const char *outNbest)
{
    Vocab &vocab = nullLM.vocab;

    DictionaryAbsDistance dictDistance(vocab, dictionary);
    SubVocabDistance subvocabDistance(vocab, hiddenVocab);

    /*
     * Name the lattice after the sentence id if there is one,
     * else after the nbest filename, else leave it unnamed
     */
    const char *latticeName =
	(sentid != 0) ? sentid :
	(nbestFile != 0) ? idFromFilename(nbestFile) : 0;

    /*
     * Create the alignment object; a mesh uses the dictionary distance
     * if a dictionary was given, else the subvocab distance if a hidden
     * vocabulary was given
     */
    MultiAlign *lattice;

    if (!useMesh) {
	lattice = new WordLattice(vocab, latticeName);
    } else if (dictFile) {
	lattice = new WordMesh(vocab, latticeName, &dictDistance);
    } else if (hiddenVocabFile) {
	lattice = new WordMesh(vocab, latticeName, &subvocabDistance);
    } else {
	lattice = new WordMesh(vocab, latticeName);
    }
    assert(lattice != 0);

    /*
     * Read preexisting lattice if specified
     */
    if (readFile) {
	File latticeInput(readFile, "r");

	if (!lattice->read(latticeInput)) {
	    cerr << "format error in lattice file\n";
	    exit(1);
	}
    }

    /*
     * Read list of other lattices, and merge with main lattice
     */
    if (latticeFiles) {
	File latticeList(latticeFiles, "r");

	alignLattices(*lattice, latticeList);
    }

    /*
     * Process nbest list
     */
    if (nbestFile) {
	NBestList nbestList(vocab, maxNbest, multiwords,
			    nbestBacktrace || outputCTM);
	nbestList.debugme(debug);

	{
	    /* input file lives only for the duration of the read */
	    File nbestInput(nbestFile, "r");

	    if (!nbestList.read(nbestInput)) {
		cerr << "format error in nbest list\n";
		exit(1);
	    }
	}

	/*
	 * Remove pauses and noise from nbest hyps since these would
	 * confuse the inter-hyp alignments.
	 */
	if (!keepNoise) {
	    nbestList.removeNoise(nullLM);
	}

	if (reference && computeNbestError) {
	    /*
	     * Compute nbest error relative to reference
	     */
	    unsigned numSub, numIns, numDel;
	    unsigned numErr =
		nbestList.wordError(reference, numSub, numIns, numDel);

	    if (sentid) {
		cout << sentid << " ";
	    }
	    cout << numErr
		 << " sub " << numSub
		 << " ins " << numIns
		 << " del " << numDel
		 << " words " << Vocab::length(reference) << endl;
	} else if (werRescore) {
	    /*
	     * Word error rescoring
	     */
	    wordErrorRescore(sentid, nbestList);
	} else if (!noRescore) {
	    /*
	     * Lattice building (and rescoring)
	     */
	    latticeRescore(sentid, *lattice, nbestList, reference);
	}

	/*
	 * Dump per-hypothesis alignments if requested
	 */
	if (reference && dumpErrors) {
	    computeWordErrors(sentid, nbestList, reference);
	}

	if (outNbest) {
	    File nbestOutput(outNbest, "w");

	    nbestList.write(nbestOutput, writeDecipherNbest);
	}
    }

    /*
     * Compute word error of lattice relative to reference hyps
     */
    if (reference && computeLatticeError) {
	unsigned numSub, numIns, numDel;
	unsigned numErr =
	    lattice->wordError(reference, numSub, numIns, numDel);

	if (sentid) {
	    cout << sentid << " ";
	}
	cout << numErr
	     << " sub " << numSub
	     << " ins " << numIns
	     << " del " << numDel
	     << " words " << Vocab::length(reference) << endl;
    }

    /*
     * If reference words are known, record them in alignment
     */
    if (reference && !computeNbestError) {
	lattice->alignReference(reference);
    }

    if (outLattice) {
	File latticeOutput(outLattice, "w");

	lattice->write(latticeOutput);
    }

    delete lattice;
}
int
main (int argc, char *argv[])
{
setlocale(LC_CTYPE, "");
setlocale(LC_COLLATE, "");
Opt_Parse(argc, argv, options, Opt_Number(options), 0);
if (version) {
printVersion(RcsId);
exit(0);
}
if (primeWith1best || primeWithRefs) {
primeLattice = 1;
}
Vocab vocab;
NullLM nullLM(vocab);
if (vocabFile) {
File file(vocabFile, "r");
vocab.read(file);
}
vocab.toLower() = toLower ? true : false;
/*
* Skip noise tags in scoring
*/
if (noiseVocabFile) {
File file(noiseVocabFile, "r");
nullLM.noiseVocab.read(file);
}
if (noiseTag) { /* backward compatibility */
nullLM.noiseVocab.addWord(noiseTag);
}
/*
* Posterior scaling: if not specified (= 0.0) use LMW for
* backward compatibility.
*/
if (posteriorScale == 0.0) {
posteriorScale = (rescoreLMW == 0.0) ? 1.0 : rescoreLMW;
}
/*
* Default weights for posterior computation are same as for rescoring
*/
if (posteriorLMW == undefinedWeight) {
posteriorLMW = rescoreLMW;
}
if (posteriorWTW == undefinedWeight) {
posteriorWTW = rescoreWTW;
}
Vocab dictVocab;
VocabMultiMap dictionary(vocab, dictVocab);
/*
* Read optional dictionary to help in word alignment
*/
if (dictFile) {
File file(dictFile, "r");
if (!dictionary.read(file)) {
cerr << "format error in dictionary file\n";
exit(1);
}
}
/*
* Optionally read a subvocabulary that is to be kept separate from
* regular words during alignment
*/
SubVocab hiddenVocab(vocab);
if (hiddenVocabFile) {
File file(hiddenVocabFile, "r");
hiddenVocab.read(file);
}
/*
* Read reference words
*/
VocabIndex *reference = 0;
if (refString) {
reference = new VocabIndex[maxWordsPerLine + 1];
assert(reference != 0);
VocabString refWords[maxWordsPerLine + 1];
unsigned numWords =
Vocab::parseWords(refString, refWords, maxWordsPerLine);
if (numWords == maxWordsPerLine) {
cerr << "more than " << maxWordsPerLine << " reference words\n";
exit(1);
}
vocab.addWords(refWords, reference, maxWordsPerLine + 1);
} else if (rescoreFile || !nbestFiles) {
if (dumpErrors || computeNbestError || computeLatticeError) {
cerr << "cannot compute errors without reference\n";
exit(1);
}
}
/*
* Process single nbest file
*/
if (rescoreFile) {
processNbest(nullLM, 0, rescoreFile, dictionary, hiddenVocab, reference,
writeFile, writeNbestFile);
} else if (!nbestFiles) {
/*
* If neither -nbest nor -nbest-files was specified
* do lattice processing only.
*/
processNbest(nullLM, 0, 0, dictionary, hiddenVocab, reference,
writeFile, writeNbestFile);
}
/*
* Read list of nbest filenames
*/
if (nbestFiles) {
RefList refs(vocab);
if (refFile) {
File file(refFile, "r");
refs.read(file, true); // add reference words to vocabulary
} else {
if (dumpErrors || computeNbestError || computeLatticeError) {
cerr << "cannot compute errors without reference\n";
exit(1);
}
}
File file(nbestFiles, "r");
char *line;
while (line = file.getline()) {
char *fname = strtok(line, wordSeparators);
if (!fname) continue;
RefString sentid = idFromFilename(fname);
VocabIndex *reference = 0;
if (refFile) {
reference = refs.findRef(sentid);
if (!reference) {
cerr << "no reference for " << sentid << endl;
if (dumpErrors || computeNbestError || computeLatticeError)
{
continue;
}
}
}
makeArray(char, writeLatticeName ,
(writeDir ? strlen(writeDir) : 0) + 1
+ strlen(sentid) + strlen(GZIP_SUFFIX) + 1);
if (writeDir) {
sprintf(writeLatticeName, "%s/%s%s", writeDir, sentid,
GZIP_SUFFIX);
}
makeArray(char, writeNbestName,
(writeNbestDir ? strlen(writeNbestDir) : 0) + 1
+ strlen(sentid) + strlen(GZIP_SUFFIX) + 1);
if (writeNbestDir) {
sprintf(writeNbestName, "%s/%s%s", writeNbestDir, sentid,
GZIP_SUFFIX);
}
processNbest(nullLM, sentid, fname, dictionary, hiddenVocab,
reference,
writeDir ? (char *)writeLatticeName : 0,
writeNbestDir ? (char *)writeNbestName : 0);
}
}
if (writeVocabFile) {
File file(writeVocabFile, "w");
vocab.write(file);
}
exit(0);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -