// lattice-tool.cc
* if specified
*/
// Select the word-distance measure used for aligning the lattice into a
// word mesh (confusion network), depending on command-line options.
if (hiddenVocabFile) {
// distance sensitive to membership in the hidden subvocabulary
wordDistance = new SubVocabDistance(vocab, hiddenVocab);
assert(wordDistance!= 0);
} else if (dictFile && dictAlign) {
// distance derived from dictionary (pronunciation) information
wordDistance = new DictionaryAbsDistance(vocab, dictionary);
assert(wordDistance != 0);
}
// Build the confusion network for this lattice; a null wordDistance
// presumably selects the default distance measure -- TODO confirm against
// the initialization earlier in this function (not visible in fragment).
WordMesh mesh(vocab, lat.getName(), wordDistance);
/*
* Preserve acoustic information in word mesh if requested,
* or if needed for CTM generation.
*/
lat.alignLattice(mesh, noiseWords, posteriorScale,
acousticMesh || outputCTM);
if (posteriorDecode) {
/*
* Recover best hyp from lattice
*/
unsigned maxLength = mesh.length();
double subs, inss, dels;
if (outputCTM) {
// CTM output needs per-word timing, hence NBestWordInfo entries
NBestWordInfo *bestWords = new NBestWordInfo[maxLength + 1];
assert(bestWords != 0);
// consensus decoding: minimize expected word error over the mesh
// NOTE(review): `errors' (and subs/inss/dels) are unused afterwards
double errors = mesh.minimizeWordError(bestWords, maxLength + 1,
subs, inss, dels);
printCTM(vocab, bestWords, lat.getName());
delete [] bestWords;
} else {
makeArray(VocabIndex, bestWords, maxLength + 1);
double errors = mesh.minimizeWordError(bestWords, maxLength + 1,
subs, inss, dels);
// SRILM idiom: vocab.use() (comma operator, result discarded) selects
// the vocabulary used by operator<< to print the word-index array
cout << lat.getName() << " "
<< (mesh.vocab.use(), bestWords) << endl;
}
}
// Optionally align the reference word string into the mesh before writing
if (refIndices && (writeMesh || writeMeshDir)) {
mesh.alignReference(refIndices);
}
if (writeMesh) {
File file(writeMesh, "w");
mesh.write(file);
}
if (writeMeshDir) {
// Compose "<dir>/<latname><GZIP_SUFFIX>"; sizeof(GZIP_SUFFIX) includes
// the terminating NUL, the extra +1 accounts for the '/' separator
makeArray(char, outfile,
strlen(writeMeshDir) + 1 +
strlen(lat.getName()) + sizeof(GZIP_SUFFIX));
sprintf(outfile, "%s/%s%s", writeMeshDir,
lat.getName(), GZIP_SUFFIX);
File file(outfile, "w");
mesh.write(file);
}
// free the distance object (delete on a null pointer is a no-op)
delete wordDistance;
}
// Accumulate posterior-weighted N-gram counts from the lattice
// (presumably for the -write-ngrams option -- name suggests so)
if (writeNgrams) {
lat.countNgrams(order, ngramCounts, posteriorScale);
}
// Compute node/link posteriors over the lattice
if (computePosteriors) {
lat.computePosteriors(posteriorScale, true);
}
if (density) {
    /*
     * Report lattice density; HUGE_VAL signals that the lattice
     * duration (needed as the denominator) is unknown.
     */
    double d = lat.computeDensity();
    if (d == HUGE_VAL) {
	cerr << "WARNING: duration for lattice " << inLat << " unknown\n";
    } else {
	// reuse the value computed above instead of recomputing it
	cout << lat.getName() << " " << d << endl;
    }
}
if (connectivity) {
// verify the final node is reachable from the initial node
if (!lat.areConnected(lat.getInitial(), lat.getFinal())) {
cerr << "WARNING: lattice " << inLat << " is not connected\n";
#ifndef NO_TIMEOUT
// cancel the per-lattice watchdog alarm before abandoning this lattice
alarm(0);
#endif
return;
}
}
if (nodeEntropy) {
lat.computeNodeEntropy();
}
if (indexName) {
// dump (node index, name) pairs to the requested index file
File indexFile(indexName, "w");
lat.printNodeIndexNamePair(indexFile);
}
if (refFile || refList) {
if (refIndices) {
unsigned numWords = vocab.length(refIndices);
if (!numWords) {
cerr << "WARNING: reference has 0 length\n";
}
// Word error of the lattice against the reference word string,
// ignoring noise words (presumably the lowest achievable WER --
// the usual meaning of "lattice WER"; confirm in Lattice API)
unsigned total, sub, ins, del;
total = lat.latticeWER(refIndices, sub, ins, del, noiseWords);
cout << "sub " << sub
<< " ins " << ins
<< " del " << del
<< " wer " << total
<< " words " << numWords
<< endl;
} else {
cerr << "FATAL ERROR: reference is missing for lattice "
<< inLat << endl;
}
}
if (addRefsProb != 0.0) {
// Merge the reference words into the lattice with the given
// probability; pauses are included unless noPause is set
if (refIndices) {
lat.addWords(refIndices, addRefsProb, !noPause);
} else if (!(refFile || refList)) {
cerr << "FATAL ERROR: reference is missing for lattice "
<< inLat << endl;
}
}
if (noNulls) {
// remove NULL (epsilon) nodes; Vocab_None marks the null word
lat.removeAllXNodes(Vocab_None);
}
// Lattice reduction: exact packing when overlapRatio == 0, otherwise an
// approximate reduction driven by the node-overlap ratio
if (simpleReduction || simpleReductionIter) {
cerr << "reducing input lattices (overlap ratio = "
<< overlapRatio << ")\n";
if (overlapRatio == 0.0) {
// if reduceBeforePruning is specified then merged transitions probs
// should be addded, not maxed
lat.simplePackBigramLattice(simpleReductionIter, reduceBeforePruning);
} else {
lat.approxRedBigramLattice(simpleReductionIter, overlapBase,
overlapRatio);
}
}
// Posterior-based pruning, done here (before LM expansion) only when
// reduceBeforePruning is set
if (posteriorPruneThreshold > 0 && reduceBeforePruning) {
if (!lat.prunePosteriors(posteriorPruneThreshold, posteriorScale,
densityPruneThreshold, nodesPruneThreshold,
fastPrune))
{
cerr << "WARNING: posterior pruning of lattice " << inLat
<< " failed\n";
#ifndef NO_TIMEOUT
// cancel watchdog before abandoning this lattice
alarm(0);
#endif
return;
}
}
// Set to true below when pauses are stripped for LM application, so they
// can be re-inserted afterwards
Boolean recoverPauses = false;
if (noPause) {
lat.removeAllXNodes(vocab.pauseIndex());
}
// Give up on lattices that are still too large after reduction
if (maxNodes > 0 && lat.getNumNodes() > maxNodes) {
cerr << "WARNING: processing lattice " << inLat
<< " aborted -- too many nodes after reduction: "
<< lat.getNumNodes() << endl;
#ifndef NO_TIMEOUT
alarm(0);
#endif
return;
}
// by default we leave HTK lattices scores alone
HTKScoreMapping htkScoreMapping = mapHTKnone;
if (!readHTK) {
// preserve PFSG weights as HTK acoustic scores
htkScoreMapping = mapHTKacoustic;
}
// Apply / expand the language model over the lattice
if (lmFile) {
// remove pause and NULL nodes prior to LM application,
// so each word has a proper predecessor
// (This can be skipped for unigram LMs, unless we're explicitly
// asked to eliminate pauses. It also not necessary for the
// new general LM expansion algorithms.
if (noPause || compactPause || (oldExpansion && order >= 2)) {
if (!noPause) {
lat.removeAllXNodes(vocab.pauseIndex());
}
if (!noNulls) {
lat.removeAllXNodes(Vocab_None);
}
// remember to re-insert pauses after expansion (see below)
recoverPauses = true;
/*
* attempt further reduction on pause-less lattices
*/
if (preReductionIter) {
cerr << "reducing pause-less lattices (overlap ratio = "
<< overlapRatio << ")\n";
// NOTE(review): `f' appears unused in the visible code -- confirm
File f(stderr);
if (overlapRatio == 0.0) {
lat.simplePackBigramLattice(preReductionIter);
} else {
lat.approxRedBigramLattice(preReductionIter, overlapBase,
overlapRatio);
}
}
}
// assigned on every path below before being read
Boolean status;
if (oldExpansion) {
switch (order) {
case 1:
case 2:
// unigram/bigram weight replacement
status = lat.replaceWeights(lm);
break;
default:
// trigram expansion
if (compactExpansion) {
// NOTE(review): C-style downcast assumes lm is actually an
// Ngram here -- presumably guaranteed by option handling; confirm
status = lat.expandToCompactTrigram(*(Ngram *)&lm, maxNodes);
} else {
status = lat.expandToTrigram(lm, maxNodes);
}
}
} else {
if (noBackoffWeights) {
// hack to ignore backoff weights in LM expansion
lat.noBackoffWeights = true;
}
// general LM expansion
status = lat.expandToLM(lm, maxNodes, compactExpansion);
}
if (!status) {
cerr << "WARNING: expansion of lattice " << inLat << " failed\n";
#ifndef NO_TIMEOUT
// cancel watchdog before abandoning this lattice
alarm(0);
#endif
return;
}
/*
* after LM application need to make sure that probs will fit in
* bytelog range
*/
lat.limitIntlogs = true;
/*
* Replace old HTK language scores in output with new LM scores
*/
htkScoreMapping = mapHTKlanguage;
}
// Re-insert pauses that were removed for LM application, and/or insert
// new optional pauses.
// Precedence note: this parses as (!noPause && recoverPauses) || insertPause.
if (!noPause && recoverPauses || insertPause) {
if (compactPause) {
lat.recoverCompactPauses(loopPause, insertPause);
} else {
lat.recoverPauses(loopPause, insertPause);
}
}
/*
* attempt further reduction on output lattices after LM expansion
*/
if (postReductionIter) {
cerr << "reducing output lattices (overlap ratio = "
<< overlapRatio << ")\n";
if (overlapRatio == 0.0) {
lat.simplePackBigramLattice(postReductionIter);
} else {
lat.approxRedBigramLattice(postReductionIter, overlapBase,
overlapRatio);
}
}
if (splitMultiwordsAfterLM) {
/*
* Split multiwords after LM application
* We create an empty LM so that none of the multiwords appear with
* non-zero probability
*/
Ngram dummy(vocab);
lat.splitMultiwordNodes((MultiwordVocab &)vocab, dummy);
}
if (collapseSameWords) {
// merge nodes carrying the same word, ignoring noise words
lat.collapseSameWordNodes(noiseWords);
}
/*
 * perform lattice algebra
 */
Lattice *finalLat,
	resultLat(vocab, idFromFilename(outLat), ignoreWords);
if (operation && lattice2 != 0) {
    resultLat.debugme(debug);

    // Combine this lattice with the second operand lattice (union or
    // concatenation); any other operation name is a usage error.
    if (!strcmp(operation, LATTICE_OR)) {
	resultLat.latticeOr(lat, *lattice2);
    } else if (!strcmp(operation, LATTICE_CONCATE)) {
	resultLat.latticeCat(lat, *lattice2, interSegmentTime);
    } else {
	cerr << "unknown operation (" << operation << ")\n";
	cerr << "allowed operations are " << LATTICE_OR
	     << " and " << LATTICE_CONCATE << endl;
	exit(2);
    }
    finalLat = &resultLat;
} else {
    // No binary operation requested: the processed input lattice itself is
    // the final result.  (Restored from "finalLat = ⪫", an HTML-entity
    // mangling of "&lat;" introduced by the web scrape.)
    finalLat = &lat;
}
#ifndef NO_TIMEOUT
// kill alarm timer -- we're done
alarm(0);
#endif
// Viterbi (single best path) decoding of the final lattice
if (viterbiDecode) {
if (outputCTM) {
// CTM output needs per-word timing, hence NBestWordInfo entries
NBestWordInfo *bestwords = new NBestWordInfo[maxWordsPerLine + 1];
assert(bestwords != 0);
LogP prob =
finalLat->bestWords(bestwords, maxWordsPerLine, noiseWords);
// print only if a best path was found or the hyp is non-empty
if (prob != LogP_Zero || bestwords[0].word != Vocab_None) {
printCTM(vocab, bestwords, finalLat->getName());
}
delete [] bestwords;
} else {
// NOTE(review): variable-length array unless maxWordsPerLine is a
// compile-time constant -- a gcc extension in C++; confirm intent
VocabIndex bestwords[maxWordsPerLine + 1];
LogP prob =
finalLat->bestWords(bestwords, maxWordsPerLine, noiseWords);
if (prob != LogP_Zero || bestwords[0] != Vocab_None) {
// vocab.use() (comma operator) selects the vocabulary used by
// operator<< to print the word-index array
cout << finalLat->getName() << " "
<< (finalLat->vocab.use(), bestwords) << endl;
}
}
}
if (nbestDecode > 0) {
// output top N hyps
nbestOut.openFiles(finalLat->getName());
if (nbestViterbi) {
finalLat->computeNBestViterbi(nbestDecode, nbestOut, noiseWords,
useMultiwordLM ? multiChar : 0);
} else {
finalLat->computeNBest(nbestDecode, nbestOut, noiseWords,
useMultiwordLM ? multiChar : 0,
nbestMaxHyps, nbestDuplicates);
}
nbestOut.closeFiles();
}
// Use the lattice as a language model to compute perplexity of a text file
if (pplFile) {
if (!noPause) {
// treat pauses as regular words for LatticeLM computation
finalLat->ignoreVocab.remove(finalLat->vocab.pauseIndex());
}
LatticeLM latlm(*finalLat);
latlm.debugme(debug);
File file(pplFile, "r");
TextStats stats;
/*
* Send perplexity info to stdout
*/
latlm.dout(cout);
latlm.pplFile(file, stats);
latlm.dout(cerr);
cout << "file " << pplFile << ": " << stats;
}
// Finally, write the processed lattice in the requested format
if (outLattice || outLatticeDir) {
File file(outLat, "w");
if (writeHTK) {
finalLat->writeHTK(file, htkScoreMapping, computePosteriors);
} else if (writeInternal) {
finalLat->writePFSG(file);
} else {
finalLat->writeCompactPFSG(file);
}
}
}
/*
 * makeMixLM --
 *	Create one component LM read from `filename' -- a factored,
 *	class-based, or plain ngram model depending on global options.
 *	Presumably interpolated with oldLM using lambda1/lambda2 -- TODO
 *	confirm; the function body is truncated in this fragment.
 */
LM *
makeMixLM(const char *filename, Vocab &vocab, SubVocab *classVocab,
unsigned order, LM *oldLM, double lambda1, double lambda2)
{
File file(filename, "r");
/*
* create factored LM or class-LM if specified, otherwise a regular ngram
*/
Ngram *lm = factored ?
new ProductNgram((ProductVocab &)vocab, order) :
(classVocab != 0) ?
(simpleClasses ?
new SimpleClassNgram(vocab, *classVocab, order) :
new ClassNgram(vocab, *classVocab, order)) :
new Ngram(vocab, order);
assert(lm != 0);
// (end of fragment -- non-source website help text removed here)