📄 lattice-tool.cc
字号:
/*
* lattice-tool --
* Manipulate word lattices
*/
/*
 * Copyright notice and RCS revision identifier, kept as string constants.
 * Both are left out when `lint` is defined.
 */
#ifndef lint
static char Copyright[] = "Copyright (c) 1997-2006 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Id: lattice-tool.cc,v 1.121 2006/01/09 19:16:04 stolcke Exp $";
#endif
#include <stdio.h>
#include <math.h>
#include <locale.h>
#include <errno.h>
#include <iostream>
using namespace std;
#ifndef _MSC_VER
#include <sys/time.h>
#include <unistd.h>
#endif
#include <signal.h>
#include <setjmp.h>
#ifndef SIGALRM
#define NO_TIMEOUT
#endif
#include "option.h"
#include "version.h"
#include "Vocab.h"
#include "MultiwordVocab.h"
#include "ProductVocab.h"
#include "Lattice.h"
#include "MultiwordLM.h"
#include "Ngram.h"
#include "ClassNgram.h"
#include "SimpleClassNgram.h"
#include "ProductNgram.h"
#include "BayesMix.h"
#include "RefList.h"
#include "LatticeLM.h"
#include "WordMesh.h"
#include "zio.h"
#include "mkdir.h"
#include "NgramStats.cc"
#include "Trie.cc"
#define DebugPrintFunctionality 1 // same as in Lattice.cc

/*
 * Program settings and their defaults.
 *
 * Except for `base`, every variable declared here is the storage target of
 * one (or, for compactExpansion, two) entries in the options[] table that
 * follows; the help string of that entry is the authoritative description
 * of what the setting means.  `base` has no entry in options[].
 */
static int version = 0;
static int compactExpansion = 0;
static int oldExpansion = 0;
static int base = 0;			// not bound to any switch in options[]
static int density = 0;
static int connectivity = 0;
static int compactPause = 0;
static int noBackoffWeights = 0;
static int nodeEntropy = 0;
static int viterbiDecode = 0;
static int nbestDecode = 0;
static int nbestViterbi = 0;
static unsigned nbestDuplicates = 0;
static int nbestMaxHyps = 0;
static int outputCTM = 0;
static int computePosteriors = 0;
static char *writePosteriors = 0;
static char *writePosteriorsDir = 0;
static char *writeMesh = 0;
static char *writeMeshDir = 0;
static char *writeNgrams = 0;
static double minCount = 0.0;
static int acousticMesh = 0;
static int posteriorDecode = 0;
static double posteriorScale = 8.0;	// also copied into htkparms.amscale when reading HTK lattices
static double posteriorPruneThreshold = 0.0;
static double densityPruneThreshold = 0.0;
static unsigned nodesPruneThreshold = 0;
static int fastPrune = 0;
static int reduceBeforePruning = 0;
static int noPause = 0;
static int insertPause = 0;
static int noNulls = 0;
static int loopPause = 0;
static int overwrite = 0;
static int simpleReduction = 0;
static int overlapBase = 0;
static double overlapRatio = 0.0;
static int dag = 0;
static char *dictFile = 0;
static int dictAlign = 0;
static int intlogs = 0;
static char *lmFile = 0;
static char *vocabFile = 0;
static char *noneventFile = 0;
static char *hiddenVocabFile = 0;
static int limitVocab = 0;
static int useUnk = 0;
static char *mapUnknown = 0;
static char *classesFile = 0;
static int simpleClasses = 0;
static int factored = 0;
static int keepNullFactors = 1;		// on by default; -no-null-factors is an OPT_FALSE switch
/* Up to nine additional LMs to mix in, each with its own mixture weight */
static char *mixFile = 0;
static char *mixFile2 = 0;
static char *mixFile3 = 0;
static char *mixFile4 = 0;
static char *mixFile5 = 0;
static char *mixFile6 = 0;
static char *mixFile7 = 0;
static char *mixFile8 = 0;
static char *mixFile9 = 0;
static double mixLambda = 0.5;
static double mixLambda2 = 0.0;
static double mixLambda3 = 0.0;
static double mixLambda4 = 0.0;
static double mixLambda5 = 0.0;
static double mixLambda6 = 0.0;
static double mixLambda7 = 0.0;
static double mixLambda8 = 0.0;
static double mixLambda9 = 0.0;
static char *inLattice = 0;
static char *inLattice2 = 0;
static char *inLatticeList = 0;
static char *outLattice = 0;
static char *outLatticeDir = 0;
static char *refFile = 0;
static char *refList = 0;
static char *writeRefs = 0;
static double addRefsProb = 0.0;
static int keepPause = 0;
static char *noiseVocabFile = 0;
static char *ignoreVocabFile = 0;
static char *indexName = 0;
static char *operation = 0;
static double interSegmentTime = 0.0;
static int order = 3;
static int simpleReductionIter = 0;
static int preReductionIter = 0;
static int postReductionIter = 0;
static int collapseSameWords = 0;
static int debug = 0;
static int readHTK = 0;
static int writeHTK = 0;
static int useHTKnulls = 1;		// on by default; -no-htk-nulls is an OPT_FALSE switch
static int readMesh = 0;
static int writeInternal = 0;
/*
 * Per-lattice time limit in seconds; 0 disables the limit.
 * Declared unsigned to agree with its OPT_UINT entry in options[] and with
 * the unsigned argument taken by alarm().
 */
static unsigned maxTime = 0;
static unsigned maxNodes = 0;
static int splitMultiwords = 0;
static int splitMultiwordsAfterLM = 0;
static int toLower = 0;
static int useMultiwordLM = 0;
static const char *multiChar = MultiwordSeparator;
static char *pplFile = 0;
/*
 * HTK lattice parameters.  Every constructor argument is HTK_undef_float;
 * individual fields are then the targets of the -htk-* switches in options[].
 */
static HTKHeader htkparms(HTK_undef_float, HTK_undef_float, HTK_undef_float,
HTK_undef_float, HTK_undef_float, HTK_undef_float,
HTK_undef_float, HTK_undef_float, HTK_undef_float,
HTK_undef_float, HTK_undef_float, HTK_undef_float,
HTK_undef_float, HTK_undef_float, HTK_undef_float);
/*
 * N-best output settings, constructed with all-zero arguments; its fields
 * are the targets of the -out-nbest-dir* switches in options[].
 */
static NBestOptions nbestOut(0,0,0,0,0,0,0,0,0,0,0,0,0,0);
/*
 * Command-line option table.
 * Each entry lists, in order: the value type (OPT_*), the switch name,
 * the address of the variable that stores the value, and the help text
 * (entry layout is defined by Option in option.h).
 *
 * Notes:
 *  - "compact-expansion" and "topo-compact-expansion" share one variable.
 *  - "max-time" is only available when NO_TIMEOUT is not defined.
 */
static Option options[] = {
    { OPT_TRUE, "version", &version, "print version information" },
    { OPT_TRUE, "in-lattice-dag", &dag, "input lattices are defined in a directed acyclic graph" },
    { OPT_STRING, "in-lattice", &inLattice, "input lattice for lattice operation including expansion or bigram weight substitution" },
    { OPT_STRING, "in-lattice2", &inLattice2, "a second input lattice for lattice operation" },
    { OPT_STRING, "in-lattice-list", &inLatticeList, "input lattice list for expansion or bigram weight substitution" },
    { OPT_STRING, "out-lattice", &outLattice, "resulting output lattice" },
    { OPT_STRING, "out-lattice-dir", &outLatticeDir, "resulting output lattice dir" },
    { OPT_STRING, "dictionary", &dictFile, "pronunciation dictionary" },
    { OPT_TRUE, "dictionary-align", &dictAlign, "use pronunciation dictionary in alignment for posterior decoding" },
    { OPT_TRUE, "intlogs", &intlogs, "dictionary uses intlog probabilities" },
    { OPT_STRING, "lm", &lmFile, "LM used for expansion or weight substitution" },
    { OPT_STRING, "vocab", &vocabFile, "vocab file" },
    { OPT_STRING, "nonevents", &noneventFile, "non-event vocabulary" },
    { OPT_STRING, "hidden-vocab", &hiddenVocabFile, "subvocabulary to keep separate in lattice alignment" },
    { OPT_TRUE, "limit-vocab", &limitVocab, "limit LM reading to specified vocabulary" },
    { OPT_TRUE, "unk", &useUnk, "map unknown words to <unk>" },
    { OPT_STRING, "map-unk", &mapUnknown, "word to map unknown words to" },
    { OPT_STRING, "classes", &classesFile, "class definitions" },
    { OPT_TRUE, "simple-classes", &simpleClasses, "use unique class model" },
    { OPT_TRUE, "factored", &factored, "use a factored LM" },
    { OPT_FALSE, "no-null-factors", &keepNullFactors, "remove <NULL> in factored LM" },
    { OPT_STRING, "mix-lm", &mixFile, "LM to mix in" },
    { OPT_FLOAT, "lambda", &mixLambda, "mixture weight for -lm" },
    { OPT_STRING, "mix-lm2", &mixFile2, "second LM to mix in" },
    { OPT_FLOAT, "mix-lambda2", &mixLambda2, "mixture weight for -mix-lm2" },
    { OPT_STRING, "mix-lm3", &mixFile3, "third LM to mix in" },
    { OPT_FLOAT, "mix-lambda3", &mixLambda3, "mixture weight for -mix-lm3" },
    { OPT_STRING, "mix-lm4", &mixFile4, "fourth LM to mix in" },
    { OPT_FLOAT, "mix-lambda4", &mixLambda4, "mixture weight for -mix-lm4" },
    { OPT_STRING, "mix-lm5", &mixFile5, "fifth LM to mix in" },
    { OPT_FLOAT, "mix-lambda5", &mixLambda5, "mixture weight for -mix-lm5" },
    { OPT_STRING, "mix-lm6", &mixFile6, "sixth LM to mix in" },
    { OPT_FLOAT, "mix-lambda6", &mixLambda6, "mixture weight for -mix-lm6" },
    { OPT_STRING, "mix-lm7", &mixFile7, "seventh LM to mix in" },
    { OPT_FLOAT, "mix-lambda7", &mixLambda7, "mixture weight for -mix-lm7" },
    { OPT_STRING, "mix-lm8", &mixFile8, "eighth LM to mix in" },
    { OPT_FLOAT, "mix-lambda8", &mixLambda8, "mixture weight for -mix-lm8" },
    { OPT_STRING, "mix-lm9", &mixFile9, "ninth LM to mix in" },
    { OPT_FLOAT, "mix-lambda9", &mixLambda9, "mixture weight for -mix-lm9" },
    { OPT_INT, "order", &order, "ngram order used for expansion or bigram weight substitution" },
    { OPT_STRING, "ref-list", &refList, "reference file used for computing WER (lines starting with utterance id)" },
    { OPT_STRING, "ref-file", &refFile, "reference file used for computing WER (utterances in same order in lattice list)" },
    { OPT_FLOAT, "add-refs", &addRefsProb, "add reference words to lattice with given probability" },
    { OPT_STRING, "write-refs", &writeRefs, "output references to file (for validation)" },
    { OPT_STRING, "ppl", &pplFile, "compute perplexity according to lattice" },
    { OPT_TRUE, "keep-pause", &keepPause, "treat pauses as regular word for WER computation and decoding" },
    { OPT_STRING, "noise-vocab", &noiseVocabFile, "noise vocabulary to ignore in WER computation and decoding" },
    { OPT_STRING, "ignore-vocab", &ignoreVocabFile, "pause-like words to ignore in lattice operations" },
    { OPT_TRUE, "overwrite", &overwrite, "overwrite existing output lattice dir" },
    { OPT_TRUE, "reduce", &simpleReduction, "reduce bigram lattice(s) using the simple algorithm" },
    { OPT_INT, "reduce-iterate", &simpleReductionIter, "reduce input lattices iteratively" },
    { OPT_INT, "pre-reduce-iterate", &preReductionIter, "reduce pause-less lattices iteratively" },
    { OPT_INT, "post-reduce-iterate", &postReductionIter, "reduce output lattices iteratively" },
    { OPT_TRUE, "reduce-before-pruning", &reduceBeforePruning, "apply posterior pruning after lattice reduction" },
    { OPT_FLOAT, "overlap-ratio", &overlapRatio, "if two incoming/outgoing node sets of two given nodes with the same label overlap beyond this ratio, they are merged" },
    { OPT_INT, "overlap-base", &overlapBase, "use the smaller (0) incoming/outgoing node set to compute overlap ratio, or the larger (1) set to compute the overlap ratio" },
    { OPT_TRUE, "compact-expansion", &compactExpansion, "use compact LM expansion algorithm (using backoff nodes)" },
    { OPT_TRUE, "topo-compact-expansion", &compactExpansion, "(same as above, for backward compatibility)" },
    { OPT_TRUE, "old-expansion", &oldExpansion, "use old unigram/bigram/trigram expansion algorithms" },
    { OPT_TRUE, "no-backoff-weights", &noBackoffWeights, "suppress backoff weights in lattice expansion (a hack)" },
    { OPT_TRUE, "multiwords", &useMultiwordLM, "use multiword wrapper LM" },
    { OPT_TRUE, "split-multiwords", &splitMultiwords, "split multiwords into separate nodes" },
    { OPT_TRUE, "split-multiwords-after-lm", &splitMultiwordsAfterLM, "split multiwords after LM expansion" },
    { OPT_STRING, "multi-char", &multiChar, "multiword component delimiter" },
    { OPT_TRUE, "tolower", &toLower, "map vocabulary to lower case" },
    { OPT_STRING, "operation", &operation, "conventional lattice operations, including \"concatenate\" and \"or\"" },
    { OPT_FLOAT, "inter-segment-time", &interSegmentTime, "pause length to insert between concatenated lattices" },
    { OPT_TRUE, "density", &density, "compute densities of lattices" },
    { OPT_TRUE, "connectivity", &connectivity, "check the connectivity of given lattices" },
    { OPT_TRUE, "compute-node-entropy", &nodeEntropy, "compute the node entropy of given lattices" },
    { OPT_TRUE, "compute-posteriors", &computePosteriors, "compute the node posteriors of given lattices" },
    { OPT_STRING, "write-posteriors", &writePosteriors, "write posterior lattice format to this file" },
    { OPT_STRING, "write-posteriors-dir", &writePosteriorsDir, "write posterior lattices to this directory" },
    { OPT_STRING, "write-mesh", &writeMesh, "write posterior mesh (sausage) to this file" },
    { OPT_STRING, "write-mesh-dir", &writeMeshDir, "write posterior meshes to this directory" },
    { OPT_TRUE, "acoustic-mesh", &acousticMesh, "record acoustic information in word meshes" },
    { OPT_TRUE, "posterior-decode", &posteriorDecode, "decode best words from posterior mesh" },
    { OPT_FLOAT, "posterior-prune", &posteriorPruneThreshold, "posterior node pruning threshold" },
    { OPT_FLOAT, "posterior-scale", &posteriorScale, "posterior scaling factor" },
    { OPT_FLOAT, "density-prune", &densityPruneThreshold, "max lattice density for pruning" },
    { OPT_UINT, "nodes-prune", &nodesPruneThreshold, "max number of real nodes for pruning" },
    { OPT_TRUE, "fast-prune", &fastPrune, "fast posterior pruning (no posterior recomputation)" },
    { OPT_TRUE, "viterbi-decode", &viterbiDecode, "output words on highest probability path" },
    { OPT_INT, "nbest-decode", &nbestDecode, "number of nbest hyps to generate from lattice" },
    { OPT_INT, "nbest-max-stack", &nbestMaxHyps, "max stack size for nbest generation" },
    { OPT_TRUE, "nbest-viterbi", &nbestViterbi, "use Viterbi algorithm to generate nbest (instead of A-star)" },
    { OPT_UINT, "nbest-duplicates", &nbestDuplicates, "number of hyps to output per unique word string (words in -noise-vocab may or may not differ)" },
    { OPT_TRUE, "output-ctm", &outputCTM, "output decoded words in CTM format" },
    { OPT_STRING, "write-ngrams", &writeNgrams, "write expected ngram counts to file" },
    { OPT_FLOAT, "min-count", &minCount, "prune ngram counts below this value" },
    { OPT_STRING, "index-name", &indexName, "print a list of node index-name pairs to this file" },
    { OPT_TRUE, "no-pause", &noPause, "output lattices with no pauses" },
    { OPT_TRUE, "insert-pause", &insertPause, "insert optional pauses" },
    { OPT_TRUE, "no-nulls", &noNulls, "eliminate null nodes" },
    { OPT_TRUE, "compact-pause", &compactPause, "output lattices with compact pauses" },
    { OPT_TRUE, "loop-pause", &loopPause, "output lattices with loop pauses" },
    { OPT_TRUE, "collapse-same-words", &collapseSameWords, "collapse nodes with same words" },
    { OPT_INT, "debug", &debug, "debug level" },
    { OPT_TRUE, "read-htk", &readHTK, "read input lattices in HTK format" },
    { OPT_TRUE, "write-htk", &writeHTK, "write output lattices in HTK format" },
    { OPT_FALSE, "no-htk-nulls", &useHTKnulls, "don't use null nodes to encode HTK lattices" },
    { OPT_TRUE, "read-mesh", &readMesh, "read input lattices in word mesh format" },
    { OPT_TRUE, "write-internal", &writeInternal, "write out internal node numbering" },
#ifndef NO_TIMEOUT
    { OPT_UINT, "max-time", &maxTime, "maximum no. of seconds allowed per lattice" },
#endif
    { OPT_UINT, "max-nodes", &maxNodes, "maximum no. of nodes allowed in expanding lattice" },
    { OPT_FLOAT, "htk-acscale", &htkparms.acscale, "HTK acscale override" },
    { OPT_FLOAT, "htk-lmscale", &htkparms.lmscale, "HTK lmscale override" },
    { OPT_FLOAT, "htk-ngscale", &htkparms.ngscale, "HTK ngscale override" },
    { OPT_FLOAT, "htk-prscale", &htkparms.prscale, "HTK prscale override" },
    { OPT_FLOAT, "htk-duscale", &htkparms.duscale, "HTK duscale override" },
    { OPT_FLOAT, "htk-wdpenalty", &htkparms.wdpenalty, "HTK wdpenalty override" },
    { OPT_FLOAT, "htk-x1scale", &htkparms.x1scale, "HTK xscore1 override" },
    { OPT_FLOAT, "htk-x2scale", &htkparms.x2scale, "HTK xscore2 override" },
    { OPT_FLOAT, "htk-x3scale", &htkparms.x3scale, "HTK xscore3 override" },
    { OPT_FLOAT, "htk-x4scale", &htkparms.x4scale, "HTK xscore4 override" },
    { OPT_FLOAT, "htk-x5scale", &htkparms.x5scale, "HTK xscore5 override" },
    { OPT_FLOAT, "htk-x6scale", &htkparms.x6scale, "HTK xscore6 override" },
    { OPT_FLOAT, "htk-x7scale", &htkparms.x7scale, "HTK xscore7 override" },
    { OPT_FLOAT, "htk-x8scale", &htkparms.x8scale, "HTK xscore8 override" },
    { OPT_FLOAT, "htk-x9scale", &htkparms.x9scale, "HTK xscore9 override" },
    { OPT_FLOAT, "htk-logbase", &htkparms.logbase, "base for HTK log scores" },
    { OPT_TRUE, "htk-words-on-nodes", &htkparms.wordsOnNodes, "HTK lattices output with words on nodes" },
    { OPT_TRUE, "htk-scores-on-nodes", &htkparms.scoresOnNodes, "HTK lattices output with acoustic scores on nodes" },
    { OPT_TRUE, "htk-quotes", &htkparms.useQuotes, "use quotes in HTK lattices" },
    { OPT_STRING, "out-nbest-dir", &nbestOut.nbestOutDir, "resulting nbest list dir" },
    { OPT_STRING, "out-nbest-dir-ngram", &nbestOut.nbestOutDirNgram, "resulting nbest list ngram score dir" },
    { OPT_STRING, "out-nbest-dir-pron", &nbestOut.nbestOutDirPron, "resulting nbest list pron score dir" },
    { OPT_STRING, "out-nbest-dir-dur", &nbestOut.nbestOutDirDur, "resulting nbest list duration score dir" },
    { OPT_STRING, "out-nbest-dir-xscore1", &nbestOut.nbestOutDirXscore1, "resulting nbest list xscore1 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore2", &nbestOut.nbestOutDirXscore2, "resulting nbest list xscore2 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore3", &nbestOut.nbestOutDirXscore3, "resulting nbest list xscore3 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore4", &nbestOut.nbestOutDirXscore4, "resulting nbest list xscore4 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore5", &nbestOut.nbestOutDirXscore5, "resulting nbest list xscore5 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore6", &nbestOut.nbestOutDirXscore6, "resulting nbest list xscore6 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore7", &nbestOut.nbestOutDirXscore7, "resulting nbest list xscore7 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore8", &nbestOut.nbestOutDirXscore8, "resulting nbest list xscore8 score dir" },
    { OPT_STRING, "out-nbest-dir-xscore9", &nbestOut.nbestOutDirXscore9, "resulting nbest list xscore9 score dir" },
    { OPT_STRING, "out-nbest-dir-rttm", &nbestOut.nbestOutDirRttm, "resulting nbest hyps output in rttm format (with extra preceding column that gives hyp number)" }
};
/*
 * Write a decoded word sequence to stdout, one CTM-style line per word:
 *
 *	<name> 1 <start> <duration> <word> <posterior>
 *
 * vocab	used to turn each word index into its string
 * winfo	array of word entries, terminated by an entry whose
 *		word is Vocab_None
 * name		label printed in the first column of every line
 *
 * The second column is always the literal "1" (presumably the CTM channel
 * field).  Entries for which valid() is false get "? ?" in place of the
 * start time and duration.
 */
static void
printCTM(Vocab &vocab, const NBestWordInfo *winfo, const char *name)
{
    const NBestWordInfo *entry = winfo;

    while (entry->word != Vocab_None) {
	cout << name << " 1 ";

	if (!entry->valid()) {
	    /* no usable timing information for this word */
	    cout << "? ?";
	} else {
	    cout << entry->start << " " << entry->duration;
	}

	cout << " " << vocab.getWord(entry->word)
	     << " " << entry->wordPosterior << endl;

	entry ++;
    }
}
#ifndef NO_TIMEOUT
/*
 * Support for the per-lattice time limit (-max-time).
 *
 * Deal with different signal handler types: _sigargs is the handler's
 * parameter list and defaults to int unless it has already been defined.
 */
#ifndef _sigargs
#define _sigargs int
#endif
typedef void (*sighandler_t)(_sigargs);
/* Jump target saved by setjmp() in processLattice() before work starts */
static jmp_buf thisContext;
/*
 * SIGALRM handler: abandon the lattice currently being processed by
 * jumping back to the setjmp() point in processLattice(), which then sees
 * a return value of 1.  The signal number argument is not used.
 *
 * NOTE(review): this leaves a signal handler via plain longjmp(); whether
 * the signal mask is restored that way is platform-dependent -- consider
 * sigsetjmp()/siglongjmp(). TODO confirm on the supported platforms.
 */
void catchAlarm(int signal)
{
longjmp(thisContext, 1);
}
#endif /* !NO_TIMEOUT */
void processLattice(char *inLat, char *outLat, Lattice *lattice2,
NgramCounts<FloatCount> &ngramCounts,
LM &lm, Vocab &vocab, SubVocab &hiddenVocab,
VocabMultiMap &dictionary,
SubVocab &ignoreWords, SubVocab &noiseWords,
VocabIndex *refIndices = 0)
{
Lattice lat(vocab, idFromFilename(inLat), ignoreWords);
lat.debugme(debug);
if (useUnk) lat.useUnk = true;
{
File file(inLat, "r");
Boolean status;
if (readHTK) {
htkparms.amscale = posteriorScale;
status = lat.readHTK(file, &htkparms, useHTKnulls);
} else if (readMesh) {
lat.setHTKHeader(htkparms);
status = lat.readMesh(file);
} else {
lat.setHTKHeader(htkparms);
status = lat.readPFSGs(file);
}
if (!status) {
cerr << "error reading " << inLat << endl;
return;
}
}
#ifndef NO_TIMEOUT
if (maxTime) {
alarm(maxTime);
if (setjmp(thisContext)) {
cerr << "WARNING: processing lattice " << inLat
<< " aborted after " << maxTime << " seconds\n";
return;
}
signal(SIGALRM, (sighandler_t)catchAlarm);
}
#endif /* !NO_TIMEOUT */
if (dictFile && !dictAlign) {
// pronunciation scoring (only useful for HTK lattices)
// do this BEFORE splitting multiwords since pronunciations apply to
// the original multiwords
if (!lat.scorePronunciations(dictionary, intlogs)) {
cerr << "WARNING: error scoring pronunciations for " << inLat
<< endl;
}
}
if (splitMultiwords) {
lat.splitMultiwordNodes((MultiwordVocab &)vocab, lm);
}
if (posteriorPruneThreshold > 0 && !reduceBeforePruning) {
if (!lat.prunePosteriors(posteriorPruneThreshold, posteriorScale,
densityPruneThreshold, nodesPruneThreshold,
fastPrune))
{
cerr << "WARNING: posterior pruning of lattice " << inLat
<< " failed\n";
#ifndef NO_TIMEOUT
alarm(0);
#endif
return;
}
}
if (writePosteriors) {
File file(writePosteriors, "w");
lat.writePosteriors(file, posteriorScale);
}
if (writePosteriorsDir) {
makeArray(char, outfile,
strlen(writePosteriorsDir) + 1 +
strlen(lat.getName()) + sizeof(GZIP_SUFFIX));
sprintf(outfile, "%s/%s%s", writePosteriorsDir,
lat.getName(), GZIP_SUFFIX);
File file(outfile, "w");
lat.writePosteriors(file, posteriorScale);
}
if (writeMesh || writeMeshDir || posteriorDecode) {
VocabDistance *wordDistance = 0;
/*
* Use word distance constrained by hidden-vocabulary membership
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -