⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nbest-lattice.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 2 页
字号:
	if (debug >= DEBUG_ERRORS) {
	    if (sentid) cerr << sentid << " ";
	    cerr << "err " << errors
		 << " sub " << subs
		 << " ins " << inss
		 << " del " << dels << endl;
	}
    }
}

/*
 * Print per-hypothesis word errors and alignments
 *	For each of the first maxRescore hypotheses (all of them if maxRescore
 *	is not positive) one line is written to stdout: an optional
 *	"sentid:index" prefix, the error count returned by wordError(), and
 *	one INS/DEL/SUB/CORR label per alignment position.
 */
void
computeWordErrors(const char *sentid, NBestList &nbestList,
						const VocabIndex *reference)
{
    /*
     * Never look at more hypotheses than the list actually holds
     */
    unsigned limit = nbestList.numHyps();
    if (maxRescore > 0) {
	unsigned cap = maxRescore;

	if (cap < limit) {
	    limit = cap;
	}
    }

    for (unsigned h = 0; h < limit; h ++) {
	unsigned nSub, nIns, nDel;

	/*
	 * Alignment buffer sized for hyp length + ref length + terminator
	 */
	unsigned alignSize = Vocab::length(nbestList.getHyp(h).words) +
			     Vocab::length(reference) + 1;
	makeArray(WordAlignType, alignment, alignSize);

	unsigned errorCount =
		    wordError(reference, nbestList.getHyp(h).words,
					    nSub, nIns, nDel, alignment);

	if (sentid) {
	    cout << sentid << ":" << h << " ";
	}
	cout << errorCount;

	/*
	 * Anything that is not an insertion, deletion or substitution
	 * is reported as correct
	 */
	for (unsigned pos = 0; alignment[pos] != END_ALIGN; pos ++) {
	    const char *label = "CORR";

	    if (alignment[pos] == INS_ALIGN) {
		label = "INS";
	    } else if (alignment[pos] == DEL_ALIGN) {
		label = "DEL";
	    } else if (alignment[pos] == SUB_ALIGN) {
		label = "SUB";
	    }
	    cout << " " << label;
	}
	cout << endl;
    }
}

/*
 * Align a list of lattices
 *	a list of lines containing lattice filenames, followed by optional
 *	weights is read from file
 *
 *	lat	alignment that every listed lattice is merged into
 *	file	open list file; read line by line until getline() returns 0
 *
 *	Lines without a filename token are skipped.  The weight defaults to
 *	1.0 when absent (and stays 1.0 if sscanf cannot parse the token).
 *	A lattice that fails to parse is reported on stderr and skipped.
 */
void
alignLattices(MultiAlign &lat, File &file)
{
    char *line;
    while ((line = file.getline()) != 0) {
	char *lname = strtok(line, wordSeparators);
	if (!lname) continue;

	double weight = 1.0;
	char *wstring = strtok(0, wordSeparators);
	if (wstring) {
	    sscanf(wstring, "%lf", &weight);
	}

	File lFile(lname, "r");
	MultiAlign *newLat;

	/*
	 * The temporary lattice shares the vocabulary of the main one
	 */
	if (useMesh) {
	    newLat = new WordMesh(lat.vocab);
	} else {
	    newLat = new WordLattice(lat.vocab);
	}
	assert(newLat != 0);

	if (!newLat->read(lFile)) {
	    cerr << "format error in lattice file\n";
	    /*
	     * Release the temporary before skipping to the next line;
	     * otherwise every unreadable lattice file leaks one object
	     */
	    delete newLat;
	    continue;
	}

	lat.alignAlignment(*newLat, weight);

	delete newLat;
    }
}

/*
 * Process a single N-best list
 *	Creates a word mesh or word lattice (depending on useMesh), optionally
 *	loads a saved lattice (readFile) and merges further lattices
 *	(latticeFiles), then, if nbestFile is given, reads the N-best list
 *	and performs error computation or rescoring.  Finally reports lattice
 *	error, records the reference in the alignment, and writes the lattice
 *	to outLattice, each only when requested.
 *
 *	sentid, nbestFile, reference, outLattice and outNbest may each be 0.
 */
void
processNbest(NullLM &nullLM, const char *sentid, const char *nbestFile,
			VocabMultiMap &dictionary, SubVocab &hiddenVocab,
			const VocabIndex *reference,
			const char *outLattice, const char *outNbest)
{
    Vocab &vocab = nullLM.vocab;
    DictionaryAbsDistance dictDistance(vocab, dictionary);
    SubVocabDistance subvocabDistance(vocab, hiddenVocab);

    /*
     * The lattice is named after the sentence id if there is one,
     * otherwise after an id derived from the N-best filename
     */
    const char *latticeName = sentid;
    if (latticeName == 0 && nbestFile != 0) {
	latticeName = idFromFilename(nbestFile);
    }

    /*
     * A mesh uses the dictionary distance if a dictionary was given,
     * else the subvocab distance if a hidden vocabulary was given
     */
    MultiAlign *mainLat;
    if (!useMesh) {
	mainLat = new WordLattice(vocab, latticeName);
    } else if (dictFile) {
	mainLat = new WordMesh(vocab, latticeName, &dictDistance);
    } else if (hiddenVocabFile) {
	mainLat = new WordMesh(vocab, latticeName, &subvocabDistance);
    } else {
	mainLat = new WordMesh(vocab, latticeName);
    }
    assert(mainLat != 0);

    /*
     * Start from a previously saved lattice, if one was specified
     */
    if (readFile) {
	File savedLattice(readFile, "r");

	if (!mainLat->read(savedLattice)) {
	    cerr << "format error in lattice file\n";
	    exit(1);
	}
    }

    /*
     * Merge any additional lattices named in the lattice list file
     */
    if (latticeFiles) {
	File latticeList(latticeFiles, "r");

	alignLattices(*mainLat, latticeList);
    }

    /*
     * N-best list handling
     */
    if (nbestFile) {
	NBestList nbestList(vocab, maxNbest, multiwords,
						nbestBacktrace || outputCTM);
	nbestList.debugme(debug);

	{
	    /* scoped so the input file is closed right after reading */
	    File nbestInput(nbestFile, "r");

	    if (!nbestList.read(nbestInput)) {
		cerr << "format error in nbest list\n";
		exit(1);
	    }
	}

	/*
	 * Unless asked to keep them, strip pauses and noise from the
	 * hyps; they would confuse the inter-hyp alignments.
	 */
	if (!keepNoise) {
	    nbestList.removeNoise(nullLM);
	}

	if (reference && computeNbestError) {
	    /*
	     * N-best error relative to the reference
	     */
	    unsigned nSub, nIns, nDel;
	    unsigned nbestErrors =
			nbestList.wordError(reference, nSub, nIns, nDel);

	    if (sentid) {
		cout << sentid << " ";
	    }
	    cout << nbestErrors
		 << " sub " << nSub
		 << " ins " << nIns
		 << " del " << nDel
		 << " words " << Vocab::length(reference) << endl;
	} else if (werRescore) {
	    /*
	     * Word error rescoring
	     */
	    wordErrorRescore(sentid, nbestList);
	} else if (!noRescore) {
	    /*
	     * Lattice building (and rescoring)
	     */
	    latticeRescore(sentid, *mainLat, nbestList, reference);
	}

	if (reference && dumpErrors) {
	    computeWordErrors(sentid, nbestList, reference);
	}

	if (outNbest) {
	    File nbestOutput(outNbest, "w");

	    nbestList.write(nbestOutput, writeDecipherNbest);
	}
    }

    /*
     * Lattice word error relative to the reference
     */
    if (reference && computeLatticeError) {
	unsigned nSub, nIns, nDel;
	unsigned latticeErrors =
		    mainLat->wordError(reference, nSub, nIns, nDel);

	if (sentid) {
	    cout << sentid << " ";
	}
	cout << latticeErrors
	     << " sub " << nSub
	     << " ins " << nIns
	     << " del " << nDel
	     << " words " << Vocab::length(reference) << endl;
    }

    /*
     * Record known reference words in the alignment
     * (not done when only N-best error was requested)
     */
    if (reference && !computeNbestError) {
	mainLat->alignReference(reference);
    }

    if (outLattice) {
	File latticeOutput(outLattice, "w");

	mainLat->write(latticeOutput);
    }

    delete mainLat;
}

/*
 * Program entry point
 *	Parses the command-line options, loads the optional vocabulary,
 *	noise vocabulary, dictionary and hidden vocabulary files, sets up
 *	the reference words, and then runs processNbest() either once
 *	(single N-best file, or lattice-only when no N-best input is given)
 *	or once per line of the N-best file list.  Optionally writes the
 *	vocabulary at the end.  Always terminates through exit().
 */
int
main (int argc, char *argv[])
{
    setlocale(LC_CTYPE, "");
    setlocale(LC_COLLATE, "");

    Opt_Parse(argc, argv, options, Opt_Number(options), 0);

    if (version) {
	printVersion(RcsId);
	exit(0);
    }

    /*
     * Either priming variant switches on lattice priming
     */
    if (primeWith1best || primeWithRefs) {
	primeLattice = 1;
    }

    Vocab vocab;
    NullLM nullLM(vocab);

    if (vocabFile) {
	File file(vocabFile, "r");
	vocab.read(file);
    }

    /* copy the command-line flag into the vocabulary's toLower setting */
    vocab.toLower() = toLower ? true : false;

    /*
     * Skip noise tags in scoring
     */
    if (noiseVocabFile) {
	File file(noiseVocabFile, "r");
	nullLM.noiseVocab.read(file);
    }
    if (noiseTag) {				/* backward compatibility */
	nullLM.noiseVocab.addWord(noiseTag);
    }

    /*
     * Posterior scaling:  if not specified (= 0.0) use LMW for
     * backward compatibility.
     * (An LMW of 0.0 falls back to a scale of 1.0.)
     */
    if (posteriorScale == 0.0) {
	posteriorScale = (rescoreLMW == 0.0) ? 1.0 : rescoreLMW;
    }

    /*
     * Default weights for posterior computation are same as for rescoring
     */
    if (posteriorLMW == undefinedWeight) {
	posteriorLMW = rescoreLMW;
    }
    if (posteriorWTW == undefinedWeight) {
	posteriorWTW = rescoreWTW;
    }

    Vocab dictVocab;
    VocabMultiMap dictionary(vocab, dictVocab);

    /* 
     * Read optional dictionary to help in word alignment
     */
    if (dictFile) {
	File file(dictFile, "r");

	if (!dictionary.read(file)) {
	    cerr << "format error in dictionary file\n";
	    exit(1);
	}
    }

    /*
     * Optionally read a subvocabulary that is to be kept separate from
     * regular words during alignment
     */
    SubVocab hiddenVocab(vocab);
    if (hiddenVocabFile) {
	File file(hiddenVocabFile, "r");

	hiddenVocab.read(file);
    }

    /*
     * Read reference words
     * (from the reference string given on the command line; this array
     * is never deleted in this function, the program exits instead)
     */
    VocabIndex *reference = 0;

    if (refString) {
	reference = new VocabIndex[maxWordsPerLine + 1];
	assert(reference != 0);

	VocabString refWords[maxWordsPerLine + 1];
	unsigned numWords =
		    Vocab::parseWords(refString, refWords, maxWordsPerLine);
	/*
	 * NOTE(review): a count equal to the limit is treated as overflow,
	 * so a reference of exactly maxWordsPerLine words appears to be
	 * rejected too, although the message says "more than" -- confirm
	 * against Vocab::parseWords.
	 */
        if (numWords == maxWordsPerLine) {
	    cerr << "more than " << maxWordsPerLine << " reference words\n";
	    exit(1);
	}

	vocab.addWords(refWords, reference, maxWordsPerLine + 1);
    } else if (rescoreFile || !nbestFiles) {
	/*
	 * The single-file and lattice-only modes have no other source
	 * of reference words
	 */
	if (dumpErrors || computeNbestError || computeLatticeError) {
	    cerr << "cannot compute errors without reference\n";
	    exit(1);
	}
    }

    /*
     * Process single nbest file
     */
    if (rescoreFile) {
	processNbest(nullLM, 0, rescoreFile, dictionary, hiddenVocab, reference,
						writeFile, writeNbestFile);
    } else if (!nbestFiles) {
	/*
	 * If neither -nbest nor -nbest-files was specified
	 * do lattice processing only.
	 */
	processNbest(nullLM, 0, 0, dictionary, hiddenVocab, reference,
						writeFile, writeNbestFile);
    }

    /*
     * Read list of nbest filenames
     */
    if (nbestFiles) {
	RefList refs(vocab);

	if (refFile) {
	    File file(refFile, "r");
	    refs.read(file, true);	 // add reference words to vocabulary
	} else {
	    if (dumpErrors || computeNbestError || computeLatticeError) {
		cerr << "cannot compute errors without reference\n";
		exit(1);
	    }
	}
		

	File file(nbestFiles, "r");
	char *line;
	while (line = file.getline()) {
	    /* first token on the line is the N-best filename */
	    char *fname = strtok(line, wordSeparators);
	    if (!fname) continue;

	    RefString sentid = idFromFilename(fname);

	    /*
	     * NOTE(review): this shadows the outer `reference' built from
	     * refString, so in list mode only references found through
	     * refFile are used -- confirm that this is intended.
	     */
	    VocabIndex *reference = 0;

	    if (refFile) {
		reference = refs.findRef(sentid);
		if (!reference) {
		    cerr << "no reference for " << sentid << endl;
		    /*
		     * Skip this list only if an error computation was
		     * requested; otherwise process it without reference
		     */
		    if (dumpErrors || computeNbestError || computeLatticeError)
		    {
			continue;
		    }
		}
	    }

	    /*
	     * Output names are "<dir>/<sentid><GZIP_SUFFIX>"; each buffer
	     * has room for the directory, the '/', the id, the suffix and
	     * the terminating NUL.  A buffer is only filled in when its
	     * directory option is set.
	     */
	    makeArray(char, writeLatticeName ,
		      (writeDir ? strlen(writeDir) : 0) + 1
				  + strlen(sentid) + strlen(GZIP_SUFFIX) + 1);
	    if (writeDir) {
		sprintf(writeLatticeName, "%s/%s%s", writeDir, sentid,
								GZIP_SUFFIX);
	    }

	    makeArray(char, writeNbestName,
		      (writeNbestDir ? strlen(writeNbestDir) : 0) + 1
				+ strlen(sentid) + strlen(GZIP_SUFFIX) + 1);
	    if (writeNbestDir) {
		sprintf(writeNbestName, "%s/%s%s", writeNbestDir, sentid,
								GZIP_SUFFIX);
	    }

	    /* pass 0 for an output whose directory was not specified */
	    processNbest(nullLM, sentid, fname, dictionary, hiddenVocab,
				    reference,
				    writeDir ? (char *)writeLatticeName : 0,
				    writeNbestDir ? (char *)writeNbestName : 0);
	}
    }

    if (writeVocabFile) {
	File file(writeVocabFile, "w");
	vocab.write(file);
    }

    exit(0);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -