nbest.cc

来自「这是一款很好用的工具包」· CC 代码 · 共 1,140 行 · 第 1/2 页
1,140 行
/*
 * NBest.cc --
 *	N-best hypotheses and lists
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 1995-2006 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/NBest.cc,v 1.57 2006/01/09 18:08:21 stolcke Exp $";
#endif

#include <iostream>
using namespace std;
#include <string.h>
#include <stdlib.h>
#include <assert.h>

#include "NBest.h"
#include "WordAlign.h"

#include "Array.cc"
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_ARRAY(NBestHyp);
#endif

#define DEBUG_PRINT_RANK	1

const char *phoneSeparator = ":";	 // used for phones & phoneDurs strings
const NBestTimestamp frameLength = 0.01f; // quantization unit of word timemarks

/*
 * N-best word backtrace information
 */

const unsigned phoneStringLength = 100;

/*
 * Construct an empty word-info record: no word, no phone backtrace
 * strings, and zero timing, scores and posteriors.
 */
NBestWordInfo::NBestWordInfo()
    : word(Vocab_None), phones(0), phoneDurs(0),
      wordPosterior(0.0), transPosterior(0.0)
{
    /*
     * Give the remaining fields defined values so that valid(), merge()
     * and operator= never read indeterminate data from a freshly
     * constructed object.  A zero duration makes the record report
     * !valid() until it is filled in.
     * (Assigned in the body rather than the initializer list because the
     * member declaration order is not visible from this file.)
     */
    start = 0.0;
    duration = 0.0;
    acousticScore = 0.0;
    languageScore = 0.0;
}

/*
 * Release the heap-allocated phone and phone-duration strings, if any.
 */
NBestWordInfo::~NBestWordInfo()
{
    /* free() is a no-op on a null pointer, so no guards are required */
    free(phoneDurs);
    free(phones);
}

/*
 * Deep-copy assignment.
 *	Scalar fields are copied directly; the phones and phoneDurs strings
 *	are duplicated with strdup() so both objects own separate storage.
 *	Self-assignment leaves the object untouched.
 */
NBestWordInfo &
NBestWordInfo::operator= (const NBestWordInfo &other)
{
    if (this != &other) {
	/* give up the strings this object currently owns */
	free(phones);
	free(phoneDurs);

	word = other.word;
	start = other.start;
	duration = other.duration;
	acousticScore = other.acousticScore;
	languageScore = other.languageScore;

	/* a null source string stays null; otherwise the copy must succeed */
	phones = other.phones ? strdup(other.phones) : 0;
	assert(other.phones == 0 || phones != 0);

	phoneDurs = other.phoneDurs ? strdup(other.phoneDurs) : 0;
	assert(other.phoneDurs == 0 || phoneDurs != 0);

	wordPosterior = other.wordPosterior;
	transPosterior = other.transPosterior;
    }

    return *this;
}

/*
 * Write the word info to file as six blank-separated fields:
 *	start duration acousticScore languageScore phones phoneDurs
 * A missing phones or phoneDurs string is written as phoneSeparator.
 */
void
NBestWordInfo::write(File &file)
{
    /* substitute the separator for absent strings */
    const char *phoneField = phones ? phones : phoneSeparator;
    const char *phoneDurField = phoneDurs ? phoneDurs : phoneSeparator;

    fprintf(file, "%lg %lg %lg %lg %s %s",
			(double)start, (double)duration,
			(double)acousticScore, (double)languageScore,
			phoneField, phoneDurField);
}

/*
 * Parse word info from a string holding six blank-separated fields:
 *	start duration acousticScore languageScore phones phoneDurs
 * (the layout produced by write()).  A phones or phoneDurs field equal
 * to phoneSeparator denotes a missing string and is stored as null.
 *
 * Returns true on success.  Returns false, leaving the object unchanged,
 * if fewer than six fields could be converted.
 */
Boolean
NBestWordInfo::parse(const char *s)
{
    double sTime, dur, aScore, lScore;

    /*
     * "%100s" stores up to 100 characters PLUS a terminating null, so the
     * buffers need one byte more than phoneStringLength (which is 100 and
     * must match the field width used in the format below).
     */
    char phs[phoneStringLength + 1], phDurs[phoneStringLength + 1];

    if (sscanf(s, "%lg %lg %lg %lg %100s %100s",
			&sTime, &dur, &aScore, &lScore, phs, phDurs) != 6)
    {
	return false;
    }

    start = (NBestTimestamp)sTime;
    duration = (NBestTimestamp)dur;
    acousticScore = (LogP)aScore;
    languageScore = (LogP)lScore;

    /*
     * Release strings left over from earlier contents before
     * overwriting the pointers, so that re-parsing does not leak.
     */
    free(phones);
    free(phoneDurs);

    if (strcmp(phs, phoneSeparator) == 0) {
	phones = 0;
    } else {
	phones = strdup(phs);
	assert(phones != 0);
    }

    if (strcmp(phDurs, phoneSeparator) == 0) {
	phoneDurs = 0;
    } else {
	phoneDurs = strdup(phDurs);
	assert(phoneDurs != 0);
    }

    return true;
}

void
NBestWordInfo::invalidate()
{
    duration = 0.0;
}

/*
 * A word info record counts as valid when its duration is positive.
 */
Boolean
NBestWordInfo::valid() const
{
    return 0 < duration;
}

/*
 * Merge another word info record into this one.
 *	The "other" record replaces our own contents (via operator=) only
 *	if its acoustic score divided by its duration is strictly greater
 *	than ours; otherwise this object is left unchanged.
 */
void
NBestWordInfo::merge(const NBestWordInfo &other)
{
    /*
     * keep our own information unless "other" has the higher
     * duration-normalized acoustic likelihood
     */
    if (!(other.acousticScore/other.duration > acousticScore/duration)) {
	return;
    }

    /* let the "other" word information supersede our own */
    *this = other;
}

/*
 * N-Best hypotheses
 */

/*
 * Construct an empty hypothesis: no words, no backtrace info,
 * all scores and counts zero.
 */
NBestHyp::NBestHyp()
{
    /* nothing allocated yet */
    wordInfo = 0;
    words = 0;

    /* scores */
    totalScore = 0.0;
    languageScore = 0.0;
    acousticScore = 0.0;
    posterior = 0.0;

    /* counts and ranking */
    numErrors = 0;
    numWords = 0;
    rank = 0;
}

/*
 * Release the word index array and the per-word backtrace array
 * (either may be null).
 */
NBestHyp::~NBestHyp()
{
    delete [] wordInfo;
    delete [] words;
}

/*
 * Deep-copy assignment.
 *	Scores, counts and rank are copied directly.  The words array is
 *	duplicated up to and including its terminating entry (length as
 *	reported by Vocab::length() plus one); if the source also carries
 *	wordInfo, the same number of NBestWordInfo entries is copied.
 *	A source without words yields a hypothesis with neither words nor
 *	wordInfo.  Self-assignment leaves the object untouched.
 */
NBestHyp &
NBestHyp::operator= (const NBestHyp &other)
{
    // cerr << "warning: NBestHyp::operator= called\n";

    if (&other == this) {
	return *this;
    }

    /*
     * Release the old arrays and clear the pointers right away: if one of
     * the allocations below throws, the destructor must not see dangling
     * pointers and delete them a second time.
     */
    delete [] words;
    delete [] wordInfo;
    words = 0;
    wordInfo = 0;

    acousticScore = other.acousticScore;
    languageScore = other.languageScore;
    totalScore = other.totalScore;

    numWords = other.numWords;
    posterior = other.posterior;
    numErrors = other.numErrors;
    rank = other.rank;

    if (other.words) {
	/* + 1 so the end-of-sequence entry is copied as well */
	unsigned actualNumWords = Vocab::length(other.words) + 1;

	words = new VocabIndex[actualNumWords];
	assert(words != 0);

	for (unsigned i = 0; i < actualNumWords; i++) {
	    words[i] = other.words[i];
	}

	if (other.wordInfo) {
	    wordInfo = new NBestWordInfo[actualNumWords];
	    assert(wordInfo != 0);

	    /* element-wise assignment deep-copies the phone strings */
	    for (unsigned i = 0; i < actualNumWords; i++) {
		wordInfo[i] = other.wordInfo[i];
	    }
	}
    }

    return *this;
}

/*
 * N-Best hypothesis parsing
 */

const char multiwordSeparator = '_';

/*
 * Add the string ph to the phoneSeparator-delimited list held in old.
 *	old	  null-terminated list buffer, modified in place; the size
 *		  check below treats it as holding phoneStringLength bytes
 *	ph	  null-terminated string to add
 *	reversed  if true, ph is inserted in front of the existing list,
 *		  otherwise it is appended
 * Returns false, leaving old unchanged, if the combined string would not
 * pass the length check; true otherwise.
 */
static Boolean
addPhones(char *old, const char *ph, Boolean reversed = false) 
{
    const unsigned listLen = strlen(old);
    const unsigned addLen = strlen(ph);

    /* existing list + separator + new entry + terminating null */
    if (listLen + 1 + addLen + 1 > phoneStringLength) {
	return false;
    }

    if (listLen == 0) {
	/* empty list: no separator needed in either direction */
	memcpy(old, ph, addLen + 1);
	return true;
    }

    if (reversed) {
	/* shift the old list (and its null) right, then put ph in front */
	memmove(old + addLen + 1, old, listLen + 1);
	memcpy(old, ph, addLen);
	old[addLen] = phoneSeparator[0];
    } else {
	/* overwrite the old null with a separator and append ph */
	old[listLen] = phoneSeparator[0];
	memcpy(old + listLen + 1, ph, addLen + 1);
    }

    return true;
}

Boolean
NBestHyp::parse(char *line, Vocab &vocab, unsigned decipherFormat,
		    LogP acousticOffset, Boolean multiwords, Boolean backtrace)
{
    const unsigned maxFieldsPerLine = 11 * maxWordsPerLine + 4;
			    /* NBestList2.0 format uses 11 fields per word */

    static VocabString wstrings[maxFieldsPerLine];
    static VocabString justWords[maxFieldsPerLine + 1];
    Array<NBestWordInfo> backtraceInfo;

    unsigned actualNumWords =
		Vocab::parseWords(line, wstrings, maxFieldsPerLine);

    if (actualNumWords == maxFieldsPerLine) {
	cerr << "more than " << maxFieldsPerLine << " fields per line\n";
	return false;
    }

    /*
     * We don't do multiword splitting with backtraces -- that would require
     * a dictionary (see external split-nbest-words script).
     */
    if (backtrace) {
	multiwords = false;
    }

    /*
     * We accept three formats for N-best hyps.
     * - The Decipher NBestList1.0 format, which has one combined bytelog score
     *	 in parens preceding the hyp.
     * - The Decipher NBestList2.0 format, where each word is followed by
     *	  ( st: <starttime> et: <endtime> g: <grammar_score> a: <ac_score> )
     * - Our own format, which has acoustic score, LM score, and number of
     *   words followed by the hyp.
     * If (decipherFormat > 0) only the specified Decipher format is accepted.
     */

    if (decipherFormat == 1 || 
	decipherFormat == 0 && wstrings[0][0] == '(')
    {
	/*
	 * These formats don't support backtrace info
	 */
	backtrace = false;

	actualNumWords --;

	if (actualNumWords > maxWordsPerLine) {
	    cerr << "more than " << maxWordsPerLine << " words in hyp\n";
	    return false;
	}

	/*
	 * Parse the first word as a score (in parens)
	 */
	double score;

	if (sscanf(wstrings[0], "(%lf)", &score) != 1)
	{
	    cerr << "bad Decipher score: " << wstrings[0] << endl;
	    return false;
	}

	/*
	 * Save score
	 */
	totalScore = acousticScore = BytelogToLogP(score);
	languageScore = 0.0;

	/* 
	 * Note: numWords includes pauses, consistent with the way the 
	 * recognizer applies word transition weights.  Elimination of pauses
	 * is the job of LM rescoring.
	 */
	numWords = actualNumWords;

	Vocab::copy(justWords, &wstrings[1]);

    } else if (decipherFormat == 2) {
	if ((actualNumWords - 1) % 11) {
	    cerr << "badly formatted hyp\n";
	    return false;
	}

	unsigned numTokens = (actualNumWords - 1)/11;

	if (numTokens > maxWordsPerLine) {
	    cerr << "more than " << maxWordsPerLine << " tokens in hyp\n";
	    return false;
	}

	/*
	 * Parse the first word as a score (in parens)
	 */
	double score;

	if (sscanf(wstrings[0], "(%lf)", &score) != 1)
	{
	    cerr << "bad Decipher score: " << wstrings[0] << endl;
	    return false;
	}

	/*
	 * Parse remaining line into words and scores
	 *	skip over phone and state backtrace tokens, which can be
	 *	identified by noting that their times are contained within
	 *	the word duration.
	 */
	Bytelog acScore = 0;
	Bytelog lmScore = 0;

	NBestTimestamp prevEndTime = -1.0;	/* end time of last token */
	NBestTimestamp prevPhoneStart = 0.0;
	NBestWordInfo *prevWordInfo = 0;

	char phones[phoneStringLength];
	char phoneDurs[phoneStringLength];

	actualNumWords = 0;
	for (unsigned i = 0; i < numTokens; i ++) {

	    const char *token = wstrings[1 + 11 * i];
	    NBestTimestamp startTime = (NBestTimestamp)atof(wstrings[1 + 11 * i + 3]);
	    NBestTimestamp endTime = (NBestTimestamp)atof(wstrings[1 + 11 * i + 5]);

	    /*
	     * Check if this token refers to an HMM state, i.e., if
	     * it matches the pattern /-[0-9]$/.
	     * XXX: because of a bug in Decipher we need to perform this
	     * check even if we're scanning for word tokens.
	     */
	    const char *hyphen = strrchr(token, '-');
	    Boolean isStateToken = hyphen != 0 &&
				hyphen[1] >= '0' && hyphen[1] <= '9' &&
				hyphen[2] == '\0';

	    if (startTime > prevEndTime && !isStateToken) {
		int acWordScore = atol(wstrings[1 + 11 * i + 9]);
		int lmWordScore = atol(wstrings[1 + 11 * i + 7]);

		justWords[actualNumWords] = token;

		if (backtrace) {
		    /*
		     * save pronunciation info for previous word
		     */
		    if (prevWordInfo) {
			prevWordInfo->phones = strdup(phones);
			assert(prevWordInfo->phones != 0);

			prevWordInfo->phoneDurs = strdup(phoneDurs);
			assert(prevWordInfo->phoneDurs != 0);
		    }

		    NBestWordInfo winfo;
		    winfo.word = Vocab_None;
		    winfo.start = startTime;
		    /*
		     * NB: "et" in nbest backtrace is actually the START time
		     * of the last frame
		     */
		    winfo.duration = endTime - startTime + frameLength;
		    winfo.acousticScore = BytelogToLogP(acWordScore);
		    winfo.languageScore = BytelogToLogP(lmWordScore);
		    winfo.phones = winfo.phoneDurs = 0;

		    backtraceInfo[actualNumWords] = winfo;

		    /*
		     * prepare for collecting phone backtrace info
		     */
		    prevWordInfo = &backtraceInfo[actualNumWords];
		    phones[0] = phoneDurs[0] = '\0';
		}

		acScore += acWordScore;
		lmScore += lmWordScore;

		actualNumWords ++;

		prevEndTime = endTime;
	    } else {
		/*
		 * check if this token refers to an HMM state, i.e., if
		 * if matches the pattern /-[0-9]$/
		 */
		if (isStateToken) {
		    continue;
		}

		/*
		 * a phone token: if we're extracting backtrace information,
		 * get phone identity and duration and store in word Info
		 */
		if (prevWordInfo) {
		    const char *lbracket = strchr(token, '[');
		    const char *phone = lbracket ? lbracket + 1 : token;
		    char *rbracket = (char *)strrchr(phone, ']');
		    if (rbracket) *rbracket = '\0';
		    addPhones(phones, phone, startTime < prevPhoneStart);

		    char phoneDur[20];
		    sprintf(phoneDur, "%d",
			    (int)((endTime - startTime)/frameLength + 0.5) + 1);
		    addPhones(phoneDurs, phoneDur, startTime < prevPhoneStart);

		    prevPhoneStart = startTime;
		}
	    }
	}

	if (backtrace) {
	    /*
	     * save pronunciation info for last word
	     */
	    if (prevWordInfo) {
		prevWordInfo->phones = strdup(phones);
		assert(prevWordInfo->phones != 0);

		prevWordInfo->phoneDurs = strdup(phoneDurs);
		assert(prevWordInfo->phoneDurs != 0);
	    }
	}

	justWords[actualNumWords] = 0;

	/*
	 * Save scores
	 */
	totalScore = BytelogToLogP(score);
	acousticScore = BytelogToLogP(acScore);
	languageScore = BytelogToLogP(lmScore);
	numWords = actualNumWords;

	/*
	if (score != acScore + lmScore) {
	    cerr << "acoustic and language model scores don't add up ("
		 << acScore << " + " << lmScore << " != " << score << ")\n";
	}
	*/

    } else {
	actualNumWords -= 3;

	if (actualNumWords > maxWordsPerLine) {
	    cerr << "more than " << maxWordsPerLine << " words in hyp\n";
	    return false;
	}

	/*
	 * Parse the first three columns as numbers
	 */
	if (!parseLogP(wstrings[0], acousticScore)) {
	    cerr << "bad acoustic score: " << wstrings[0] << endl;
	    return false;
	}
	if (!parseLogP(wstrings[1], languageScore)) {
	    cerr << "bad LM score: " << wstrings[1] << endl;
	    return false;
	}
	if (sscanf(wstrings[2], "%u", &numWords) != 1) {
	    cerr << "bad word count: " << wstrings[2] << endl;
	    return false;
	}

	/*
	 * Set the total score to the acoustic score so 
	 * decipherFix() with a null language model leaves everything
	 * unchanged.
	 */
	totalScore = acousticScore;

	Vocab::copy(justWords, &wstrings[3]);
    }

    /*
     * Apply acoustic normalization in effect
     */
    acousticScore -= acousticOffset;
    totalScore -= acousticOffset;

    /*
     * Adjust number of words for multiwords if appropriate
     */
    if (multiwords) {
	for (unsigned j = 0; justWords[j] != 0; j ++) {
	    const char *cp = justWords[j];

	    for (cp = strchr(cp, multiwordSeparator);
		 cp != 0;
		 cp = strchr(cp + 1, multiwordSeparator))
	    {
		actualNumWords ++;
	    }
	}
    }

    /*
     * Copy words to nbest list
     */
    delete [] words;
    words = new VocabIndex[actualNumWords + 1];
    assert(words != 0);

    Boolean unkIsWord = vocab.unkIsWord();

    /*
     * Map word strings to indices
     */
    if (!multiwords) {
	if (unkIsWord) {
	    vocab.getIndices(justWords, words, actualNumWords + 1,
							    vocab.unkIndex());
	} else {
nbest.cc - 源码说明

本页面展示了「这是一款很好用的工具包」中的 nbest.cc 源码文件，采用 CC 编程语言编写，共 1,140 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与工具包相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?