⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htklattice.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 4 页
字号:
/*
 * HTKLattice.cc --
 *	HTK Standard Lattice Format support for SRILM lattices
 *
 *	Note: there is no separate HTKLattice class, only I/O methods!
 *
 */

#ifndef lint
/*
 * Identification strings; they are compiled only when `lint' is not defined.
 * NOTE(review): the "@(#)" prefix looks like the what(1) marker -- confirm.
 */
static char Copyright[] = "Copyright (c) 2003-2006 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lattice/src/RCS/HTKLattice.cc,v 1.40 2006/01/16 19:34:15 stolcke Exp $";
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <assert.h>

#include "Prob.h"
#include "Array.cc"
#include "LHash.cc"
#include "Lattice.h"
#include "MultiwordVocab.h"
#include "NBest.h"		// for phoneSeparator defn

#ifdef INSTANTIATE_TEMPLATES
/*
 * NOTE(review): presumably requests an explicit instantiation of the Array
 * template for HTKWordInfo; the macro is defined outside this file -- confirm.
 */
INSTANTIATE_ARRAY(HTKWordInfo);
#endif

/*
 * Debugging constants (same values as in Lattice.cc)
 */
#define DebugPrintFatalMessages 1
#define DebugPrintFunctionality 1

/*
 * String constants
 */
const char *HTKLattice_Version = "1.1";
const char *HTK_null_word = "!NULL";

/*
 * Characters used for quoting and escaping in field values
 */
const char HTK_single_quote = '\'';
const char HTK_double_quote = '"';
const char HTK_escape_quote = '\\';

/*
 * Default header values, used by the HTKHeader default constructor
 */
const float HTK_def_tscale = 1.0f;
const float HTK_def_acscale = 1.0f;
const float HTK_def_lmscale = 1.0f;
const float HTK_def_ngscale = 1.0f;
const float HTK_def_wdpenalty = 0.0f;
const float HTK_def_prscale = 1.0f;
const float HTK_def_duscale = 0.0f;
const float HTK_def_xscale = 0.0f;

/*
 * Default header: log base 10, the HTK_def_* scaling factors, an undefined
 * amscale, no vocab/LM/ngram/HMM names, and all boolean flags cleared.
 */
HTKHeader::HTKHeader()
    : logbase(10),
      tscale(HTK_def_tscale),
      acscale(HTK_def_acscale),
      ngscale(HTK_def_ngscale),
      lmscale(HTK_def_lmscale),
      wdpenalty(HTK_def_wdpenalty),
      prscale(HTK_def_prscale),
      duscale(HTK_def_duscale),
      amscale(HTK_undef_float),
      x1scale(HTK_def_xscale),
      x2scale(HTK_def_xscale),
      x3scale(HTK_def_xscale),
      x4scale(HTK_def_xscale),
      x5scale(HTK_def_xscale),
      x6scale(HTK_def_xscale),
      x7scale(HTK_def_xscale),
      x8scale(HTK_def_xscale),
      x9scale(HTK_def_xscale),
      vocab(0),
      lmname(0),
      ngname(0),
      hmms(0),
      wordsOnNodes(false),
      scoresOnNodes(false),
      useQuotes(false)
{
}

/*
 * Header with caller-supplied scaling factors and word penalty.
 * Each parameter initializes the member of the same name; logbase is 10,
 * tscale takes its default, amscale is undefined, the string members are
 * null and the boolean flags are cleared.
 */
HTKHeader::HTKHeader(double acscale, double lmscale, double ngscale,
			double prscale, double duscale, double wdpenalty,
			double x1scale, double x2scale, double x3scale,
			double x4scale, double x5scale, double x6scale,
			double x7scale, double x8scale, double x9scale)
    : logbase(10), tscale(HTK_def_tscale), acscale(acscale),
      ngscale(ngscale), lmscale(lmscale),
      wdpenalty(wdpenalty), prscale(prscale),
      duscale(duscale), amscale(HTK_undef_float),
      // x1scale used to be initialized from the x2scale parameter,
      // silently discarding the caller's x1scale argument
      x1scale(x1scale), x2scale(x2scale), x3scale(x3scale),
      x4scale(x4scale), x5scale(x5scale), x6scale(x6scale),
      x7scale(x7scale), x8scale(x8scale), x9scale(x9scale),
      vocab(0), lmname(0), ngname(0), hmms(0),
      wordsOnNodes(false), scoresOnNodes(false), useQuotes(false)
{
}

/*
 * Release the string members.
 */
HTKHeader::~HTKHeader()
{
    /* free() ignores null pointers, so no guards are needed */
    free(vocab);
    free(lmname);
    free(ngname);
    free(hmms);
}

/*
 * Assignment: copies tscale, the scaling factors and the word penalty, and
 * duplicates the four string members after releasing the ones held so far.
 * logbase and the wordsOnNodes/scoresOnNodes/useQuotes flags are left
 * unchanged.  NOTE(review): confirm that leaving them out is intended.
 */
HTKHeader &
HTKHeader::operator= (const HTKHeader &other)
{
    if (this != &other) {
	/* drop the strings owned so far (free() ignores null pointers) */
	free(vocab);
	free(lmname);
	free(ngname);
	free(hmms);

	/* plain numeric members */
	tscale = other.tscale;
	acscale = other.acscale;
	ngscale = other.ngscale;
	lmscale = other.lmscale;
	wdpenalty = other.wdpenalty;
	prscale = other.prscale;
	duscale = other.duscale;
	x1scale = other.x1scale;
	x2scale = other.x2scale;
	x3scale = other.x3scale;
	x4scale = other.x4scale;
	x5scale = other.x5scale;
	x6scale = other.x6scale;
	x7scale = other.x7scale;
	x8scale = other.x8scale;
	x9scale = other.x9scale;
	amscale = other.amscale;

	/* private copies of the strings; a failed strdup() is fatal */
	vocab = other.vocab ? strdup(other.vocab) : 0;
	assert(other.vocab == 0 || vocab != 0);

	lmname = other.lmname ? strdup(other.lmname) : 0;
	assert(other.lmname == 0 || lmname != 0);

	ngname = other.ngname ? strdup(other.ngname) : 0;
	assert(other.ngname == 0 || ngname != 0);

	hmms = other.hmms ? strdup(other.hmms) : 0;
	assert(other.hmms == 0 || hmms != 0);
    }

    return *this;
}


/*
 * Default word info: no word, null div/states strings, and every other
 * field set to its "undefined" marker.
 */
HTKWordInfo::HTKWordInfo()
    : time(HTK_undef_float),
      word(Vocab_None),
      var(HTK_undef_uint),
      div(0),
      states(0),
      acoustic(HTK_undef_float),
      ngram(HTK_undef_float),
      language(HTK_undef_float),
      pron(HTK_undef_float),
      duration(HTK_undef_float),
      xscore1(HTK_undef_float),
      xscore2(HTK_undef_float),
      xscore3(HTK_undef_float),
      xscore4(HTK_undef_float),
      xscore5(HTK_undef_float),
      xscore6(HTK_undef_float),
      xscore7(HTK_undef_float),
      xscore8(HTK_undef_float),
      xscore9(HTK_undef_float),
      posterior(HTK_undef_float)
{
}

/*
 * Copy constructor, implemented through the assignment operator.
 * div and states must be null beforehand because operator= frees them.
 */
HTKWordInfo::HTKWordInfo(const HTKWordInfo &other)
    : div(0),
      states(0)
{
    operator=(other);
}

/*
 * Release the string members.
 */
HTKWordInfo::~HTKWordInfo()
{
    /* free() ignores null pointers, so no guards are needed */
    free(div);
    free(states);
}

/*
 * Assignment: copies all scalar fields and duplicates the div and states
 * strings after releasing the ones held so far.
 */
HTKWordInfo &
HTKWordInfo::operator= (const HTKWordInfo &other)
{
    if (this != &other) {
	/* drop the strings owned so far (free() ignores null pointers) */
	free(div);
	free(states);

	time = other.time;
	word = other.word;
	var = other.var;

	/* private copies of the strings; a failed strdup() is fatal */
	div = other.div ? strdup(other.div) : 0;
	assert(other.div == 0 || div != 0);

	states = other.states ? strdup(other.states) : 0;
	assert(other.states == 0 || states != 0);

	acoustic = other.acoustic;
	ngram = other.ngram;
	language = other.language;
	pron = other.pron;
	duration = other.duration;
	xscore1 = other.xscore1;
	xscore2 = other.xscore2;
	xscore3 = other.xscore3;
	xscore4 = other.xscore4;
	xscore5 = other.xscore5;
	xscore6 = other.xscore6;
	xscore7 = other.xscore7;
	xscore8 = other.xscore8;
	xscore9 = other.xscore9;
	posterior = other.posterior;
    }

    return *this;
}

/*
 * Debugging output for HTKWordInfo: writes "[HTKWordInfo ...]" to stream,
 * listing only the fields that are set (i.e., differ from their
 * "undefined" marker, or are non-null in the case of the strings).
 */
ostream &
operator<< (ostream &stream, HTKWordInfo &link)
{
    /* print LABEL followed by the field unless it equals its marker */
#define HTK_PRINT_IF_SET(LABEL, FIELD, UNDEF) \
    do { \
	if (link.FIELD != UNDEF) { \
	    stream << LABEL << link.FIELD; \
	} \
    } while (0)

    stream << "[HTKWordInfo";

    HTK_PRINT_IF_SET(" WORD=", word, Vocab_None);
    HTK_PRINT_IF_SET(" time=", time, HTK_undef_float);
    HTK_PRINT_IF_SET(" var=", var, HTK_undef_uint);

    /* the string fields are optional and null when absent */
    if (link.div != 0) {
	stream << " div=" << link.div;
    }
    if (link.states != 0) {
	stream << " s=" << link.states;
    }

    HTK_PRINT_IF_SET(" a=", acoustic, HTK_undef_float);
    HTK_PRINT_IF_SET(" n=", ngram, HTK_undef_float);
    HTK_PRINT_IF_SET(" l=", language, HTK_undef_float);
    HTK_PRINT_IF_SET(" r=", pron, HTK_undef_float);
    HTK_PRINT_IF_SET(" ds=", duration, HTK_undef_float);
    HTK_PRINT_IF_SET(" x1=", xscore1, HTK_undef_float);
    HTK_PRINT_IF_SET(" x2=", xscore2, HTK_undef_float);
    HTK_PRINT_IF_SET(" x3=", xscore3, HTK_undef_float);
    HTK_PRINT_IF_SET(" x4=", xscore4, HTK_undef_float);
    HTK_PRINT_IF_SET(" x5=", xscore5, HTK_undef_float);
    HTK_PRINT_IF_SET(" x6=", xscore6, HTK_undef_float);
    HTK_PRINT_IF_SET(" x7=", xscore7, HTK_undef_float);
    HTK_PRINT_IF_SET(" x8=", xscore8, HTK_undef_float);
    HTK_PRINT_IF_SET(" x9=", xscore9, HTK_undef_float);
    HTK_PRINT_IF_SET(" p=", posterior, HTK_undef_float);

#undef HTK_PRINT_IF_SET

    stream << "]";
    return stream;
}


/*
 * Find the next key=value pair in line.
 *
 * Returns a pointer to the key, or 0 if the end of the string or a '#' is
 * reached before a field starts (line is left unchanged in that case).
 * On success value points to the value string and line is advanced past
 * the field.  A field without '=' yields an empty value.
 *
 * The string pointed to by line is modified in the process: key and value
 * are null-terminated in place, and quotes/escapes are removed from the
 * value by copying it over itself.
 *
 * If useQuotes is true, a value starting with a single or double quote
 * extends to the matching quote, and backslash escapes (including octal
 * codes of the form \0nnn) are decoded.
 */
static char *
getHTKField(char *&line, char *&value, Boolean useQuotes)
{
    char *cp = line;
    char *key;

    do {
	switch (*cp) {
	case '\0':
	case '#':
		return 0;
	case ' ':
	case '\t':
	case '\n':
		cp ++;
		break;
	default:
		key = cp;

		/*
		 * The <ctype.h> functions require a value representable as
		 * unsigned char; a plain char holding a non-ASCII byte may
		 * be negative, hence the casts.
		 */
		while (*cp != '\0' &&
		       !isspace(static_cast<unsigned char>(*cp)) &&
		       *cp != '=')
		{
		    cp++;
		}

		if (*cp == '=') {
		    *(cp++) = '\0';	// terminate key string
		    value = cp;		// beginning of value string
		    char *cpv = cp;	// target location for copying value

		    char inquote = '\0';

		    /*
		     * Quotes are only treated specially if they 
		     * occur in first position
		     */
		    if (useQuotes &&
			(*cp == HTK_single_quote || *cp == HTK_double_quote))
		    {
			inquote = *(cp++);
		    }

		    while (*cp != '\0') {
			if (useQuotes && *cp == HTK_escape_quote) {
			    /*
			     * Backslash quote processing
			     */
			    cp ++;
			    if (*cp == '\0') {
				/*
				 * Shouldn't happen, we just ignore it
				 */
				break;
			    } else if (*cp == '0') {
				/*
				 * Octal char code.  The leading '0' ensures
				 * that the conversion succeeds; %n requires
				 * an int * argument.
				 */
				unsigned charcode = 0;
				int charlen = 1;
				sscanf(cp, "%o%n", &charcode, &charlen);
				*(cpv++) = charcode;
				cp += charlen;
			    } else {
				/*
				 * Other quoted character
				 */
				*(cpv++) = *(cp++);
			    }
			} else if (!inquote &&
				   isspace(static_cast<unsigned char>(*cp)))
			{
			    /*
			     * String delimited by white-space
			     */
			    cp ++;
			    break;
			} else if (inquote && *cp == inquote) {
			    /*
			     * String delimited by end quote
			     */
			    cp ++;
			    break;
			} else {
			    /* 
			     * Character in string
			     */
			    *(cpv++) = *(cp++);
			}
		    }
		    *cpv = '\0';	// terminate value string
		} else {
		    /*
		     * Key without '=': the value is the empty string
		     */
		    value = cp;
		    if (*cp != '\0') {
			*(cp++) = '\0';	// terminate key string
		    }
		}

		line = cp;
		return key;
	}
    } while (1);
}

/*
 * Convert a score string to a log score.
 *
 * With a positive logbase the string is parsed with parseLogP() and the
 * result is multiplied by ProbToLogP(logbase); a string that fails to parse
 * produces a warning at the current file position and yields LogP_Zero.
 * Otherwise the string is read with atof() and passed through ProbToLogP().
 */
static inline LogP
getHTKscore(const char *value, double logbase, File &file)
{
    if (!(logbase > 0.0)) {
	return ProbToLogP(atof(value));
    }

    LogP parsed;
    if (!parseLogP(value, parsed)) {
	file.position() << "warning: malformed HTK log score "
			<< value << endl;
	return LogP_Zero;
    }

    return parsed * ProbToLogP(logbase);
}

/*
 * Output quoted version of string
 */
static void
printQuoted(FILE *f, const char *name, Boolean useQuotes)
{
    Boolean octalPrinted = false;

    if (!useQuotes) {
	fputs(name, f);
    } else {
	for (const char *cp = name; *cp != '\0'; cp ++) {
	    if (*cp == ' ' || *cp == HTK_escape_quote ||
		cp == name &&
		    (*cp == HTK_single_quote || *cp == HTK_double_quote) ||
		octalPrinted && isdigit(*cp))
	    {
		/*
		 * This character needs to be quoted
		 */
		putc(HTK_escape_quote, f);
		putc(*cp, f);
		octalPrinted = false;
	    } else if (!isprint(*cp) || isspace(*cp)) {
		/*

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -