⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htklattice.cc

📁 这是一款很好用的工具包
💻 CC
📖 第 1 页 / 共 4 页
字号:
		 * Print as octal char code
		 */
		fprintf(f, "%c0%o", HTK_escape_quote, *cp);
		octalPrinted = true;
	    } else {
		/*
		 * Print as plain character
		 */
		putc(*cp, f);
		octalPrinted = false;
	    }
	}
    }
}

/*
 * Set user-specified parameters in the HTK lattice header structure
 */
void
Lattice::setHTKHeader(HTKHeader &header)
{
    if (header.logbase != HTK_undef_float) {
	htkheader.logbase = header.logbase;
    }
    if (header.acscale != HTK_undef_float) {
	htkheader.acscale = header.acscale;
    }
    if (header.lmscale != HTK_undef_float) {
	htkheader.lmscale = header.lmscale;
    }
    if (header.ngscale != HTK_undef_float) {
	htkheader.ngscale = header.ngscale;
    }
    if (header.prscale != HTK_undef_float) {
	htkheader.prscale = header.prscale;
    }
    if (header.duscale != HTK_undef_float) {
	htkheader.duscale = header.duscale;
    }
    if (header.wdpenalty != HTK_undef_float) {
	// scale user-specific wdpenalty from user-specified/default logbase
	if (htkheader.logbase > 0.0) {
	    htkheader.wdpenalty =
			header.wdpenalty * ProbToLogP(htkheader.logbase);
	} else {
	    htkheader.wdpenalty = ProbToLogP(header.wdpenalty);
	}
    }
    if (header.x1scale != HTK_undef_float) {
	htkheader.x1scale = header.x1scale;
    }
    if (header.x2scale != HTK_undef_float) {
	htkheader.x2scale = header.x2scale;
    }
    if (header.x3scale != HTK_undef_float) {
	htkheader.x3scale = header.x3scale;
    }
    if (header.x4scale != HTK_undef_float) {
	htkheader.x4scale = header.x4scale;
    }
    if (header.x5scale != HTK_undef_float) {
	htkheader.x5scale = header.x5scale;
    }
    if (header.x6scale != HTK_undef_float) {
	htkheader.x6scale = header.x6scale;
    }
    if (header.x7scale != HTK_undef_float) {
	htkheader.x7scale = header.x7scale;
    }
    if (header.x8scale != HTK_undef_float) {
	htkheader.x8scale = header.x8scale;
    }
    if (header.x9scale != HTK_undef_float) {
	htkheader.x9scale = header.x9scale;
    }
    if (header.amscale != HTK_undef_float) {
	htkheader.amscale = header.amscale;
    }
    htkheader.wordsOnNodes = header.wordsOnNodes;
    htkheader.scoresOnNodes = header.scoresOnNodes;
    htkheader.useQuotes = header.useQuotes;
}


/*
 * Input lattice in HTK format
 *	Algorithm:
 *	- each HTK node becomes a null node.
 *	- each HTK link becomes a non-null node.
 *	- word and other link information is added to the non-null nodes.
 *	- link information attached to HTK nodes is added to non-null nodes.
 *	- lattice transition weights are computed as a log-linear combination
 *	  of HTK scores.
 * Arguments:
 *	- if header != 0, supplied scaling parameters override information
 *	  from lattice header
 *	- if useNullNodes == false null nodes corresponding to original
 *	  HTK nodes are eliminated
 */
Boolean
Lattice::readHTK(File &file, HTKHeader *header, Boolean useNullNodes)
{
    removeAll();

    unsigned HTKnumlinks = 0;
    unsigned HTKnumnodes = 0;
    float HTKlogbase = (float) M_E;
    unsigned HTKfinal = HTK_undef_uint;
    unsigned HTKinitial = HTK_undef_uint;
    char HTKdirection = 'f';
    char HTKwdpenalty[100];
    HTKwdpenalty[0] = HTKwdpenalty[sizeof(HTKwdpenalty)-1] = '\0';

    LHash<unsigned, NodeIndex> nodeMap;		// maps HTK nodes->lattice nodes
    Array<HTKWordInfo> nodeInfoMap;		// node-based link information

    // dummy word used temporarily to represent HTK nodes
    // (could have used null nodes, but this way we preserve null nodes in
    // the input lattice)
    const char *HTKNodeWord = "***HTK_Node***";
    VocabIndex HTKNodeDummy = useNullNodes ? Vocab_None :
					     vocab.addWord(HTKNodeWord);

    /*
     * Override supplied header parameters
     */
    if (header != 0) {
	setHTKHeader(*header);
    }

    /*
     * Parse HTK lattice file
     */
    while (char *line = file.getline()) {
	char *key;
	char *value;

	/*
	 * Parse key=value pairs
	 * (we test for frequent fields first to save time)
	 * We assume that header information comes before node information,
	 * which comes before link information.  However, this is not
	 * enforced, and incomplete lattices may result if the input file
	 * contains things out of order.
	 */
	while (key = getHTKField(line, value, htkheader.useQuotes)) {
#define keyis(x)	(strcmp(key, (x)) == 0)
	    /*
	     * Link fields
	     */
	    if (keyis("J")) {
		unsigned HTKlinkno = atoi(value);

		/*
		 * parse link fields
		 */
		HTKWordInfo *linkinfo = new HTKWordInfo;
		assert(linkinfo != 0);
				// allocates new HTKWordInfo pointer in lattice
		htkinfos[htkinfos.size()] = linkinfo;

		unsigned HTKstartnode, HTKendnode;
		NodeIndex startIndex = NoNode, endIndex = NoNode;

		while (key = getHTKField(line, value, htkheader.useQuotes)) {
		    if (keyis("S") || keyis("START")) {
			HTKstartnode = atoi(value);
			Boolean found;
			NodeIndex *startIndexPtr =
				nodeMap.insert(HTKstartnode, found);
			if (!found) {
			    // node index not seen before; create it
			    *startIndexPtr = dupNode(Vocab_None);
			}
			startIndex = *startIndexPtr;

		    } else if (keyis("E") || keyis("END")) {
			HTKendnode = atoi(value);
			Boolean found;
			NodeIndex *endIndexPtr =
				nodeMap.insert(HTKendnode, found);
			if (!found) {
			    // node index not seen before; create it
			    *endIndexPtr = dupNode(Vocab_None);
			}
			endIndex = *endIndexPtr;

		    } else if (keyis("W") || keyis("WORD")) {
			if (strcmp(value, HTK_null_word) == 0) {
			    linkinfo->word = Vocab_None;
			} else if (useUnk) {
			    linkinfo->word =
					vocab.getIndex(value, vocab.unkIndex());
			} else {
			    linkinfo->word = vocab.addWord(value);
			}
		    } else if (keyis("v") || keyis("var")) {
			linkinfo->var = atoi(value);
		    } else if (keyis("d") || keyis("div")) {
			linkinfo->div = strdup(value);
			assert(linkinfo->div != 0);
		    } else if (keyis("s") || keyis("states")) {
			linkinfo->states = strdup(value);
			assert(linkinfo->states != 0);
		    } else if (keyis("a") || keyis("acoustic")) {
			linkinfo->acoustic = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("n") || keyis("ngram")) {
			linkinfo->ngram = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("l") || keyis("language")) {
			linkinfo->language = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("r")) {
			linkinfo->pron = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("ds")) {
			linkinfo->duration = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x1")) {
			linkinfo->xscore1 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x2")) {
			linkinfo->xscore2 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x3")) {
			linkinfo->xscore3 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x4")) {
			linkinfo->xscore4 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x5")) {
			linkinfo->xscore5 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x6")) {
			linkinfo->xscore6 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x7")) {
			linkinfo->xscore7 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x8")) {
			linkinfo->xscore8 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x9")) {
			linkinfo->xscore9 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("p")) {
			linkinfo->posterior = atof(value);
		    } else {
			file.position() << "unexpected link field name "
					<< key << endl;
			if (!useNullNodes) vocab.remove(HTKNodeDummy);
			return false;
		    }
		}

		if (startIndex == NoNode) {
		    file.position() << "missing start node spec\n";
		    if (!useNullNodes) vocab.remove(HTKNodeDummy);
		    return false;
		}

		if (endIndex == NoNode) {
		    file.position() << "missing end node spec\n";
		    if (!useNullNodes) vocab.remove(HTKNodeDummy);
		    return false;
		}

		/*
		 * fill in unspecified link info from associated node info
		 * 'forward' lattices use end-node information.
		 * 'backward' lattices use start-node information.
		 */
		HTKWordInfo *nodeinfo = 0;
		if (HTKdirection == 'f') {
		    nodeinfo = &nodeInfoMap[HTKendnode];
		} else if (HTKdirection == 'b') {
		    nodeinfo = &nodeInfoMap[HTKstartnode];
		}

		if (nodeinfo != 0) {
		    linkinfo->time = nodeinfo->time;

		    if (linkinfo->word == Vocab_None) {
			linkinfo->word = nodeinfo->word;
		    }
		    if (linkinfo->var == HTK_undef_uint) {
			linkinfo->var = nodeinfo->var;
		    }
		    if (linkinfo->div == 0 && nodeinfo->div != 0) {
			linkinfo->div = strdup(nodeinfo->div);
			assert(linkinfo->div != 0);
		    }
		    if (linkinfo->states == 0 && nodeinfo->states != 0) {
			linkinfo->states = strdup(nodeinfo->states);
			assert(linkinfo->states != 0);
		    }
		    if (linkinfo->acoustic == HTK_undef_float) {
			linkinfo->acoustic = nodeinfo->acoustic;
		    }
		    if (linkinfo->pron == HTK_undef_float) {
			linkinfo->pron = nodeinfo->pron;
		    }
		    if (linkinfo->duration == HTK_undef_float) {
			linkinfo->duration = nodeinfo->duration;
		    }
		    if (linkinfo->xscore1 == HTK_undef_float) {
			linkinfo->xscore1 = nodeinfo->xscore1;
		    }
		    if (linkinfo->xscore2 == HTK_undef_float) {
			linkinfo->xscore2 = nodeinfo->xscore2;
		    }
		    if (linkinfo->xscore3 == HTK_undef_float) {
			linkinfo->xscore3 = nodeinfo->xscore3;
		    }
		    if (linkinfo->xscore4 == HTK_undef_float) {
			linkinfo->xscore4 = nodeinfo->xscore4;
		    }
		    if (linkinfo->xscore5 == HTK_undef_float) {
			linkinfo->xscore5 = nodeinfo->xscore5;
		    }
		    if (linkinfo->xscore6 == HTK_undef_float) {
			linkinfo->xscore6 = nodeinfo->xscore6;
		    }
		    if (linkinfo->xscore7 == HTK_undef_float) {
			linkinfo->xscore7 = nodeinfo->xscore7;
		    }
		    if (linkinfo->xscore8 == HTK_undef_float) {
			linkinfo->xscore8 = nodeinfo->xscore8;
		    }
		    if (linkinfo->xscore9 == HTK_undef_float) {
			linkinfo->xscore9 = nodeinfo->xscore9;
		    }
		}

		/*
		 * Create lattice node
		 */
		NodeIndex newNode = dupNode(linkinfo->word, 0, linkinfo);

		/*
		 * Compute lattice transition weight as a weighted combination
		 * of HTK lattice scores
		 */
		LogP weight = LogP_One;

		if (linkinfo->acoustic != HTK_undef_float) {
		    weight += (LogP)htkheader.acscale * linkinfo->acoustic;
		}
		if (linkinfo->ngram != HTK_undef_float) {
		    weight += (LogP)htkheader.ngscale * linkinfo->ngram;
		}
		if (linkinfo->language != HTK_undef_float) {
		    weight += (LogP)htkheader.lmscale * linkinfo->language;
		}
		if (linkinfo->pron != HTK_undef_float) {
		    weight += (LogP)htkheader.prscale * linkinfo->pron;
		}
		if (linkinfo->duration != HTK_undef_float) {
		    weight += (LogP)htkheader.duscale * linkinfo->duration;
		}
		if (!ignoreWord(linkinfo->word)) {
		    weight += (LogP)htkheader.wdpenalty;
		}
		if (linkinfo->xscore1 != HTK_undef_float) {
		    weight += (LogP)htkheader.x1scale * linkinfo->xscore1;
		}
		if (linkinfo->xscore2 != HTK_undef_float) {
		    weight += (LogP)htkheader.x2scale * linkinfo->xscore2;
		}
		if (linkinfo->xscore3 != HTK_undef_float) {
		    weight += (LogP)htkheader.x3scale * linkinfo->xscore3;
		}
		if (linkinfo->xscore4 != HTK_undef_float) {
		    weight += (LogP)htkheader.x4scale * linkinfo->xscore4;
		}
		if (linkinfo->xscore5 != HTK_undef_float) {
		    weight += (LogP)htkheader.x5scale * linkinfo->xscore5;
		}
		if (linkinfo->xscore6 != HTK_undef_float) {
		    weight += (LogP)htkheader.x6scale * linkinfo->xscore6;
		}
		if (linkinfo->xscore7 != HTK_undef_float) {
		    weight += (LogP)htkheader.x7scale * linkinfo->xscore7;
		}
		if (linkinfo->xscore8 != HTK_undef_float) {
		    weight += (LogP)htkheader.x8scale * linkinfo->xscore8;
		}
		if (linkinfo->xscore9 != HTK_undef_float) {
		    weight += (LogP)htkheader.x9scale * linkinfo->xscore9;
		}

		/*
		 * Add transitions from start node, and to end node
		 */
		LatticeTransition trans1(weight, 0);
		insertTrans(startIndex, newNode, trans1);

		LatticeTransition trans2(LogP_One, 0);
		insertTrans(newNode, endIndex, trans2);

		continue;

	    /*
	     * Node fields
	     */
	    } else if (keyis("I")) {
		unsigned HTKnodeno = atoi(value);

		/*
		 * create a null node for this HTK node,
		 * and record node-related info.
		 */
		NodeIndex nullNodeIndex = dupNode(HTKNodeDummy);

		*nodeMap.insert(HTKnodeno) = nullNodeIndex;
		HTKWordInfo &nodeinfo = nodeInfoMap[HTKnodeno];

		/*
		 * parse node fields
		 */
		while (key = getHTKField(line, value, htkheader.useQuotes)) {
		    if (keyis("t") || keyis("time")) {
			nodeinfo.time = (float)atof(value);
		    } else if (keyis("W") || keyis("WORD")) {
			if (strcmp(value, HTK_null_word) == 0) {
			    nodeinfo.word = Vocab_None;
			} else if (useUnk) {
			    nodeinfo.word =
					vocab.getIndex(value, vocab.unkIndex());
			} else {
			    nodeinfo.word = vocab.addWord(value);
			}
		    } else if (keyis("v") || keyis("var")) {
			nodeinfo.var = atoi(value);
		    } else if (keyis("d") || keyis("div")) {
			nodeinfo.div = strdup(value);
			assert(nodeinfo.div != 0);
		    } else if (keyis("s") || keyis("states")) {
			nodeinfo.states = strdup(value);
			assert(nodeinfo.states != 0);
		    } else if (keyis("a") || keyis("acoustic")) {
			nodeinfo.acoustic = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("r")) {
			nodeinfo.pron = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("ds")) {
			nodeinfo.duration = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x1")) {
			nodeinfo.xscore1 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x2")) {
			nodeinfo.xscore2 = getHTKscore(value, HTKlogbase, file);
		    } else if (keyis("x3")) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -