reflist.cc

来自「这是一款很好用的工具包」· CC 代码 · 共 182 行

182 行

/*
 * RefList.cc --
 *	List of reference transcripts
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 1998-2003 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/RefList.cc,v 1.9 2006/01/05 20:21:27 stolcke Exp $";
#endif

#include <iostream>
using namespace std;
#include <string.h>
#include <stdlib.h>
#include <assert.h>

#include "RefList.h"

#include "LHash.cc"
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_LHASH(RefString, VocabIndex *);
#endif

#include "Array.cc"
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_ARRAY(VocabIndex *);
#endif

/*
 * List of known filename suffixes that can be stripped to infer 
 * utterance ids.
 *	The table is terminated by a 0 entry.  idFromFilename() walks it
 *	once from front to back, so the order of entries determines which
 *	stacked suffixes can be removed (e.g., ".wav.gz" loses both because
 *	".gz" precedes ".wav").
 *	Entries are string literals and therefore declared const.
 */
static const char *const suffixes[] = {
    ".Z", ".gz", ".score", ".wav", ".wav_cep", ".wv", ".wv1", ".sph", ".lat", 0
};

/*
 * Locate utterance id in filename
 *	Directory components (everything up to and including the last '/')
 *	are dropped, then each entry of suffixes[] is tried once, in table
 *	order, and removed if the name ends in it.  A suffix is only removed
 *	when at least one character remains in front of it.
 *
 *	Result is returned in temporary buffer that is valid until next
 *	call to this function.  filename may point into the buffer returned
 *	by the previous call.
 */
RefString
idFromFilename(const char *filename)
{
    static char *result = 0;

    const char *root = strrchr(filename, '/');

    if (root) {
	root += 1;
    } else {
	root = filename;
    }

    /*
     * Copy the basename BEFORE releasing the old buffer: filename may
     * alias the previous result (idFromFilename(idFromFilename(x))), and
     * freeing first would make strdup() read freed memory.
     */
    char *copy = strdup(root);
    assert(copy != 0);

    free(result);		/* free(0) is a no-op */
    result = copy;

    size_t rootlen = strlen(result);

    for (unsigned i = 0; suffixes[i] != 0; i++) {
	const char *suffix = suffixes[i];
	size_t suffixlen = strlen(suffix);

	/* strict '<' keeps a name that consists only of the suffix */
	if (suffixlen < rootlen &&
	    strcmp(&result[rootlen - suffixlen], suffix) == 0)
	{
	    result[rootlen - suffixlen] = '\0';
	    rootlen -= suffixlen;
	}
    }
    return result;
}

/*
 * Bind the list to a vocabulary and record whether transcripts carry
 * a leading utterance id (consulted by read() and write()).
 */
RefList::RefList(Vocab &vocab, Boolean haveIDs)
    : vocab(vocab),
      haveIDs(haveIDs)
{
}

/*
 * Release every word index array stored in refarray.
 */
RefList::~RefList()
{
    unsigned pos = 0;

    while (pos < refarray.size()) {
	delete [] refarray[pos];
	++pos;
    }
}

/*
 * Read reference transcripts from file, one per line.
 *	If haveIDs is set, the first word of each line is used as the key
 *	into reflist and the remaining words form the transcript; otherwise
 *	the whole line is the transcript.
 *	Words are converted with vocab.addWords() when addWords is true,
 *	else with vocab.getIndices(), passing vocab.unkIndex() (presumably
 *	the index substituted for unknown words -- confirm in Vocab).
 *	Lines for which parseWords() fills the whole buffer are reported as
 *	"too many words" and skipped.
 *	Every transcript is appended to refarray, which owns the arrays
 *	(they are freed in the destructor).
 *	Always returns true.
 */
Boolean
RefList::read(File &file, Boolean addWords)
{
    char *line;
    
    while ((line = file.getline()) != 0) {
	VocabString words[maxWordsPerLine + 2];
	unsigned nWords = Vocab::parseWords(line, words, maxWordsPerLine + 2);

	if (nWords == maxWordsPerLine + 2) {
	    file.position() << "too many words\n";
	    continue;
	}

	VocabIndex *wids = new VocabIndex[nWords + 2];
	assert(wids != 0);

	/* with ids, the transcript starts after the first word */
	VocabString *transcript = haveIDs ? words + 1 : words;

	if (addWords) {
	    vocab.addWords(transcript, wids, nWords + 2);
	} else {
	    vocab.getIndices(transcript, wids, nWords + 2,
							    vocab.unkIndex());
	}

	refarray[refarray.size()] = wids;

	if (haveIDs) {
	    /*
	     * Point the id at the latest transcript.  An earlier transcript
	     * for the same id must NOT be deleted here: it is still held in
	     * refarray, so deleting it would leave a dangling pointer for
	     * findRefByNumber() and cause a double delete in the destructor.
	     */
	    VocabIndex **slot = reflist.insert((RefString)words[0]);

	    *slot = wids;
	}
    }

    return true;
}

/*
 * Write all stored transcripts to file, one per line.
 *	With ids: reflist is traversed through an LHashIter constructed
 *	with strcmp, and each line is the id, a space, then the words.
 *	Without ids: transcripts are written in refarray order.
 *	Always returns true.
 */
Boolean
RefList::write(File &file)
{
    if (!haveIDs) {
	/*
	 * Output in read order
	 */
	unsigned pos = 0;

	while (pos < refarray.size()) {
	    VocabString text[maxWordsPerLine + 1];
	    vocab.getWords(refarray[pos], text, maxWordsPerLine + 1);

	    Vocab::write(file, text);
	    fprintf(file, "\n");

	    pos++;
	}

	return true;
    }

    /* 
     * Output sorted by ID
     */
    LHashIter<RefString, VocabIndex *> sorted(reflist, strcmp);
    RefString key;

    for (VocabIndex **entry = sorted.next(key);
	 entry != 0;
	 entry = sorted.next(key))
    {
	VocabString text[maxWordsPerLine + 1];
	vocab.getWords(*entry, text, maxWordsPerLine + 1);

	fprintf(file, "%s ", key);
	Vocab::write(file, text);
	fprintf(file, "\n");
    }

    return true;
}

/*
 * Look up a transcript by utterance id.
 *	Returns the word index array stored in reflist under id, or 0 if
 *	reflist has no entry for it.
 */
VocabIndex *
RefList::findRef(RefString id)
{
    VocabIndex **entry = reflist.find(id);

    return entry ? *entry : 0;
}

/*
 * Look up a transcript by its position in refarray.
 *	Returns 0 when id is not below refarray.size().
 */
VocabIndex *
RefList::findRefByNumber(unsigned id)
{
    if (!(id < refarray.size())) {
	return 0;
    }

    return refarray[id];
}

reflist.cc - 源码说明

本页面展示了「这是一款很好用的工具包」中的 reflist.cc 源码文件，采用 CC 编程语言编写，共 182 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与工具包相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?