⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 subvocab.cc

📁 这是一款很好用的工具包
💻 CC
字号:
/*
 * SubVocab.cc --
 *	Vocabulary subset class
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 1996,1999,2003 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/SubVocab.cc,v 1.7 2006/01/05 20:21:27 stolcke Exp $";
#endif

#include <iostream>
using namespace std;
#include <string.h>
#include <ctype.h>
#include <assert.h>

#include "SubVocab.h"

#include "LHash.h"
#include "Array.h"

/*
 * Construct a sub-vocabulary on top of an existing base vocabulary.
 *
 *	baseVocab	vocabulary this subset refers to; a reference to it
 *			is kept in _baseVocab
 */
SubVocab::SubVocab(Vocab &baseVocab)
    :  _baseVocab(baseVocab)
{
    /*
     * Default taken over from the base vocab: outputVocab points at it.
     */
    this->outputVocab = &baseVocab;

    /*
     * A freshly built sub-vocabulary carries no special tokens, so the
     * entries at _unkIndex, _ssIndex, _seIndex and _pauseIndex are removed
     * (in that order).
     */
    this->remove(_unkIndex);
    this->remove(_ssIndex);
    this->remove(_seIndex);
    this->remove(_pauseIndex);
}

/*
 * Add a word, given by name, to the sub-vocabulary.
 *
 * The name is first added to the base vocabulary (via its addWord()),
 * and the index returned from there is then registered in this
 * sub-vocabulary, so both share the same index.
 *
 * Returns the word's index, or Vocab_None if the base vocabulary
 * returned Vocab_None.
 */
VocabIndex
SubVocab::addWord(VocabString name)
{
    VocabIndex baseIndex = _baseVocab.addWord(name);

    if (baseIndex != Vocab_None) {
	return addWord(baseIndex);
    }
    return Vocab_None;
}

/*
 * Add a word, given by its base-vocabulary index, to the sub-vocabulary.
 *
 * The index must already be known to the base vocabulary; if
 * _baseVocab.getWord() yields 0 for it, nothing is added and
 * Vocab_None is returned.  Otherwise wid itself is returned.
 */
VocabIndex
SubVocab::addWord(VocabIndex wid)
{
    VocabString canonicalName = _baseVocab.getWord(wid);

    if (canonicalName == 0) {
	return Vocab_None;
    }

    /*
     * Key the entry on the base vocab's string rather than on anything
     * supplied by the caller, in case the base Vocab changed capitalization.
     */
    Boolean alreadyPresent;
    VocabIndex *slot = byName.insert(canonicalName, alreadyPresent);

    if (alreadyPresent) {
	/* an existing entry must map to the very same index */
	assert(*slot == wid);
	return wid;
    }

    *slot = wid;
    byIndex[wid] = byName.getInternalKey(canonicalName);

    /*
     * Zero the word strings from the previous nextIndex up to (but not
     * including) the new index, so that lookups there return 0.
     */
    for (unsigned gap = nextIndex; gap < wid; ++gap) {
	byIndex[gap] = 0;
    }

    /*
     * Keep nextIndex at 1 plus the highest word index used.
     */
    if (nextIndex < wid + 1) {
	nextIndex = wid + 1;
    }

    return wid;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -