⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cachelm.cc

📁 这是一款很好用的工具包
💻 CC
字号:
/*
 * CacheLM.cc --
 *	Unigram cache language model
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 1995, SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lm/src/RCS/CacheLM.cc,v 1.7 2006/01/05 20:21:27 stolcke Exp $";
#endif

#include <iostream>
using namespace std;
#include <stdlib.h>
#include <math.h>

#include "CacheLM.h"

#include "Array.cc"
#ifdef INSTANTIATE_TEMPLATES
// INSTANTIATE_ARRAY(VocabIndex);
#endif

#include "LHash.cc"
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_LHASH(VocabIndex,double);
#endif

/*
 * Debug levels used
 *
 * DEBUG_CACHE_HITS: passed to debug() in CacheLM::wordProb(); when that
 * check succeeds (and the model is running) the cache probability of each
 * queried word is written to dout() as "[cache=<prob>]".
 */
#define DEBUG_CACHE_HITS	2

/*
 * Build a cache LM over `vocab' whose word history holds at most
 * `historyLength' words.
 *
 * Only the members named in the initializer list are set here; the
 * history slots, historyEnd and totalCount are established by the
 * flushCache() call in the body.
 * NOTE(review): wordHistory(0, historyLength) presumably means
 * "base index 0, initial size historyLength", and wordCounts(0) an
 * initially empty hash table -- confirm against Array.h / LHash.h.
 */
CacheLM::CacheLM(Vocab &vocab, unsigned historyLength)
    : LM(vocab),
      historyLength(historyLength),
      wordHistory(0, historyLength),
      wordCounts(0)
{
    this->flushCache();
}

/*
 * Forget all that is in the cache
 */
void
CacheLM::flushCache()
{
    /*
     * Initialize word history.
     */
    for (unsigned i = 0; i < historyLength; i++) {
	wordHistory[i] = Vocab_None;
    }
    historyEnd = 0;
    totalCount = 0.0;

    /*
     * Reset word counts to zero
     */
    LHashIter<VocabIndex,double> wordIter(wordCounts);
    VocabIndex word;
    double *wordCount;

    while (wordCount = wordIter.next(word)) {
	*wordCount = 0.0;
    }
}

/*
 * Return the log probability of `word' under the cache, i.e. its count
 * among the words currently held in the history divided by the number
 * of words held.  `context' is not consulted (unigram model).
 *
 * When the model is running and historyLength > 0, the queried word is
 * then recorded in the circular history, evicting the oldest word once
 * all slots are in use.  The probability returned is the one computed
 * BEFORE this update.
 */
LogP
CacheLM::wordProb(VocabIndex word, const VocabIndex *context)
{
    /*
     * Unknown words are never cached, except when the vocabulary
     * treats <unk> as a regular word.
     */
    if (word == vocab.unkIndex() && !vocab.unkIsWord()) {
	return LogP_Zero;
    }

    /*
     * Look up the word's count slot, creating it if necessary.  This
     * happens whether or not the model is running, and the same slot
     * is incremented further down.
     */
    double *wordCount = wordCounts.insert(word);

    /*
     * Maximum likelihood estimate over the cached words; an empty
     * cache (e.g., the very first word) yields probability 0.
     */
    Prob cacheProb = 0.0;
    if (totalCount != 0.0) {
	cacheProb = *wordCount / totalCount;
    }

    if (running() && debug(DEBUG_CACHE_HITS)) {
	dout() << "[cache=" << cacheProb << "]";
    }

    /*
     * Record the word in the history and adjust the counts.
     */
    if (running() && historyLength > 0) {
	const VocabIndex evicted = wordHistory[historyEnd];

	if (evicted != Vocab_None) {
	    /*
	     * History is full: the word being overwritten leaves the
	     * cache, so totalCount stays the same.
	     */
	    double *evictedCount = wordCounts.find(evicted);
	    assert(evictedCount != 0);

	    *evictedCount -= 1.0;
	} else {
	    /*
	     * Slot was still unused: the cache grows by one word.
	     */
	    totalCount += 1;
	}

	*wordCount += 1.0;
	wordHistory[historyEnd] = word;

	/*
	 * Advance the write position, wrapping around at the end.
	 */
	historyEnd = (historyEnd + 1) % historyLength;
    }

    return ProbToLogP(cacheProb);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -