📄 myngram.cpp
字号:
#include "MyNgram.h"
LogPMyNgram::My_wordProbBO(VocabIndex word, const VocabIndex *context, unsigned int clen, bool& bo){ LogP logp = LogP_Zero; LogP bow = LogP_One; unsigned found = 0; BOtrie *trieNode = &contexts; int i = 0; bo = true; do { LogP *prob = trieNode->value().probs.find(word); if (prob) { /* * If a probability is found at this level record it as the * most specific one found so far and reset the backoff weight. */ logp = *prob; bow = LogP_One; found = i + 1; } if (i >= (int)clen || context[i] == Vocab_None) { bo = false; //return break; } BOtrie *next = trieNode->findTrie(context[i]); if (next) { /* * Accumulate backoff weights */ bow += next->value().bow; trieNode = next; i ++; } else { break; } } while (1); return logp + bow;}
// own_mix selects whether to use our own mixture method (vs. the stock one)
// lambda is the interpolation weight of *this* MyNgram (lm1)
/*
 * Destructively interpolate this model (lm1) with lm2.
 *
 * @param lm2      the other model; its probs are mixed into *this
 * @param lambda   weight of *this* model (lm1); lm2 gets weight (1 - lambda)
 * @param own_mix  if false, delegate to the stock Ngram::mixProbs;
 *                 if true, use our own scheme that down-weights a model
 *                 whenever its probability was obtained via backoff
 */
void MyNgram::mixProbs(MyNgram &lm2, double lambda, bool own_mix)
{
    if (!own_mix) {
	return Ngram::mixProbs(lm2, lambda);
    }

    // Use our own mixing method.
    bool bo;
    makeArray(VocabIndex, context, order + 1);

    /*
     * In destructive merging we need to process the longer ngrams first
     * so that we can still use the model being modified to compute its own
     * original probability estimates.
     */
    for (int i = order - 1; i >= 0; i--) {
	BOnode *node;
	NgramBOsIter iter1(*this, context, i);

	/*
	 * First, find all explicit ngram probs in *this, and mix them
	 * with the corresponding probs of lm2 (explicit or backed-off).
	 */
	while ((node = iter1.next())) {
	    NgramProbsIter piter(*node);
	    VocabIndex word;
	    LogP *prob1;

	    while ((prob1 = piter.next(word))) {
		LogP prob = lm2.My_wordProbBO(word, context, i, bo);
		double d = lambda;

		if (bo) {
		    // lm2's prob came from backoff, so treat it as
		    // unreliable: halve lm2's effective weight, and do not
		    // let it exceed lambda.
		    //d = lambda / (lambda + (1-d)/2);                 // halve lm2's weight
		    d = lambda / (lambda + min(lambda, (1-d)/2));      // halve lm2's weight, capped at lambda
		    //d = lambda / (lambda + min(lambda/2, (1-d)/2));  // halve lm2's weight, capped at lambda/2
		}
		*prob1 = MixLogP(*prob1, prob, d);
	    }
	}

	/*
	 * Do the same for lm2, except we don't need to recompute
	 * those cases that were already handled above (explicit probs
	 * in both *this and lm2).
	 */
	NgramBOsIter iter2(lm2, context, i);

	while ((node = iter2.next())) {
	    NgramProbsIter piter(*node);
	    VocabIndex word;
	    LogP *prob2;

	    while ((prob2 = piter.next(word))) {
		if (!findProb(word, context)) {
		    LogP prob = My_wordProbBO(word, context, i, bo);
		    double d = lambda;

		    if (bo) {
			// No explicit prob in lm1 and its estimate came
			// from backoff: treat lm1's prob as unreliable —
			// halve lm1's weight, and do not let it exceed
			// lm2's weight.
			//double dlm1 = d/2;                // halve lm1's weight
			double dlm1 = min(d/2, (1-d));      // halve lm1's weight, capped at lm2's weight
			//double dlm1 = min(d/2, (1-d)/2);  // halve lm1's weight, capped at half lm2's weight
			d = dlm1 / (dlm1 + 1 - d);
		    }
		    LogP mixProb = MixLogP(prob, *prob2, d);
		    *insertProb(word, context) = mixProb;
		}
	    }
	}
    }

    recomputeBOWs();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -