⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 myngram.cpp

📁 这是一款很好用的工具包
💻 CPP
字号:
#include "MyNgram.h"

LogPMyNgram::My_wordProbBO(VocabIndex word, const VocabIndex *context, unsigned int clen, bool& bo){	LogP logp = LogP_Zero;	LogP bow = LogP_One;	unsigned found = 0;	BOtrie *trieNode = &contexts;	int i = 0;	bo = true;	do {		LogP *prob = trieNode->value().probs.find(word);		if (prob) {			/*			* If a probability is found at this level record it as the 			* most specific one found so far and reset the backoff weight.			*/			logp = *prob;			bow = LogP_One;			found = i + 1;		} 		if  (i >= (int)clen || context[i] == Vocab_None) 		{			bo = false;	//return			break;		}		BOtrie *next = trieNode->findTrie(context[i]);		if (next) {			/*			* Accumulate backoff weights 			*/			bow += next->value().bow;			trieNode = next;			i ++;		} else {			break;		}	} while (1);	return logp + bow;}

// own_mix selects whether to use our own mixture method (vs. Ngram::mixProbs)
// lambda is the interpolation weight of this MyNgram (lm1)
/*
 * Destructively interpolate *this (lm1, weight lambda) with lm2
 * (weight 1-lambda).  When own_mix is false this simply delegates to
 * the stock Ngram::mixProbs.  Otherwise, probabilities obtained via
 * backoff (as reported by My_wordProbBO's `bo` flag) are considered
 * less reliable and have their weight reduced before mixing.
 */
void MyNgram::mixProbs(MyNgram &lm2, double lambda, bool own_mix)
{
	if (!own_mix) {
		Ngram::mixProbs(lm2, lambda);
		return;
	}

	// use our own mix-method
	bool bo;
	makeArray(VocabIndex, context, order + 1);

	/*
	 * In destructive merging we need to process the longer ngrams first
	 * so that we can still use the model being modified to compute its own
	 * original probability estimates.
	 */
	for (int i = order - 1; i >= 0; i--) {
		BOnode *node;
		NgramBOsIter iter1(*this, context, i);

		/*
		 * First, find all explicit ngram probs in *this, and mix them
		 * with the corresponding probs of lm2 (explicit or backed-off).
		 */
		while ((node = iter1.next())) {
			NgramProbsIter piter(*node);
			VocabIndex word;
			LogP *prob1;

			while ((prob1 = piter.next(word))) {
				LogP prob = lm2.My_wordProbBO(word, context, i, bo);
				double d = lambda;
				if (bo) {
					// lm2's prob came from backoff, so distrust it:
					//d = lambda / (lambda + (1-d)/2);			// halve lm2's weight
					d = lambda / (lambda + min(lambda, (1-d)/2));	// halve lm2's weight, capped at lambda
					//d = lambda / (lambda + min(lambda/2, (1-d)/2));	// halve lm2's weight, capped at lambda/2
				}
				*prob1 = MixLogP(*prob1, prob, d);
			}
		}

		/*
		 * Do the same for lm2, except we don't need to recompute
		 * those cases that were already handled above (explicit probs
		 * in both *this and lm2).
		 */
		NgramBOsIter iter2(lm2, context, i);
		while ((node = iter2.next())) {
			NgramProbsIter piter(*node);
			VocabIndex word;
			LogP *prob2;

			while ((prob2 = piter.next(word))) {
				if (!findProb(word, context)) {
					LogP prob = My_wordProbBO(word, context, i, bo);
					double d = lambda;
					if (bo) {
						// No explicit entry in lm1, so its backed-off
						// prob is distrusted:
						//double dlm1 = d/2;			// halve lm1's weight
						double dlm1 = min(d/2, (1-d));		// halve lm1's weight, capped at lm2's weight
						//double dlm1 = min(d/2, (1-d)/2);	// halve lm1's weight, capped at half lm2's weight
						d = dlm1 / (dlm1 + 1 - d);
					}
					LogP mixProb = MixLogP(prob, *prob2, d);
					*insertProb(word, context) = mixProb;
				}
			}
		}
	}

	// Backoff weights are stale after changing explicit probs.
	recomputeBOWs();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -