📄 myngram.cpp
字号:
#include "MyNgram.h"
LogPMyNgram::My_wordProbBO(VocabIndex word, const VocabIndex *context, unsigned int clen, bool& bo){ LogP logp = LogP_Zero; LogP bow = LogP_One; unsigned found = 0; BOtrie *trieNode = &contexts; int i = 0; bo = true; do { LogP *prob = trieNode->value().probs.find(word); if (prob) { /* * If a probability is found at this level record it as the * most specific one found so far and reset the backoff weight. */ logp = *prob; bow = LogP_One; found = i + 1; } if (i >= (int)clen || context[i] == Vocab_None) { bo = false; //return break; } BOtrie *next = trieNode->findTrie(context[i]); if (next) { /* * Accumulate backoff weights */ bow += next->value().bow; trieNode = next; i ++; } else { break; } } while (1); return logp + bow;}
// own_mix selects whether to use our own mixture method (vs. the stock one)
// lambda is the interpolation weight of *this* MyNgram (lm1)
/*
 * Destructively interpolate this model (lm1) with lm2.
 *
 * @param lm2      the other model; its probs are mixed into *this
 * @param lambda   weight of *this* model (lm1); lm2 gets weight (1 - lambda)
 * @param own_mix  if false, delegate to the stock Ngram::mixProbs;
 *                 if true, use our own scheme that down-weights a model
 *                 whenever its probability was obtained via backoff
 */
void MyNgram::mixProbs(MyNgram &lm2, double lambda, bool own_mix)
{
    if (!own_mix) {
	return Ngram::mixProbs(lm2, lambda);
    }

    // Use our own mixing method.
    bool bo;
    makeArray(VocabIndex, context, order + 1);

    /*
     * In destructive merging we need to process the longer ngrams first
     * so that we can still use the model being modified to compute its own
     * original probability estimates.
     */
    for (int i = order - 1; i >= 0; i--) {
	BOnode *node;
	NgramBOsIter iter1(*this, context, i);

	/*
	 * First, find all explicit ngram probs in *this, and mix them
	 * with the corresponding probs of lm2 (explicit or backed-off).
	 */
	while ((node = iter1.next())) {
	    NgramProbsIter piter(*node);
	    VocabIndex word;
	    LogP *prob1;

	    while ((prob1 = piter.next(word))) {
		LogP prob = lm2.My_wordProbBO(word, context, i, bo);
		double d = lambda;

		if (bo) {
		    // lm2's prob came from backoff, so treat it as
		    // unreliable: halve lm2's effective weight, and do not
		    // let it exceed lambda.
		    //d = lambda / (lambda + (1-d)/2);                 // halve lm2's weight
		    d = lambda / (lambda + min(lambda, (1-d)/2));      // halve lm2's weight, capped at lambda
		    //d = lambda / (lambda + min(lambda/2, (1-d)/2));  // halve lm2's weight, capped at lambda/2
		}
		*prob1 = MixLogP(*prob1, prob, d);
	    }
	}

	/*
	 * Do the same for lm2, except we don't need to recompute
	 * those cases that were already handled above (explicit probs
	 * in both *this and lm2).
	 */
	NgramBOsIter iter2(lm2, context, i);

	while ((node = iter2.next())) {
	    NgramProbsIter piter(*node);
	    VocabIndex word;
	    LogP *prob2;

	    while ((prob2 = piter.next(word))) {
		if (!findProb(word, context)) {
		    LogP prob = My_wordProbBO(word, context, i, bo);
		    double d = lambda;

		    if (bo) {
			// No explicit prob in lm1 and its estimate came
			// from backoff: treat lm1's prob as unreliable —
			// halve lm1's weight, and do not let it exceed
			// lm2's weight.
			//double dlm1 = d/2;                // halve lm1's weight
			double dlm1 = min(d/2, (1-d));      // halve lm1's weight, capped at lm2's weight
			//double dlm1 = min(d/2, (1-d)/2);  // halve lm1's weight, capped at half lm2's weight
			d = dlm1 / (dlm1 + 1 - d);
		    }
		    LogP mixProb = MixLogP(prob, *prob2, d);
		    *insertProb(word, context) = mixProb;
		}
	    }
	}
    }

    recomputeBOWs();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -