📄 productngram.cc
字号:
#ifndef ProductNgram_cc
#define ProductNgram_cc
/* * ProductNgram.cc -- * Product N-gram backoff language models * Jeff Bilmes <bilmes@ee.washington.edu> * */#ifndef lintstatic char Copyright[] = "Copyright (c) 1995-2004, SRI International. All Rights Reserved.";static char RcsId[] = "@(#)$Header: /home/srilm/devel/flm/src/RCS/ProductNgram.cc,v 1.7 2004/12/03 00:52:51 stolcke Exp $";#endif#ifndef EXCLUDE_CONTRIB#include "ProductNgram.h"#include "FNgramStats.cc"#include "FNgramSpecs.cc"/* * Debug levels used */#define DEBUG_PRINT_SENT_PROBS 1 /* from LM.cc */#define DEBUG_WORD_PROB_SUMS 4ProductNgram::ProductNgram(ProductVocab &vocab, unsigned order) : Ngram(vocab, order), vocab(vocab), fnSpecs(0), factoredStats(0), fngramLM(0){}ProductNgram::~ProductNgram(){ delete fngramLM; delete factoredStats; delete fnSpecs;}voidProductNgram::memStats(MemStats &stats){ stats.total += sizeof(*this); if (factoredStats != 0) factoredStats->memStats(stats); if (fngramLM != 0) fngramLM->memStats(stats);}BooleanProductNgram::read(File &file, Boolean limitVocab) // limitVocab is ignored for now{ delete fngramLM; fngramLM = 0; delete fnSpecs; fnSpecs = 0; delete factoredStats; factoredStats = 0; // create and initialize FNgramSpecs object fnSpecs = new FNgramSpecs<FNgramCount>(file, vocab.fvocab, debuglevel()); if (!fnSpecs) { //cerr << "Error creating fnspecs object\n"; return false; } // create and initialize FNgramStats object FNgramStats *factoredStats = new FNgramStats(vocab.fvocab, *fnSpecs); assert(factoredStats != 0); factoredStats->debugme(debuglevel()); // read in the counts, we need to do this for now. // TODO: change so that counts are not needed for ppl/rescoring. if (!factoredStats->read()) { //cerr << "error reading in counts in factor file\n"; return false; } factoredStats->estimateDiscounts(); factoredStats->computeCardinalityFunctions(); factoredStats->sumCounts(); // create and initialize FNgram object fngramLM = new FNgram(vocab.fvocab, *fnSpecs); assert(fngramLM != 0); // Don't enable debug levels >= 2 in FNgram since they just duplicate // debugging output in LM, violating the common format. fngramLM->debugme(debuglevel() > DEBUG_PRINT_SENT_PROBS ? DEBUG_PRINT_SENT_PROBS : debuglevel()); // For now, set to values to get backwards compat with ngram.cc fngramLM->virtualBeginSentence = false; fngramLM->virtualEndSentence = false; fngramLM->noScoreSentenceBoundaryMarks = true; // Once the FNgram object is allocated, skipOOVs() and trustTotals() // return referenecs to its parameters, but we need to make sure that // values set before allocation are inherited by the new FNgram. fngramLM->skipOOVs = _skipOOVs; fngramLM->trustTotals = _trustTotals; if (!fngramLM->read()) { //cerr << "error reading in factored LM files\n"; return false; } return true;}/* * The product LM forms its probability by looking at the current * factored LM and then multiplying (forming the product) of the various * factors currently loaded. */LogPProductNgram::wordProbBO(VocabIndex word, const VocabIndex *context, unsigned int clen){ assert(fngramLM != 0); if (fngramLM == 0) { return LogP_Zero; } // word is w[t] // context[0] is w[t-1] // context[1] is w[t-2] // and so on. static WidMatrix widMatrix; const unsigned childPos = clen; // load up the word and the context. vocab.loadWidFactors(word,widMatrix[clen]); for (unsigned pos=1;pos<=clen;pos++) { vocab.loadWidFactors(context[pos-1],widMatrix[clen-pos]); } return fngramLM->wordProb(widMatrix,childPos,clen+1);}/* * Returns unique identifier for the context used by the LM (and its length). * We just create the hash code for all the context words within the * N-gram order given by the model. This ignores the possibilities of hashing * collisions, but should work ok in practice. */void *ProductNgram::contextID(VocabIndex word, const VocabIndex *context, unsigned &length){ // ProductNgram uses the full context given to it, // up to the maximal order specified // (n-gram model uses at most n-1 words of context) length = Vocab::length(context); if (order - 1 < length) { length = order - 1; } // truncate context to used length and compute a hash code VocabIndex saved = context[length]; ((VocabIndex *)context)[length] = Vocab_None; // override const unsigned hash = LHash_hashKey(context, 30); ((VocabIndex *)context)[length] = saved; return (void *)hash;}LogPProductNgram::contextBOW(const VocabIndex *context, unsigned length){ return LogP_One;}ProbProductNgram::wordProbSum(const VocabIndex *context){ double total = 0.0; VocabIter iter(vocab); VocabIndex wid; /* * prob summing interrupts sequential processing mode */ Boolean wasRunning = running(false); while (iter.next(wid)) { if (!isNonWord(wid)) { Prob p = LogPtoProb(wordProb(wid, context)); total += p; if (debug(DEBUG_WORD_PROB_SUMS)) { cerr << "summing: " << vocab.getWord(wid) << " " << p << " total " << total << endl; } } } running(wasRunning); return total;}#endif /* EXCLUDE_CONTRIB_END */#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -