📄 productngram.cc

📁 这是一款很好用的工具包
💻 CC
字号:
#ifndef ProductNgram_cc
#define ProductNgram_cc
/* * ProductNgram.cc -- *	Product N-gram backoff language models *      Jeff Bilmes <bilmes@ee.washington.edu> * */#ifndef lintstatic char Copyright[] = "Copyright (c) 1995-2004, SRI International.  All Rights Reserved.";static char RcsId[] = "@(#)$Header: /home/srilm/devel/flm/src/RCS/ProductNgram.cc,v 1.7 2004/12/03 00:52:51 stolcke Exp $";#endif#ifndef EXCLUDE_CONTRIB#include "ProductNgram.h"#include "FNgramStats.cc"#include "FNgramSpecs.cc"/* * Debug levels used */#define DEBUG_PRINT_SENT_PROBS		1	/* from LM.cc */#define DEBUG_WORD_PROB_SUMS		4ProductNgram::ProductNgram(ProductVocab &vocab, unsigned order)    : Ngram(vocab, order), vocab(vocab),      fnSpecs(0), factoredStats(0), fngramLM(0){}ProductNgram::~ProductNgram(){    delete fngramLM;    delete factoredStats;    delete fnSpecs;}voidProductNgram::memStats(MemStats &stats){    stats.total += sizeof(*this);    if (factoredStats != 0) factoredStats->memStats(stats);    if (fngramLM != 0) fngramLM->memStats(stats);}BooleanProductNgram::read(File &file, Boolean limitVocab)				// limitVocab is ignored for now{    delete fngramLM;		fngramLM = 0;    delete fnSpecs;		fnSpecs = 0;    delete factoredStats;	factoredStats = 0;    // create and initialize FNgramSpecs object    fnSpecs = new FNgramSpecs<FNgramCount>(file, vocab.fvocab, debuglevel());    if (!fnSpecs) {	//cerr << "Error creating fnspecs object\n";	return false;    }    // create and initialize FNgramStats object    FNgramStats *factoredStats = new FNgramStats(vocab.fvocab, *fnSpecs);    assert(factoredStats != 0);          factoredStats->debugme(debuglevel());    // read in the counts, we need to do this for now.    // TODO: change so that counts are not needed for ppl/rescoring.    if (!factoredStats->read()) {        //cerr << "error reading in counts in factor file\n";	return false;    }    factoredStats->estimateDiscounts();    factoredStats->computeCardinalityFunctions();    factoredStats->sumCounts();    // create and initialize FNgram object    fngramLM = new FNgram(vocab.fvocab, *fnSpecs);    assert(fngramLM != 0);    // Don't enable debug levels >= 2 in FNgram since they just duplicate    // debugging output in LM, violating the common format.    fngramLM->debugme(debuglevel() > DEBUG_PRINT_SENT_PROBS ?					    DEBUG_PRINT_SENT_PROBS :					    debuglevel());    // For now, set to values to get backwards compat with ngram.cc    fngramLM->virtualBeginSentence = false;    fngramLM->virtualEndSentence = false;    fngramLM->noScoreSentenceBoundaryMarks = true;    // Once the FNgram object is allocated, skipOOVs() and trustTotals()    // return referenecs to its parameters, but we need to make sure that    // values set before allocation are inherited by the new FNgram.    fngramLM->skipOOVs = _skipOOVs;    fngramLM->trustTotals = _trustTotals;    if (!fngramLM->read()) {	//cerr << "error reading in factored LM files\n";	return false;    }    return true;}/* * The product LM forms its probability by looking at the current * factored LM and then multiplying (forming the product) of the various * factors currently loaded. */LogPProductNgram::wordProbBO(VocabIndex word, const VocabIndex *context,							unsigned int clen){    assert(fngramLM != 0);    if (fngramLM == 0) {	return LogP_Zero;    }    // word is w[t]    // context[0] is w[t-1]    // context[1] is w[t-2]    // and so on.    static WidMatrix widMatrix;    const unsigned childPos = clen;    // load up the word and the context.        vocab.loadWidFactors(word,widMatrix[clen]);    for (unsigned pos=1;pos<=clen;pos++) {	vocab.loadWidFactors(context[pos-1],widMatrix[clen-pos]);    }    return fngramLM->wordProb(widMatrix,childPos,clen+1);}/* * Returns unique identifier for the context used by the LM (and its length). * We just create the hash code for all the context words within the * N-gram order given by the model.  This ignores the possibilities of hashing * collisions, but should work ok in practice. */void *ProductNgram::contextID(VocabIndex word, const VocabIndex *context,							unsigned &length){    // ProductNgram uses the full context given to it,    // up to the maximal order specified    // (n-gram model uses at most n-1 words of context)    length = Vocab::length(context);    if (order - 1 < length) {	length = order - 1;    }    // truncate context to used length and compute a hash code    VocabIndex saved = context[length];    ((VocabIndex *)context)[length] = Vocab_None; 	// override const    unsigned hash = LHash_hashKey(context, 30);    ((VocabIndex *)context)[length] = saved;    return (void *)hash;}LogPProductNgram::contextBOW(const VocabIndex *context, unsigned length){    return LogP_One;}ProbProductNgram::wordProbSum(const VocabIndex *context){    double total = 0.0;    VocabIter iter(vocab);    VocabIndex wid;    /*     * prob summing interrupts sequential processing mode     */    Boolean wasRunning = running(false);    while (iter.next(wid)) {	if (!isNonWord(wid)) {	    Prob p = LogPtoProb(wordProb(wid, context));	    total += p;	    if (debug(DEBUG_WORD_PROB_SUMS)) {		cerr << "summing: " << vocab.getWord(wid) << " " << p		     << " total " << total << endl;	    }	}    }    running(wasRunning);    return total;}#endif /* EXCLUDE_CONTRIB_END */#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -