📄 fngramlm.cc
字号:
/* * FNgramLM.cc -- * factored N-gram general graph backoff language models * * Jeff Bilmes <bilmes@ee.washington.edu> * but based on some code from NgramLM.cc and Ngram.h */#ifndef lintstatic char Copyright[] = "Copyright (c) 1995-2006 SRI International. All Rights Reserved.";static char RcsId[] = "@(#)$Header: /home/srilm/devel/flm/src/RCS/FNgramLM.cc,v 1.14 2006/01/09 19:01:44 stolcke Exp $";#endif#ifndef EXCLUDE_CONTRIB#include <new>#include <iostream>using namespace std;#include <stdlib.h>#include <math.h>#include <string.h>#include "FNgram.h"#include "FactoredVocab.h"#include "FDiscount.h"#include "File.h"#include "Array.cc"#include "Trie.cc"#include "hexdec.h"#include "FNgramSpecs.cc"#ifdef INSTANTIATE_TEMPLATESINSTANTIATE_TRIE(VocabIndex,FNgram::BOnode);INSTANTIATE_ARRAY(FNgram::FNgramLM); INSTANTIATE_ARRAY(FNgram::ParentSubset); #ifdef USE_SARRAY#include "SArray.cc"INSTANTIATE_SARRAY(VocabIndex,FNgram::ProbEntry); #else#include "LHash.cc"INSTANTIATE_LHASH(VocabIndex,FNgram::ProbEntry); #endif#endif /* INSTANTIATE_TEMPLATES *//* * Debug levels used */#define DEBUG_ESTIMATE_WARNINGS 1#define DEBUG_FIXUP_WARNINGS 3#define DEBUG_PRINT_GTPARAMS 2#define DEBUG_READ_STATS 1#define DEBUG_WRITE_STATS 1#define DEBUG_NGRAM_HITS 2#define DEBUG_ESTIMATES 4#define DEBUG_ESTIMATE_LM 4#define DEBUG_BOWS 4#define DEBUG_EXTREME 20/* these are the same as in LM.cc */#define DEBUG_PRINT_SENT_PROBS 1#define DEBUG_PRINT_WORD_PROBS 2#define DEBUG_PRINT_PROB_SUMS 3const LogP LogP_PseudoZero = -99.0; /* non-inf value used for log 0 *//* * Low level methods to access context (BOW) nodes and probs */voidFNgram::memStats(MemStats &stats){ // TODO: finish this function.}FNgram::FNgram(FactoredVocab &vocab, FNgramSpecsType& _fngs) : LM(vocab), fngs(_fngs), skipOOVs(false), trustTotals(false), combineLMScores(true), virtualBeginSentence(true), virtualEndSentence(true), noScoreSentenceBoundaryMarks(false){ // we could pre-allocate the fngrams arrays here // but Array objects do not 
export alloc. fNgrams = new FNgramLM[fngs.fnSpecArray.size()]; fNgramsSize = fngs.fnSpecArray.size(); for (unsigned specNum=0;specNum<fngs.fnSpecArray.size();specNum++) { fNgrams[specNum].parentSubsets = new ParentSubset[fngs.fnSpecArray[specNum].numSubSets]; fNgrams[specNum].parentSubsetsSize = fngs.fnSpecArray[specNum].numSubSets; for (unsigned node=0;node<fngs.fnSpecArray[specNum].numSubSets;node++) { fNgrams[specNum].parentSubsets[node].active = (fngs.fnSpecArray[specNum].parentSubsets[node].counts != NULL); // make copy for convenient access fNgrams[specNum].parentSubsets[node].order = fngs.fnSpecArray[specNum].parentSubsets[node].order; } }}FNgram::~FNgram(){ clear(); for (unsigned specNum=0;specNum<fNgramsSize;specNum++) { delete [] fNgrams[specNum].parentSubsets; } delete [] fNgrams;}/* * Locate a BOW entry in the n-gram trie */LogP *FNgram::ParentSubset::findBOW(const VocabIndex *context){ BOnode *bonode = contexts.find(context); if (bonode) { return &(bonode->bow); } else { return 0; }}/* * Locate a prob entry in the n-gram trie. 
*/
LogP *
FNgram::ParentSubset::findProb(VocabIndex word, const VocabIndex *context)
{
  BOnode *bonode = contexts.find(context);
  if (bonode) {
    // NULL-safety fix: probs.find() returns NULL when the word has no
    // entry under this context, and the original dereferenced the result
    // unconditionally (undefined behavior).  Use the same guarded
    // pattern that findProbSubCtx already uses.
    ProbEntry *pe = bonode->probs.find(word);
    return pe ? &(pe->prob) : 0;
  } else {
    return 0;
  }
}

/*
 * Locate or create a BOW entry in the n-gram trie
 */
LogP *
FNgram::ParentSubset::insertBOW(const VocabIndex *context)
{
  Boolean found;
  BOnode *bonode = contexts.insert(context, found);

  if (!found) {
    /*
     * initialize the index in the BOnode
     */
    new (&bonode->probs) PROB_INDEX_T<VocabIndex,ProbEntry>(0);
  }
  return &(bonode->bow);
}

/*
 * Locate or create a prob entry in the n-gram trie
 */
LogP *
FNgram::ParentSubset::insertProb(VocabIndex word, const VocabIndex *context)
{
  Boolean found;
  BOnode *bonode = contexts.insert(context, found);
  // fprintf(stderr,"inserting word %d context starting with %d\n",word,*context);

  if (!found) {
    // fprintf(stderr,"not found\n");
    /*
     * initialize the index in the BOnode
     */
    new (&bonode->probs) PROB_INDEX_T<VocabIndex,ProbEntry>(0);
  }
  ProbEntry* res = bonode->probs.insert(word);
  // fprintf(stderr,"inserted into probs, got back 0x%X, size now %d\n",
  //	    res,bonode->probs.numEntries());
  // insert() may return NULL on failure; propagate that to the caller
  return res ? 
&(res->prob) : NULL; // return bonode->probs.insert(word);}/* * Locate or create a prob entry in the n-gram trie */LogP *FNgram::ParentSubset::insertProbAndCNT(VocabIndex word, const VocabIndex *context, const unsigned int cnt){ Boolean found; BOnode *bonode = contexts.insert(context, found); if (!found) { /* * initialize the index in the BOnode */ new (&bonode->probs) PROB_INDEX_T<VocabIndex,ProbEntry>(0); } ProbEntry* res = bonode->probs.insert(word); if (res) { res->cnt = cnt; return &(res->prob); } return NULL;}/* * Remove a BOW node (context) from the n-gram trie */voidFNgram::ParentSubset::removeBOW(const VocabIndex *context){ contexts.removeTrie(context);}/* * Remove a prob entry from the n-gram trie */voidFNgram::ParentSubset::removeProb(VocabIndex word, const VocabIndex *context){ BOnode *bonode = contexts.find(context); if (bonode) { bonode->probs.remove(word); }}FNgram::BOnode*FNgram::ParentSubset::findTrieNodeSubCtx(const VocabIndex *context, unsigned int bits){ // layout of arguments // variables: p1 p2 p3 p4 (i.e., parent number) // bits: b1 b2 b3 b4 // context[i]: 0 1 2 3 // From model p(c|p1,p2,p3,p4) // LM tries have p1 at the root tree level, then p2, p3, and p4 at // the tree leaf level. Therefore, this routine indexes lm tries // in ascending context array order. const int wlen = Vocab::length(context); assert (FNgramSpecsType::numBitsSet(bits) <= (unsigned)wlen); BOtrie* boTrie = &contexts; VocabIndex word[2]; word[1] = Vocab_None; for (int i=0; i<wlen && bits;i++) { if (bits & 0x1) { word[0] = context[i]; if ((boTrie = boTrie->findTrie(word)) == NULL) return NULL; } bits >>= 1; } return boTrie ? &(boTrie->value()) : NULL;}/* * Locate a prob entry in the n-gram trie. */LogP *FNgram::ParentSubset::findProbSubCtx(VocabIndex word1, const VocabIndex *context, unsigned int bits){ BOnode* bonode = findTrieNodeSubCtx(context,bits); if (!bonode) return NULL; ProbEntry* pe = bonode->probs.find(word1); return pe ? 
&(pe->prob) : NULL; }/* * Locate a bow in the n-gram trie. */LogP *FNgram::ParentSubset::findBOWSubCtx(const VocabIndex *context, unsigned int bits){ BOnode* bonode = findTrieNodeSubCtx(context,bits); return bonode ? &(bonode->bow) : NULL; }/* * Locate both prob entry and bow in the n-gram trie. */BooleanFNgram::ParentSubset::findBOWProbSubCtx(VocabIndex word1, const VocabIndex *context, LogP*& prob, LogP*& bow, unsigned int bits){ BOnode* bonode = findTrieNodeSubCtx(context,bits); if (bonode) { prob = &(bonode->probs.find(word1)->prob); bow = &(bonode->bow); return true; } else return false;}FNgram::BOnode*FNgram::ParentSubset::insertTrieNodeSubCtx(const VocabIndex *context, unsigned int bits, Boolean& foundP){ // same as findTrieNodeSubCtx except we do inserts rather than finds. const int wlen = Vocab::length(context); assert (FNgramSpecsType::numBitsSet(bits) <= (unsigned)wlen); BOtrie* boTrie = &contexts; VocabIndex word[2]; word[1] = Vocab_None; for (int i=0; i<wlen && bits;i++) { if (bits & 0x1) { word[0] = context[i]; if ((boTrie = boTrie->insertTrie(word,foundP)) == NULL) return NULL; } bits >>= 1; } foundP = true; return boTrie ? &(boTrie->value()) : NULL;}/* * Locate or create a BOW entry in the n-gram trie */LogP *FNgram::ParentSubset::insertBOWSubCtx(const VocabIndex *context, unsigned int bits){ Boolean found; BOnode *bonode = insertTrieNodeSubCtx(context, bits, found); if (!found) { /* * initialize the index in the BOnode */ new (&bonode->probs) PROB_INDEX_T<VocabIndex,ProbEntry>(0); } return &(bonode->bow);}/* * Locate or create a BOW entry in the n-gram trie */LogP *FNgram::ParentSubset::insertProbSubCtx(VocabIndex word, const VocabIndex *context, unsigned int bits){ Boolean found; BOnode *bonode = insertTrieNodeSubCtx(context, bits, found); if (!found) { /* * initialize the index in the BOnode */ new (&bonode->probs) PROB_INDEX_T<VocabIndex,ProbEntry>(0); } ProbEntry* pe = bonode->probs.insert(word); return pe ? 
&(pe->prob) : NULL;}voidFNgram::ParentSubset::removeBOWSubCtx(const VocabIndex *context, unsigned int bits){ // same as findTrieNodeSubCtx except that we pack context // into a local words array rather than do the trie search // explicitly. This is because this routine // it is probably not called very often and // doesn't need to be as fast as the others. const int wlen = Vocab::length(context); assert (FNgramSpecsType::numBitsSet(bits) <= (unsigned)wlen); VocabIndex words[maxNumParentsPerChild+2]; unsigned j=0; for (int i=0; i<wlen && bits;i++) { if (bits & 0x1) { words[j] = context[i]; } bits >>= 1; } words[j] = Vocab_None; removeBOW(words);}voidFNgram::ParentSubset::removeProbSubCtx(VocabIndex word1, const VocabIndex *context, unsigned int bits){ const int wlen = Vocab::length(context); assert (FNgramSpecsType::numBitsSet(bits) <= (unsigned)wlen); VocabIndex words[maxNumParentsPerChild+2]; unsigned j=0; for (int i=0; i<wlen && bits;i++) { if (bits & 0x1) { words[j] = context[i]; } bits >>= 1; } words[j] = Vocab_None; removeProb(word1,words);}/* * Remove all probabilities and contexts from n-gram trie */voidFNgram::clear(unsigned int specNum){ VocabIndex context[maxNumParentsPerChild+2]; BOnode *node; if (specNum >= fNgramsSize) return;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -