📄 ngram.h
字号:
/* * Ngram.h -- * N-gram backoff language models * * Copyright (c) 1995-2004 SRI International. All Rights Reserved. * * @(#)$Header: /home/srilm/devel/lm/src/RCS/Ngram.h,v 1.41 2005/07/17 22:50:07 stolcke Exp $ * */#ifndef _Ngram_h_#define _Ngram_h_#include <stdio.h>#include "LM.h"#include "NgramStats.h"#include "Discount.h"#ifdef USE_SARRAY# define PROB_INDEX_T SArray# define PROB_ITER_T SArrayIter# include "SArray.h"#else /* ! USE_SARRAY */# define PROB_INDEX_T LHash# define PROB_ITER_T LHashIter# include "LHash.h"#endif /* USE_SARRAY */#include "Trie.h"typedef struct { LogP bow; /* backoff weight */ PROB_INDEX_T<VocabIndex,LogP> probs; /* word probabilities */} BOnode;typedef Trie<VocabIndex,BOnode> BOtrie;const unsigned defaultNgramOrder = 3;class Ngram: public LM{ friend class NgramBOsIter;public: Ngram(Vocab &vocab, unsigned order = defaultNgramOrder); virtual ~Ngram(); unsigned setorder(unsigned neworder = 0); /* change/return ngram order */ /* * LM interface */ virtual LogP wordProb(VocabIndex word, const VocabIndex *context); virtual void *contextID(const VocabIndex *context, unsigned &length) { return contextID(Vocab_None, context, length); }; virtual void *contextID(VocabIndex word, const VocabIndex *context, unsigned &length); virtual LogP contextBOW(const VocabIndex *context, unsigned length); virtual Boolean read(File &file, Boolean limitVocab = false); virtual void write(File &file) { writeWithOrder(file, order); }; virtual void writeWithOrder(File &file, unsigned int order); virtual Boolean &skipOOVs() { return _skipOOVs; }; /* backward compatiability: return * zero prob if <unk> is in context */ virtual Boolean &trustTotals() { return _trustTotals; } /* use lower-order counts for ngram totals */ /* * Estimation */ virtual Boolean estimate(NgramStats &stats, unsigned *mincount = 0, unsigned *maxcounts = 0); virtual Boolean estimate(NgramStats &stats, Discount **discounts); virtual Boolean estimate(NgramCounts<FloatCount> &stats, Discount **discounts); virtual void mixProbs(Ngram &lm2, double lambda); virtual void mixProbs(Ngram &lm1, Ngram &lm2, double lambda); virtual void recomputeBOWs(); virtual void pruneProbs(double threshold, unsigned minorder = 2); virtual void pruneLowProbs(unsigned minorder = 2); virtual void rescoreProbs(LM &lm); /* * Statistics */ virtual unsigned int numNgrams(unsigned int n); virtual void memStats(MemStats &stats); /* * Low-level access */ LogP *findBOW(const VocabIndex *context); LogP *insertBOW(const VocabIndex *context); LogP *findProb(VocabIndex word, const VocabIndex *context); LogP *insertProb(VocabIndex word, const VocabIndex *context); void removeBOW(const VocabIndex *context); void removeProb(VocabIndex word, const VocabIndex *context); void clear(); /* remove all parameters */protected: BOtrie contexts; /* n-1 gram context trie */ unsigned int order; /* maximal ngram order */ Boolean _skipOOVs; Boolean _trustTotals; /* * Helper functions */ virtual LogP wordProbBO(VocabIndex word, const VocabIndex *context, unsigned int clen); virtual unsigned vocabSize(); template <class CountType> Boolean estimate2(NgramCounts<CountType> &stats, Discount **discounts); virtual void fixupProbs(); virtual void distributeProb(Prob mass, VocabIndex *context); virtual Boolean computeBOW(BOnode *node, const VocabIndex *context, unsigned clen, Prob &numerator, Prob &denominator); virtual Boolean computeBOWs(unsigned order);};/* * Iteration over all backoff nodes of a given order */class NgramBOsIter{public: NgramBOsIter(Ngram &lm, VocabIndex *keys, unsigned order, int (*sort)(VocabIndex, VocabIndex) = 0) : myIter(lm.contexts, keys, order, sort) {}; void init() { myIter.init(); }; BOnode *next() { Trie<VocabIndex,BOnode> *node = myIter.next(); return node ? &(node->value()) : 0; }private: TrieIter2<VocabIndex,BOnode> myIter;};/* * Iteration over all probs at a backoff node */class NgramProbsIter{public: NgramProbsIter(BOnode &bonode, int (*sort)(VocabIndex, VocabIndex) = 0) : myIter(bonode.probs, sort) {}; void init() { myIter.init(); }; LogP *next(VocabIndex &word) { return myIter.next(word); };private: PROB_ITER_T<VocabIndex,LogP> myIter;};#endif /* _Ngram_h_ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -