lm.h

来自「这是一款很好用的工具包」· C头文件 代码 · 共 174 行

H
174
字号
/* * LM.h -- *	Generic LM interface * * The LM class defines an abstract languge model interface which all * other classes refine and inherit from. * * Copyright (c) 1995-2005 SRI International.  All Rights Reserved. * * @(#)$Header: /home/srilm/devel/lm/src/RCS/LM.h,v 1.37 2006/01/05 20:21:27 stolcke Exp $ * */#ifndef _LM_h_#define _LM_h_#include <iostream>using namespace std;#include "Boolean.h"#include "Prob.h"#include "File.h"#include "Vocab.h"#include "SubVocab.h"#include "TextStats.h"#include "Debug.h"#include "MemStats.h"typedef unsigned int Count;		/* a count of something */typedef double FloatCount;		/* a fractional count */class LM;		/* forward declaration */class NgramStats;	/* forward declaration *//* * This is the iter class from which more specialized iters can be * derived.  Not to be confused with the wrapper object above. * The default behavior implemented here is to simply enumerate all * words in the vocabulary. */class _LM_FollowIter{public:    _LM_FollowIter(LM &lm, const VocabIndex *context);    virtual void init();    virtual VocabIndex next();    virtual VocabIndex next(LogP &prob);private:    LM &myLM;    const VocabIndex *myContext;    VocabIter myIter;};class LM: public Debug{    friend class _LM_FollowIter;public:    LM(Vocab &vocab);    virtual ~LM();    virtual LogP wordProb(VocabIndex word, const VocabIndex *context) = 0;    virtual LogP wordProb(VocabString word, const VocabString *context);    virtual LogP wordProbRecompute(VocabIndex word, const VocabIndex *context);		    /* recompute word prob using last wordProb() context */    virtual LogP sentenceProb(const VocabIndex *sentence, TextStats &stats);    virtual LogP sentenceProb(const VocabString *sentence, TextStats &stats);    virtual LogP contextProb(const VocabIndex *context,					unsigned clength = maxWordsPerLine);		    /* joint probability of a reversed word string */    virtual LogP countsProb(NgramStats &counts, TextStats &stats,				    unsigned order, Boolean entropy = false);						/* probability from counts */    virtual unsigned pplCountsFile(File &file, unsigned order, TextStats &stats,					const char *escapeString = 0,					Boolean entropy = false);    virtual unsigned pplFile(File &file, TextStats &stats,				const char *escapeString = 0);    virtual unsigned rescoreFile(File &file, double lmScale, double wtScale,			       LM &oldLM, double oldLmScale, double oldWtScale,			       const char *escapeString = 0);    virtual void setState(const char *state);	/* hook to manipulate global						   LM state */    virtual Prob wordProbSum(const VocabIndex *context);						/* sum of all word probs */    virtual VocabIndex generateWord(const VocabIndex *context);    virtual VocabIndex *generateSentence(unsigned maxWords = maxWordsPerLine,				VocabIndex *sentence = 0);    virtual VocabString *generateSentence(unsigned maxWords = maxWordsPerLine,				VocabString *sentence = 0);    virtual void *contextID(const VocabIndex *context)	{ unsigned length; return contextID(context, length); };    virtual void *contextID(const VocabIndex *context, unsigned &length)	{ return contextID(Vocab_None, context, length); };				    /* context used by LM */    virtual void *contextID(VocabIndex word, const VocabIndex *context,							unsigned &length);				    /* context used for specific word */    virtual LogP contextBOW(const VocabIndex *context, unsigned length);				   /* backoff weight for truncating context */    virtual Boolean isNonWord(VocabIndex word);    virtual Boolean read(File &file, Boolean limitVocab = false);    virtual void write(File &file);    virtual Boolean running() { return _running; }    virtual Boolean running(Boolean newstate)      { Boolean old = _running; _running = newstate; return old; };    virtual _LM_FollowIter *followIter(const VocabIndex *context)	{ return new _LM_FollowIter(*this, context); };    virtual void memStats(MemStats &stats);    Vocab &vocab;			/* vocabulary */    SubVocab noiseVocab;		/* noise tag set */    virtual VocabIndex *removeNoise(VocabIndex *words);					/* strip noise and pause tags */    const char *stateTag;		/* tag introducing global state info */    Boolean reverseWords;		/* compute word probs in reverse */protected:    Boolean _running;	/* indicates the LM is being used for sequential			 * word prob computation */    unsigned prepareSentence(const VocabIndex *sentence,				VocabIndex *reversed, unsigned len);			/* reverse sentence for wordProb computation */};/* * LMFollowIter -- *	Iterator enumerating possible follow words and their probabilities * * The idea here is that the user can declare an iterator  *    LM_FollowIter(lm) * without refering to the classname of lm itself.  This will create * the following wrapper object that contains a pointer to the actual * class-specific iterator, using the LM::followIter virtual function. * All iterator operations then simply dispatch to the real iterator. */class LM_FollowIter{public:    LM_FollowIter(LM &lm, VocabIndex *context)	: realIter(lm.followIter(context)) {};    ~LM_FollowIter() { delete realIter; };    virtual void init() { realIter->init(); };    virtual VocabIndex next() { LogP prob; return next(prob); }    virtual VocabIndex next(LogP &prob) { return realIter->next(prob); }private:    _LM_FollowIter *realIter;		/* LM-specific iterator */};#endif /* _LM_h_ */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?