// languagemodel.h
//
// From "The decoder is the core module of a phrase-based statistical machine
// translation system" - C header file, 62 lines.


#ifndef LANGUAGE_MODEL_H
#define LANGUAGE_MODEL_H

#include <vector>
#include <map>
#include <string>
#include <fstream>
#include <algorithm>
#include <iostream>
#include <cmath>
#include "Vocab.h"

using namespace std;

// Natural logarithm of 10.
// The previous literal (2.30258592994) had dropped the zero that follows
// "2.302585", leaving it off by about 8.4e-7 from the true value.
const double LN = 2.302585092994046;
// Large negative finite value.
// NOTE(review): presumably used as a "minus infinity" score sentinel - confirm against the users of this constant.
const double INFINITE = -99999;
//const double UNKPROB = -10;// Pharaoh's release value, but Koehn suggests setting it to -50 or lower
// NOTE(review): presumably the back-off weight applied to unknown words - confirm.
const double UNKBO = 0;
// NOTE(review): presumably the translation-model score applied to unknown entries - confirm.
const double UNKTM = 0;
// Very small positive value; its purpose is not evident from this header.
const double avs = 1.7e-100;
// The original comment gave the derivation as LN / (POW(2, 128)).
// NOTE(review): 4.09021e-15 is numerically about LN / 2^49, not LN / 2^128
// (which would be about 6.8e-39) - confirm which one was intended.
const double STEP = 4.09021e-15;
// Equals 2^31 - 1, the largest value of a 32-bit signed integer; its use is
// not visible in this header.
const long SEP = 2147483647;

// Value stored for one n-gram entry: a probability paired with a back-off
// weight. No constructor is declared, so both members are left uninitialized
// by default-initialization and must be assigned before being read.
// NOTE(review): whether the values are in the log domain (e.g. log10) is not
// visible in this header - confirm against the loader.
class ProbAndBO
{
public:
	// Probability of the n-gram.
	double prob;
	// Back-off weight of the n-gram.
	double backoffWeight;
};
// Sequence of integer ids identifying one n-gram (presumably vocabulary word
// ids - confirm against Vocab); used as the key type of mapNgram.
typedef vector<int> GramIDs;
// Ordered map from an n-gram's id sequence to its ProbAndBO entry.
typedef map<GramIDs, ProbAndBO> mapNgram;

// N-gram language model interface with separate unigram, bigram and trigram
// tables keyed by id sequences (GramIDs).
//
// Only declarations are visible in this header; the member functions are
// defined elsewhere. Notes below that go beyond the signatures are marked as
// assumptions to confirm against the implementation file.
//
// The class holds an ifstream by value, so instances cannot be copied.
class LanguageModel
{
public:
	// NOTE(review): which member `prob` initializes (WEITHTLM or UNKPROB) is
	// not visible here - confirm in the implementation.
	LanguageModel(double prob);
//	double getLMweifht();
	// Loads the model from `fileName`, using `vocab`.
	// NOTE(review): presumably returns true on success and keeps `vocab` in
	// `vcb` - confirm.
	bool load(string fileName, Vocab *vocab);
	// Unigram lookups for a single word id.
	// NOTE(review): presumably return the stored prob / backoffWeight for the
	// word; handling of unknown ids is not visible here - confirm.
	double uniProb(int word);
	double uniBO(int word);
	// Bigram lookups for the id pair (word1, word2).
	// NOTE(review): presumably word1 precedes word2 - confirm.
	double biProb(int word1, int word2);
	double biBO(int word1, int word2);
	// Trigram lookup for the id triple (word1, word2, word3).
	double triProb(int word1, int word2, int word3);
	// Used in TransOptions; having two variants (this one and the
	// commented-out wordsProb below) is purely for convenience.
	// NOTE(review): presumably scores the whole id sequence `Ephrase` - confirm.
	double wordProb(vector<int> Ephrase);
//	double wordsProb(vector<string> Ephrase);// used in Hypothesis

private:// A single map would be simpler; three are used to improve lookup efficiency.
	mapNgram uniGram;
	mapNgram biGram;
	mapNgram triGram;
	// Input stream member; presumably used by load() to read the model file - confirm.
	ifstream input;
	// NOTE(review): name looks like a misspelling of WEIGHTLM; presumably the
	// language-model weight - confirm.
	double WEITHTLM;
	// NOTE(review): presumably the probability assigned to unknown words
	// (compare the commented-out UNKPROB constant earlier in this file) - confirm.
	double UNKPROB;
	// Vocabulary pointer. NOTE(review): ownership and lifetime are not visible
	// in this header - confirm whether this class is expected to free it.
	Vocab *vcb;
};


#endif

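// Keyboard shortcuts (residue from the code-viewer web page):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -
//   Show shortcuts:     ?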