⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hypothesis.h

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 H
字号:
#ifndef HYPOTHESIS_H
#define HYPOTHESIS_H

#include <ctime>
#include <deque>
#include <list>
#include <set>
#include <string>
#include <strstream>
#include <vector>

#include "LanguageModel.h"
#include "TransOptions.h"
#include "HypothesisElement.h"
#include "ConfigINI.h"
#include "Vocab.h"

// Container types for the shared hypothesis pool.
// The std:: qualification is needed because these typedefs appear before the
// using-directive further down; without it they only compile when an earlier
// include has already pulled namespace std into scope.
typedef std::vector<HypothesisElement> VECPOOL;
typedef std::deque<int> VECUNSED;

// Both objects are defined outside this header.
// vecHypo is indexed by the integer handles that Less compares.
// The role of vecNotUsed is not visible here (presumably free pool slots --
// TODO confirm against the implementation).
extern VECPOOL vecHypo;
extern VECUNSED vecNotUsed;

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept because code that includes this header may rely on it.
using namespace std;

// Binary predicate over hypothesis pointers.
// Returns true when hyp1->totalScore exceeds hyp2->totalScore by at least avs.
// avs is declared outside this header; its value is not visible here.
// NOTE(review): comp(x, x) evaluates (0 >= avs), so this is a strict weak
// ordering only if avs is strictly positive -- confirm before using it with
// std::sort or an ordered container.
class Greaters
{
public:
	// const so the predicate can be invoked through a const comparator object,
	// as ordered containers do.
	bool operator()(HypothesisElement *hyp1, HypothesisElement *hyp2) const
	{
		const double scoreGap = hyp1->totalScore - hyp2->totalScore;
		return scoreGap >= avs;
	}
};

// Binary predicate over integer handles into the global vecHypo pool.
// Returns true when vecHypo[hyp1].totalScore minus vecHypo[hyp2].totalScore
// is at most avs. No bounds checking is performed on the handles.
// avs is declared outside this header; its value is not visible here.
// NOTE(review): comp(x, x) evaluates (0 <= avs), which is true whenever
// avs >= 0. In that case this is not a strict weak ordering, which the
// standard sorting algorithms require -- confirm the intended value of avs.
class Less
{
public:
	// const so the predicate can be invoked through a const comparator object.
	bool operator()(int hyp1, int hyp2) const
	{
		const double scoreGap = vecHypo[hyp1].totalScore - vecHypo[hyp2].totalScore;
		return scoreGap <= avs;
	}
};

//typedef multiset<HypothesisElement *, Greaters> multisetHYE;
//typedef vector<multisetHYE> HypothesisStack;

//typedef vector<HypothesisElement *> vecHYE;
//typedef vector<vecHYE> HypothesisStack;

// Plain record describing one arc: two integer endpoints, a cost difference
// and a list of integer phrase entries. What the integers index is not
// visible from this header (presumably hypothesis and target-word ids --
// TODO confirm). No constructor is declared, so the scalar members hold
// indeterminate values until assigned.
class Arc
{
public:
	int                from;        // source endpoint
	int                to;          // destination endpoint
	double             diffCost;    // cost difference carried by the arc
	std::vector<int>   tosPhrase;   // integer phrase entries attached to the arc
};

// Collection of arcs.
typedef std::vector<Arc> ARC;

// One stack: a list of integer entries. Despite the name this is a plain
// vector, not a std::multiset.
typedef std::vector<int> multisetHYE;

// All stacks. Each entry stores the position of a hypothesis in the pool
// (note: not the hypothesis ID).
typedef std::vector<multisetHYE> HypothesisStack;


// Decoder class. Only the declaration lives in this header; every member
// function is defined elsewhere.
//
// Standard-library names are spelled with std:: so the declaration does not
// depend on a using-directive being in effect. The types are unchanged, so
// existing out-of-line definitions still match.
//
// NOTE(review): the class holds raw pointers and declares a destructor but no
// copy constructor or copy assignment. If the pointers are owned, copying a
// Hypothesis would be unsafe -- confirm ownership in the implementation.
class Hypothesis
{
public:
	// How each argument maps onto the members below is decided in the
	// out-of-line definition, which is not visible here.
	Hypothesis(double thresholdI, int stackThrehold, int nBest, double dislimit, double lmlimit, int len);

	// Loads the ini, lm and to.
	bool load(std::string inifileName);

	// Initializes the stack size according to the input sentence.
	void initialize(std::string sentence);

	// Releases hypothesisStack.
	void clear();

	std::string decoder(std::string fileName);

	~Hypothesis();


private:
	void split(const std::string& line, std::vector<std::string>& strs);
	double fcCal(std::set<int> phraseID, int stackSize);
	double lmCal(int last1, int last2, std::vector<int> newPhrase, int& newlast1, int& newlast2);
	void recombineAndbeam(int newHyp);
	void eraseMultiSet(multisetHYE& multisethye, int limit);

	// NOTE(review): cutStack and CutStack differ only in letter case and share
	// a signature -- confirm that both are defined and both are needed.
	void cutStack(multisetHYE& multisethye, int limit, int stackNO);
	void CutStack(multisetHYE& multisethye, int limit, int stackNO);

	void dealout(std::string& str);
	void findNBest(std::string outputFile);
	std::string findBest();
	int findProper(VECUNSED& vecnused);


	ConfigINI config;
	Para para;
	LanguageModel *lm;   // LM
	TransOptions *to;    // Translation Option
	Vocab *enVcb;
	Vocab *cnVcb;

	HypothesisStack hypothesisStack;
	ARC arc;

	PhraseSnippet phraseSnippet;
	PhraseSnippetPosition phraseSnippetPosition;
	int stackSize;       // actually nf, i.e. the number of stacks

	int eachStackSize;   // maximum capacity of each stack; default: 100

	double threshold;    // beam threshold applied when a hypothesis enters a stack; 0.00001
	std::vector<double> THRESHOLD;

	int NBEST;           // number of n-best outputs
	double DISTORTIONLIMIT;
	double LMLimit;
	int DISLENGHT;       // spelling kept as-is: the name is used by the implementation

//	map<int, string> unkTMP;
	std::vector<int> sentenceIDS;
};


#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -