⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 silkroad.cpp

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 CPP
字号:
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>

#include "Hypothesis.h"

using namespace std;

// Verbose-output switch; main() sets it to true when -printmore is passed.
bool printmore = false;

// Hypothesis pool and the vector that tracks pool usage.

// Pool storage; main() resizes it to the total hypothesis count
// (the -r value multiplied by the stack size) before decoding starts.
VECPOOL vecHypo;

// main() fills this with the indices 0 .. pool size - 1 before decoding;
// presumably the list of pool slots that are still free -- confirm
// against the code that consumes it.
VECUNSED vecNotUsed;


int main(int argc, char** argv) 
{
	string configFile;
	string inputFile;
	string outputFile; 
	string envcb;
	string chvcb;
	int STACKSIZE = 100;  //每个栈的默认大小
	double BEAMTHRESHOLD = 0.00001;//
	double DISTOR = -2.30259;
	double LMLIMIT = -10;
	int LEN = 9;
	int NBEST = 1;
	int TOTALHYPO = 200;

	ifstream input;
	ofstream output;

	string arg;
	int i = 0;
	if (argc < 2) 
	{
		cout << "Usage: Please specify the lm, config and the data to be translated" << endl;
		cout << "-f		Specify the Configuration file" << endl;
		cout << "-in	Specify the input data" << endl;
		cout << "-out	Specify the output data" << endl;
		cout << "-s		Specify the maximum size of the beam, default 100 " << endl;
		cout << "-b		Specify the beam threshold, default 0.00001 " << endl;
		cout << "-l		Specify the N-best output, default 1" << endl;
		cout << "-d		Specify the distortion score, default -2.30259" << endl;
		cout << "-dl		Specify the distortion lenght, default -9" << endl;
		cout << "-m		Specify the lm score, default -10" << endl;
		cout << "-r		Specefy the maximum of the sentence, just for reference" << endl;
		cout << "-printmore Specify whether output more details" << endl;
		return 0;
	}
	else
	{
		while (++i < argc) 
		{
			arg = argv[i];
			if (arg == "-f") configFile = argv[++i];
			else if (arg == "-in") inputFile = argv[++i];
			else if (arg == "-out") outputFile = argv[++i];
			else if (arg == "-s") STACKSIZE = atoi(argv[++i]);
			else if (arg == "-b") BEAMTHRESHOLD = atof(argv[++i]);
			else if (arg == "-l") NBEST = atoi(argv[++i]);
			else if (arg == "-dl") DISTOR = atof(argv[++i]);
			else if (arg == "-m") LMLIMIT = atof(argv[++i]);
			else if (arg == "-len") LEN = atoi(argv[++i]);
			else if (arg == "-r") TOTALHYPO = atoi(argv[++i]);
			else if (arg == "-printmore") printmore = true;
			else {
				cerr << "Unrecognized option: " << arg << "\n";
				exit(1);
			}
		}
	}
	


	input.open(inputFile.c_str(), std::ios::in);
	if (!input) 
	{
		cout << "Input File Error!" <<	endl;
		return 0; 
	}

	Hypothesis hypothesis(BEAMTHRESHOLD, STACKSIZE, NBEST, DISTOR, LMLIMIT, LEN);

	if (!hypothesis.load(configFile)) {
		cout << "Hypothesis load config files error !" << endl;
		return 0;
	}
	
	//分配内存池

	TOTALHYPO = TOTALHYPO * STACKSIZE;
	vecHypo.resize(TOTALHYPO);
	
	int sizeo = TOTALHYPO * sizeof(HypothesisElement);

//	vecNotUsed.assign(TOTALHYPO, 0);
	for (int n = 0; n < TOTALHYPO; n++)
	{
		vecNotUsed.push_back(n);
	}

	string str;
	time_t oldTime, newTime;
	time(&oldTime);
	cout << "Start decoding ..." << endl;
	
	//
	string decoTimeFileSuff(".time");
	string decoTimeFile = outputFile + decoTimeFileSuff;
	ofstream decoTime;
	decoTime.open(decoTimeFile.c_str(), std::ios::out | std::ios::app);
	if (!decoTime) {
		cout << "open decode time file error! " << endl;
	}

	if (NBEST == 1) {
		output.open(outputFile.c_str(), std::ios::out | std::ios::app);
		if (!output) 
		{
			cout << "Output File Error!" <<	endl;
			return 0; 
		}
		while (getline(input, str)) {
			int pos;
			if ((pos = str.find("srcset")) != string::npos ) {
				str.replace(pos, 4, "tst");
			}
			else if ((pos = str.find("<doc")) != string::npos) {
				str.replace(str.length() - 1, 1, " site=\"HIT\">");
			}
			else if ((pos = str.find("id=")) != string::npos ) {
				while (str.find("</s") == string::npos) {
					string strTmp;
					getline(input, strTmp);
					str += strTmp;
				}
				pos = str.find_last_of("<");
				int startpos = str.find(">");
				string strDeal(str, startpos + 1, pos - startpos - 1);
				string strEnd(str, pos, str.length() - pos);
				//开始解码
				time_t onestart, oneend;
				time(&onestart);
				hypothesis.initialize(strDeal);
				string strDealtoSen = hypothesis.decoder(outputFile);
				
				time(&oneend);
				decoTime << strDeal.length() << "\t" << difftime(oneend, onestart) << endl;

				hypothesis.clear();	
				
				string strStart(str, 0, startpos + 1);

				output << strStart << strDealtoSen << strEnd << endl;
				continue;
			}
			output << str << endl;
		}
		output.clear();
		output.close();
	}
	else {
		while (getline(input, str)) 
		{
			hypothesis.initialize(str);
			string result = hypothesis.decoder(outputFile);
			
			cout << "NBest output file is " << result << endl;			
			hypothesis.clear();		
		}	
	}
	input.clear();
	input.close();
	decoTime.clear();
	decoTime.close();
	time(&newTime);
	double diff = difftime(newTime, oldTime);
	cout << "decode finished , it take " << diff << " seconds!" << endl;
	return 1;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -