// ⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
// ⭐ 虫虫下载站
//
// 📄 merge_lm.cpp
//
// 📁 这是一款很好用的工具包
// 💻 CPP
// 字号:
// merge_lm.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "MyNgram.h"

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Raw option values, filled in by parse_option_file() from the config file.
// They are stored as text; _tmain converts them with atoi/atof where needed.

// Value of "in_arpa=": comma-separated list of input LM files.
string g_in_arpa;
// Value of "out_arpa=": file the merged LM is written to.
string g_out_arpa;
// Value of "out_vocab=": file the merged LM's vocabulary is written to.
string g_out_vocab;
// Value of "order=": n-gram order (_tmain accepts only 2 or 3).
string g_order;
// Value of "lambda=": comma-separated weights, one per entry of in_arpa.
string g_lambda;


// Returns the part of szPathName before its last '\\', or "." when the path
// contains no backslash. The result lives in a static buffer.
const char* GetDirFromPath(const char* szPathName);

// Reads the LM stored in `filename` and mixes it into oldLM; returns the
// resulting LM (the newly read one when oldLM is null).
LM * makeMixLM(const char *filename, Vocab &vocab, unsigned order, LM *oldLM, double lambda1, double lambda2);
// Loads the g_* option strings from a "key=value" config file; returns false
// when the file cannot be opened or one of the required values is missing.
bool parse_option_file(const char* option_file);


/**
 * Splits a comma-separated list into its items.
 *
 * Every space and tab is dropped (including ones inside an item), then the
 * text is cut at each ','. Empty items produced by leading, trailing or
 * doubled commas are skipped, so ",a,,b," yields {"a", "b"}.
 *
 * Uses only the standard library, so it no longer depends on ATL/MFC CString.
 *
 * @param str  the list text, e.g. "a.arpa, b.arpa"
 * @param out  receives the items in order; its previous contents are discarded
 * @return     always true
 */
bool split(std::string str, std::vector<std::string>& out)
{
	out.clear();

	std::string item;
	for (std::string::size_type i = 0; i < str.size(); ++i)
	{
		const char ch = str[i];
		if (ch == ' ' || ch == '\t')
		{
			continue;	// blanks never become part of an item
		}
		if (ch != ',')
		{
			item += ch;
			continue;
		}
		// Separator reached: emit the item collected so far, if any.
		if (!item.empty())
		{
			out.push_back(item);
			item.clear();
		}
	}
	// The last item has no separator after it.
	if (!item.empty())
	{
		out.push_back(item);
	}
	return true;
}

int _tmain(int argc, _TCHAR* argv[])
{
	if (argc != 2)
	{
		printf("Usage:	merge_lm cfg_file!\n");
		exit(-1);
	}

	parse_option_file( argv[1] );
	
	vector<string> in_lms;
	vector<string> in_lambdas;
	unsigned order = (unsigned)atoi(g_order.c_str());

	split(g_in_arpa, in_lms);
	split(g_lambda, in_lambdas);
	if ( in_lambdas.size() != in_lms.size() || (order != 2 && order != 3 ))
	{
		printf("config file %s format error!", argv[1]);
		exit(-1);
	}
	
	for(int i = 0 ; i < (int)in_lms.size() ; i++)
	{
		printf("%s\t%s\n", in_lms[i].c_str(), in_lambdas[i].c_str());
	}

	const char *lmFile1 = in_lms[0].c_str() ;
	double lambda = atof(in_lambdas[0].c_str());
	
	Vocab *vocab ;
	Ngram *ngramLM ;
	vocab = new Vocab;	assert(vocab != 0);	vocab->remove(vocab->unkIndex());	vocab->remove(vocab->pauseIndex());	vocab->unkIndex() = vocab->addWord("<UNK>");
	ngramLM = new MyNgram(*vocab, order);	assert(ngramLM != 0);	File file(lmFile1, "r");	if (!ngramLM->read(file)) {		cerr << "format error in lm file\n";		exit(1);	}	
	const char *lmFile2 ;
	double mixLambda = lambda;	for(int i = 1 ; i < (int)in_lms.size(); i++)	{		lmFile2 = in_lms[i].c_str();		double d = atof(in_lambdas[i].c_str());		mixLambda += d;		ngramLM = (Ngram *)makeMixLM(lmFile2, *vocab, order, ngramLM, d, mixLambda);	}	
	ngramLM->write(File(g_out_arpa.c_str(), "w"));
	ngramLM->vocab.write(File(g_out_vocab.c_str(), "w"));
	
	delete vocab;
	delete ngramLM;
	return 0;
}

/**
 * Returns the directory part of a backslash-separated path.
 *
 * The result is the text before the last '\\' in szPathName. When the path
 * has no backslash, or szPathName is null, "." is returned.
 *
 * The result is stored in a static 1024-byte buffer: it is overwritten by the
 * next call, and a directory longer than 1023 characters is truncated to fit
 * instead of overrunning the buffer.
 *
 * @param szPathName  NUL-terminated path, may be null
 * @return            pointer to the static buffer holding the directory
 */
const char* GetDirFromPath(const char* szPathName)
{
	static char szPath[1024];
	const char* p = szPathName ? strrchr(szPathName, '\\') : 0;

	if (p)
	{
		size_t len = (size_t)(p - szPathName);
		// Never copy more than the buffer can hold (one byte is kept for NUL).
		if (len > sizeof(szPath) - 1)
		{
			len = sizeof(szPath) - 1;
		}
		memcpy(szPath, szPathName, len);
		szPath[len] = 0;
	}
	else
	{
		strcpy(szPath, ".");
	}
	return szPath;
}


//lambda1为要mix的filename的权重,lambda2为2个lm的权重和LM * makeMixLM(const char *filename, Vocab &vocab, unsigned order, LM *oldLM, double lambda1, double lambda2){	File file(filename, "r");	/*	* create factored LM if -factored was specified, 	* class-ngram if -classes were specified,	* and otherwise a regular ngram	*/	MyNgram *lm = new MyNgram(vocab, order);	assert(lm != 0);	if (!lm->read(file)) {		cerr << "format error in second-lm file " << filename << endl;		exit(1);	}	/*	* Compute mixture lambda (make sure 0/0 = 0)	*/	Prob lambda = (lambda1 == 0.0) ? 0.0 : lambda1/lambda2;	if (oldLM == 0) {		return lm;	} else {		/*		* static mixture		*/		((MyNgram *)oldLM)->mixProbs(*lm, 1-lambda,true);		//((MyNgram *)oldLM)->mixProbs(*lm, 1-lambda,false);		delete lm;		return oldLM;	}}bool parse_option_file(const char* option_file)
{
	FILE* fp = fopen(option_file, "rt");
	if ( !fp )
	{
		cout << option_file << " open for error!" << endl ;
		return false;
	}

	CString csTemp;
	char szBuffer[1024];
	while ( fgets(szBuffer, 1024, fp) )
	{
		csTemp = szBuffer;
		csTemp.TrimRight();
		csTemp.TrimLeft();
		csTemp.Replace(" ", "");
		csTemp.Replace("\t", "");
		if ( csTemp.Mid(0, 8) == "in_arpa=" )
		{
			g_in_arpa = csTemp.Mid(8);
			continue;
		}
		if ( csTemp.Mid(0, 9) == "out_arpa=" )
		{
			g_out_arpa = csTemp.Mid(9);
			continue;
		}
		if ( csTemp.Mid(0, 6) == "order=" )
		{
			g_order = csTemp.Mid(6);
			continue;
		}
		if ( csTemp.Mid(0, 10) == "out_vocab=" )
		{
			g_out_vocab = csTemp.Mid(10);
			continue;
		}
		if ( csTemp.Mid(0, 7) ==  "lambda=" )
		{
			g_lambda = csTemp.Mid(7);
			continue;
		}
	}
	fclose(fp);

	if ( g_in_arpa.empty() || g_order.empty() || g_out_arpa.empty() || g_out_vocab.empty() || g_lambda.empty() )
	{
		printf("some values in %s is empty", option_file);
		return false;
	}
	return true;
}

// ⌨️ 快捷键说明
//
// 复制代码 Ctrl + C
// 搜索代码 Ctrl + F
// 全屏模式 F11
// 切换主题 Ctrl + Shift + D
// 显示快捷键 ?
// 增大字号 Ctrl + =
// 减小字号 Ctrl + -