⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ttable.cpp

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 CPP
字号:
/*
* TTable.cpp  -  Bilingual Phrases Translation Table
*
* Copyright (C) 2006 by Zhongjun He <zjhe@ict.ac.cn>
*   Multilingual Interaction Technology and Evaluation Laboratory, ICT, CAS
* Begin       : 04/13/2006
* Last Change : 04/13/2006
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

#include "TTable.h"

#include <sstream>

/************************************************
  Default constructor.

  prob_num starts at 0; split() replaces it with the
  number of scores found on the first line it parses.
  bp_limit starts at 10; Insert() forwards it to
  insert_to_map(), and Set() can override it.
************************************************/
TTable::TTable()
	: prob_num(0), bp_limit(10)
{
}

/************************************************************************/
/* Configure the table.                                                 */
/*   l  - weight vector, copied into the member lambda                  */
/*   lt - new value for bp_limit (forwarded to insert_to_map by Insert) */
/************************************************************************/
void TTable::Set(const vector<double> &l, const int lt)
{
	bp_limit = lt;
	lambda = l;
}

/************************************************
 insert a bilingual phrase 
************************************************/
void TTable::Insert(const string &cp, const string &ep, const vector<double> &prob)
{
	map<string,map<string,vector<double> > >::iterator it1 = tmap.find(cp);

	if (it1 == tmap.end())
	{
		map<string,vector<double> > temp;
		temp[ep] = prob;
		tmap[cp] = temp;
	}
	else
	{
		map<string,vector<double> >::iterator it2 = (*it1).second.find(ep);

		if (it2 == (*it1).second.end())
		{
			insert_to_map((*it1).second, bp_limit, ep, prob);
		}
		else
		{			
			if ( (*it2).second < prob)
				(*it2).second = prob;				
		}		
	}
}

/************************************************
  Get the translation scores of a phrase pair.

  cp - source (Chinese) phrase
  ep - target (English) phrase

  Returns a vector of prob_num values. When the pair is
  stored, elements 1..prob_num of its stored vector are
  copied out (element 0 is the ranking slot written by
  split()). When the pair is unknown, every value is
  PROB_SMOOTH.

  Stored vectors that are shorter than prob_num + 1
  (possible because prob_num is fixed by the first line
  split() parses) leave the remaining values at
  PROB_SMOOTH instead of reading past the end.

  NOTE(review): split() stores log(p), while the default
  returned here is PROB_SMOOTH itself rather than
  log(PROB_SMOOTH) - confirm that callers expect this.
************************************************/
vector<double> TTable::GetProb(const string &cp, const string &ep)
{
	// Default answer: prob_num copies of the smoothing value.
	vector<double> vp;
	if (prob_num > 0)
		vp.resize(prob_num, PROB_SMOOTH);

	map<string,map<string,vector<double> > >::const_iterator it1 = tmap.find(cp);
	if (it1 == tmap.end())
		return vp;

	map<string,vector<double> >::const_iterator it2 = it1->second.find(ep);
	if (it2 == it1->second.end())
		return vp;

	const vector<double> &stored = it2->second;
	for (int i = 1; i < prob_num + 1; i++)
	{
		// Never index past the end of a short entry.
		if (static_cast<vector<double>::size_type>(i) >= stored.size())
			break;
		vp[i-1] = stored[i];
	}
	return vp;
}

/************************************************
  write to file
************************************************/
void TTable::WriteToFile(const char* fileName)
{
	ofstream out(fileName);
	map<string,map<string,vector<double> > >::iterator it1=tmap.begin();

	for (; it1!=tmap.end(); it1++)
	{
		map<string,vector<double> >::iterator it2 = (*it1).second.begin();
		for (; it2!=(*it1).second.end(); it2++)
		{
			out << (*it1).first << " ||| " << (*it2).first << " ||| ";

			for (int i=1; i<prob_num+1; i++)
				out << (*it2).second[i] << " ";
			
			out << endl;
		}
	}
}

/************************************************
  read from file
************************************************/
void TTable::ReadFromFile(const char* fileName)
{
	ifstream in(fileName);

	if (!in)
	{
		cerr << "ERROR at [TTable::ReadFromFile]: Cannot open file "	<< fileName << "!\n";
		exit(1);
	}
	string line;
	cout<<"Reading Bilingual Phrases Translation From File : "<<fileName<<endl;

	while (getline(in, line))
	{
		string cp, ep;
		vector<double> p;

		if (!split(line, cp, ep, p))
			continue;

		Insert(cp, ep, p);
	}

	cout<<"TTable size:"<<tmap.size()<<endl;
}

/*******************************************
  get English translations for a Chinese phrase
************************************************/
bool TTable::GetTranslations(const string &cp, map<string, vector<double> > &translation) 
{
	map<string,map<string,vector<double> > >::const_iterator it1=tmap.find(cp);
	
	if (it1 != tmap.end())
	{
		translation = (*it1).second;
		return true;
	}
	else 
	{
		return false;
	}
}

/************************************************************************/
/* for reading bilingual phrases                                        */
/************************************************************************/
bool TTable::split(const string &line, string &cp, string &ep, vector<double> &prob)
{
	int spp1 = line.find("|||");
	if (spp1 == string::npos)
		 return false;

	string t = line.substr(0, spp1);
    istrstream b(t.c_str());
	string cword;
	
	//chinese phrase
	while (b>>cword)
		cp += cword;

	spp1 += 4;
	int spp2 = line.find(" |||", spp1);
	if (spp2 == string::npos)
		return false;
	ep = line.substr(spp1, spp2 - spp1);//english phrase

	spp1 = spp2 + 4;

	string temp = line.substr(spp1, line.size() - spp1);
	istrstream buf(temp.c_str());
	string pt;
	
	prob.push_back(0.0);//prob[0] = sigma lamda[i] * prob[i]; i=1 to prob_num
	double sigprob = 0.0;

	int count = 0;
	while (buf>>pt)
	{
		double p;
		p = atof(pt.c_str());
        if(p<PROB_SMOOTH)            p = PROB_SMOOTH;        p = log(p);	
		prob.push_back(p);
		sigprob += lambda[count++] * p;
	}
//	prob[0] = sigprob;
        prob[0] = prob[2];
	if(prob_num == 0)
		prob_num = count;

	int wl = WordLen(ep);
	prob.push_back(wl);
	return true;
}

/************************************************************************/
/* English phrase length                                                */
/************************************************************************/
int TTable::WordLen(const string &s)
{
	istrstream b(s.c_str());
	string temp;
	int count=0;
	while(b>>temp)
		count++;
	return count;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -