// File: ttable.cpp
// (A "font size:" label from the web code viewer followed here; it is not part of the source.)
/** TTable.cpp - Bilingual Phrases Translation Table** Copyright (C) 2006 by Zhongjun He <zjhe@ict.ac.cn> Multilingual Interaction Technology and Evaluation Laboratory, ICT, CAS* Begin : 04/13/2006* Last Change : 04/13/2006** This program is free software; you can redistribute it and/or* modify it under the terms of the GNU Lesser General Public* License as published by the Free Software Foundation; either* version 2.1 of the License, or (at your option) any later version.** This program is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the* GNU General Public License for more details.** You should have received a copy of the GNU Lesser General Public* License along with this program; if not, write to the Free Software* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/
#include "TTable.h"

#include <sstream>
/************************************************
 Default constructor.

 Starts with prob_num == 0 and bp_limit == 10. Within this file,
 prob_num is later taken by split() from the first parsed line, and
 bp_limit is the limit Insert() forwards to insert_to_map().
************************************************/
TTable::TTable()
{
    bp_limit = 10;
    prob_num = 0;
}
/************************************************************************/
/* Store the translation-probability weights and the bp limit.          */
/*   l  : weights; split() multiplies l[i] with the i-th log score of   */
/*        a parsed line when it builds the combined score               */
/*   lt : limit that Insert() forwards to insert_to_map()               */
/************************************************************************/
void TTable::Set(const vector<double> &l, const int lt)
{
    bp_limit = lt;
    lambda = l;
}
/************************************************
insert a bilingual phrase
************************************************/
void TTable::Insert(const string &cp, const string &ep, const vector<double> &prob)
{
map<string,map<string,vector<double> > >::iterator it1 = tmap.find(cp);
if (it1 == tmap.end())
{
map<string,vector<double> > temp;
temp[ep] = prob;
tmap[cp] = temp;
}
else
{
map<string,vector<double> >::iterator it2 = (*it1).second.find(ep);
if (it2 == (*it1).second.end())
{
insert_to_map((*it1).second, bp_limit, ep, prob);
}
else
{
if ( (*it2).second < prob)
(*it2).second = prob;
}
}
}
/************************************************
 Get the phrase translation probabilities of a pair.

   cp : source phrase
   ep : target phrase

 Returns a vector of prob_num values. Every slot starts as PROB_SMOOTH;
 when the pair (cp, ep) is stored, slot i is overwritten with element
 i + 1 of the stored vector (element 0 is skipped; for entries parsed
 by split() it holds the combined weighted score).

 The copy stops at the end of the stored vector, so an entry that holds
 fewer values than prob_num leaves the remaining slots at PROB_SMOOTH
 instead of reading past the end of the vector.
************************************************/
vector<double> TTable::GetProb(const string &cp, const string &ep)
{
    typedef vector<double>::size_type Index;

    const Index wanted = prob_num > 0 ? static_cast<Index>(prob_num) : 0;
    vector<double> vp(wanted, static_cast<double>(PROB_SMOOTH));

    map<string, map<string, vector<double> > >::const_iterator src = tmap.find(cp);
    if (src == tmap.end())
        return vp;

    map<string, vector<double> >::const_iterator tgt = src->second.find(ep);
    if (tgt == src->second.end())
        return vp;

    const vector<double> &stored = tgt->second;
    // stored[i + 1] feeds vp[i]; the second condition guards short entries.
    for (Index i = 0; i < wanted && i + 1 < stored.size(); ++i)
        vp[i] = stored[i + 1];

    return vp;
}
/************************************************
write to file
************************************************/
void TTable::WriteToFile(const char* fileName)
{
ofstream out(fileName);
map<string,map<string,vector<double> > >::iterator it1=tmap.begin();
for (; it1!=tmap.end(); it1++)
{
map<string,vector<double> >::iterator it2 = (*it1).second.begin();
for (; it2!=(*it1).second.end(); it2++)
{
out << (*it1).first << " ||| " << (*it2).first << " ||| ";
for (int i=1; i<prob_num+1; i++)
out << (*it2).second[i] << " ";
out << endl;
}
}
}
/************************************************
read from file
************************************************/
void TTable::ReadFromFile(const char* fileName)
{
ifstream in(fileName);
if (!in)
{
cerr << "ERROR at [TTable::ReadFromFile]: Cannot open file " << fileName << "!\n";
exit(1);
}
string line;
cout<<"Reading Bilingual Phrases Translation From File : "<<fileName<<endl;
while (getline(in, line))
{
string cp, ep;
vector<double> p;
if (!split(line, cp, ep, p))
continue;
Insert(cp, ep, p);
}
cout<<"TTable size:"<<tmap.size()<<endl;
}
/*******************************************
get English translations for a Chinese phrase
************************************************/
bool TTable::GetTranslations(const string &cp, map<string, vector<double> > &translation)
{
map<string,map<string,vector<double> > >::const_iterator it1=tmap.find(cp);
if (it1 != tmap.end())
{
translation = (*it1).second;
return true;
}
else
{
return false;
}
}
/************************************************************************/
/* for reading bilingual phrases */
/************************************************************************/
bool TTable::split(const string &line, string &cp, string &ep, vector<double> &prob)
{
int spp1 = line.find("|||");
if (spp1 == string::npos)
return false;
string t = line.substr(0, spp1);
istrstream b(t.c_str());
string cword;
//chinese phrase
while (b>>cword)
cp += cword;
spp1 += 4;
int spp2 = line.find(" |||", spp1);
if (spp2 == string::npos)
return false;
ep = line.substr(spp1, spp2 - spp1);//english phrase
spp1 = spp2 + 4;
string temp = line.substr(spp1, line.size() - spp1);
istrstream buf(temp.c_str());
string pt;
prob.push_back(0.0);//prob[0] = sigma lamda[i] * prob[i]; i=1 to prob_num
double sigprob = 0.0;
int count = 0;
while (buf>>pt)
{
double p;
p = atof(pt.c_str());
p = log(p);
prob.push_back(p);
sigprob += lambda[count++] * p;
}
prob[0] = sigprob;
if(prob_num == 0)
prob_num = count;
int wl = WordLen(ep);
prob.push_back(wl);
return true;
}
/************************************************************************/
/* English phrase length */
/************************************************************************/
int TTable::WordLen(const string &s)
{
istrstream b(s.c_str());
string temp;
int count=0;
while(b>>temp)
count++;
return count;
}
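// --- Web code-viewer residue (not part of ttable.cpp): keyboard shortcuts ---
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -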