⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 symal.cpp.svn-base

📁 解码器是基于短语的统计机器翻译系统的核心模块
💻 SVN-BASE
字号:
using namespace std;#include <cassert>#include <iomanip>#include <iostream>#include <fstream>#include <sstream>#include <string>#include <list>#include <vector>#include <set>#include <algorithm>#include "cmd.h"#define MAX_WORD 1000  //maximum lengthsource/target strings #define MAX_M 200     //maximum length of source strings#define MAX_N 200     //maximum length of target strings #define UNION                      1#define INTERSECT                  2#define GROW                       3#define BOOL_YES                   1#define BOOL_NO                    0#define END_ENUM    {   (char*)0,  0 }static Enum_T AlignEnum [] = {{    "union",                        UNION }, {    "u",                            UNION }, {    "intersect",                    INTERSECT},{    "i",                            INTERSECT},{    "grow",                         GROW }, {    "g",                            GROW },   END_ENUM};static Enum_T BoolEnum [] = {   {    "true",        BOOL_YES },    {    "yes",         BOOL_YES },    {    "y",           BOOL_YES },       {    "false",       BOOL_NO },    {    "no",          BOOL_NO },    {    "n",           BOOL_NO },    END_ENUM};// global variables and constantsint* fa; //counters of covered foreign positionsint* ea; //counters of covered english positionsint** A; //alignment matrix with information symmetric/direct/inverse alignmentsint verbose=0;//read an alignment pair from the input stream. int lc = 0;int getals(fstream& inp,int& m, int *a,int& n, int *b){  char w[MAX_WORD], dummy[10];  int i,j,freq;  if (inp >> freq){    ++lc;    //target sentence    inp >> n; assert(n<MAX_N);    for (i=1;i<=n;i++){       inp >> setw(MAX_WORD) >> w;      if (strlen(w)>=MAX_WORD-1) {        cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="              << MAX_WORD-1 << endl;        assert(strlen(w)<MAX_WORD-1);      }    }        inp >> dummy; //# separator    // inverse alignment    for (i=1;i<=n;i++) inp >> b[i];    //source sentence    inp >> m; assert(m<MAX_M);    for (j=1;j<=m;j++){      inp >> setw(MAX_WORD) >> w;      if (strlen(w)>=MAX_WORD-1) {        cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="              << MAX_WORD-1 << endl;        assert(strlen(w)<MAX_WORD-1);      }    }        inp >> dummy; //# separator    // direct alignment    for (j=1;j<=m;j++) {      inp >> a[j];      assert(0<=a[j] && a[j]<=n);    }    //check inverse alignemnt    for (i=1;i<=n;i++)      assert(0<=b[i] && b[i]<=m);        return 1;  }  else    return 0;};//compute union alignmentint prunionalignment(fstream& out,int m,int *a,int n,int* b){    ostringstream sout;    for (int j=1;j<=m;j++)    if (a[j])       sout << j-1 << "-" << a[j]-1 << " ";  for (int i=1;i<=n;i++)    if (b[i] && a[b[i]]!=i)      sout << b[i]-1 <<  "-" << i-1 << " ";  //fix the last " "  string str = sout.str();  str.replace(str.length()-1,1,"\n");  out << str; 	return 1;}//Compute unionalignment Alignmentint printersect(fstream& out,int m,int *a,int n,int* b){    ostringstream sout;  for (int j=1;j<=m;j++)    if (a[j] && b[a[j]]==j)      sout << j-1 << "-" << a[j]-1 << " ";    //fix the last " "  string str = sout.str();  str.replace(str.length()-1,1,"\n");  out << str;	return 1;}//Compute Grow Diagonal Alignment//Nice property: you will never introduce more points//than the unionalignment alignemt. Hence, you will always be able//to represent the grow alignment as the unionalignment of a//directed and inverted alignmentint printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false){      ostringstream sout;      vector <pair <int,int> > neighbors; //neighbors      pair <int,int> entry;      neighbors.push_back(make_pair(-1,-0));   neighbors.push_back(make_pair(0,-1));   neighbors.push_back(make_pair(1,0));   neighbors.push_back(make_pair(0,1));         if (diagonal){      neighbors.push_back(make_pair(-1,-1));      neighbors.push_back(make_pair(-1,1));      neighbors.push_back(make_pair(1,-1));      neighbors.push_back(make_pair(1,1));       }         int i,j,o;         //covered foreign and english positions        memset(fa,0,(m+1)*sizeof(int));    memset(ea,0,(n+1)*sizeof(int));      //matrix to quickly check if one point is in the symmetric   //alignment (value=2), direct alignment (=1) and inverse alignment       for (int i=1;i<=n;i++) memset(A[i],0,(m+1)*sizeof(int));      set <pair <int,int> > currentpoints; //symmetric alignment   set <pair <int,int> > unionalignment; //union alignment      pair <int,int> point; //variable to store points   set<pair <int,int> >::const_iterator k; //iterator over sets      //fill in the alignments   for (j=1;j<=m;j++){      if (a[j]){         unionalignment.insert(make_pair(a[j],j));         if (b[a[j]]==j){             fa[j]=1;ea[a[j]]=1;            A[a[j]][j]=2;               currentpoints.insert(make_pair(a[j],j));         }                  else             A[a[j]][j]=-1;      }   }      for (i=1;i<=n;i++)       if (b[i] && a[b[i]]!=i){ //not intersection         unionalignment.insert(make_pair(i,b[i]));          A[i][b[i]]=1;      }                          int added=1;      while (added){      added=0;      ///scan the current alignment      for (k=currentpoints.begin();k!=currentpoints.end();k++){         //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";         for (o=0;o<neighbors.size();o++){            //cout << "go over check all neighbors\n";            point.first=k->first+neighbors[o].first;                  point.second=k->second+neighbors[o].second;             //cout << point.second-1 << " " << point.first-1 << "\n";            //check if neighbor is inside 'matrix'            if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)               //check if neighbor is in the unionalignment alignment               if (b[point.first]==point.second || a[point.second]==point.first){                  //cout << "In unionalignment ";cout.flush();                  //check if it connects at least one uncovered word                  if (!(ea[point.first] && fa[point.second]))                  {                     //insert point in currentpoints!                     currentpoints.insert(point);                     A[point.first][point.second]=2;                     ea[point.first]=1; fa[point.second]=1;                     added=1;                     //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();                  }               }         }      }   }            if (final){         for (k=unionalignment.begin();k!=unionalignment.end();k++)            if (A[k->first][k->second]==1)            {                           point.first=k->first;point.second=k->second;               //one of the two words is not covered yet               //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";               if ((bothuncovered &&  !ea[point.first] && !fa[point.second]) ||                  (!bothuncovered && !(ea[point.first] && fa[point.second])))               {                  //add it!                  currentpoints.insert(point);                  A[point.first][point.second]=2;                  //keep track of new covered positions                                  ea[point.first]=1;fa[point.second]=1;                                    //added=1;                  //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";               }            }                              for (k=unionalignment.begin();k!=unionalignment.end();k++)                  if (A[k->first][k->second]==-1)                  {                                 point.first=k->first;point.second=k->second;                     //one of the two words is not covered yet                     //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";                     if ((bothuncovered &&  !ea[point.first] && !fa[point.second]) ||                         (!bothuncovered && !(ea[point.first] && fa[point.second])))                     {                        //add it!                        currentpoints.insert(point);                        A[point.first][point.second]=2;                        //keep track of new covered positions                                        ea[point.first]=1;fa[point.second]=1;                                                //added=1;                        //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";                     }                  }      }                                     for (k=currentpoints.begin();k!=currentpoints.end();k++)            sout << k->second-1 << "-" << k->first-1 << " ";                           //fix the last " "         string str = sout.str();         str.replace(str.length()-1,1,"\n");                  out << str;         out.flush();         return 1;                  return 1;}//Main file hereint main(int argc, char** argv){	int alignment=0;   char* input="/dev/stdin";char* output="/dev/stdout";int diagonal=false;int final=false;int bothuncovered=false;		DeclareParams("a", CMDENUMTYPE,  &alignment, AlignEnum,                 "alignment", CMDENUMTYPE,  &alignment, AlignEnum,                 "d", CMDENUMTYPE,  &diagonal, BoolEnum,                 "diagonal", CMDENUMTYPE,  &diagonal, BoolEnum,                 "f", CMDENUMTYPE,  &final, BoolEnum,                 "final", CMDENUMTYPE,  &final, BoolEnum,                 "b", CMDENUMTYPE,  &bothuncovered, BoolEnum,                   "both", CMDENUMTYPE,  &bothuncovered, BoolEnum,                   "i", CMDSTRINGTYPE, &input,                 "o", CMDSTRINGTYPE, &output,                 "v", CMDENUMTYPE,  &verbose, BoolEnum,                 "verbose", CMDENUMTYPE,  &verbose, BoolEnum,                 (char *)NULL);  	GetParams(&argc, &argv, (char*) NULL);      if (alignment==0){      cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"      << "Input file or std must be in .bal format (see script giza2bal.pl).\n";               exit(1);           }	fstream inp(input,ios::in);   fstream out(output,ios::out);	if (!inp.is_open()){		cerr << "cannot open " << input << "\n";		exit(1);	}      if (!out.is_open()){		cerr << "cannot open " << output << "\n";		exit(1);	}     int a[MAX_M],b[MAX_N],m,n;  fa=new int[MAX_M+1];  ea=new int[MAX_N+1];         A=new int *[MAX_N+1];   for (int i=1;i<=MAX_N;i++) A[i]=new int[MAX_M+1];      	switch (alignment){		case UNION:         cerr << "symal: computing union alignment\n";			while(getals(inp,m,a,n,b)) prunionalignment(out,m,a,n,b);			break;		case INTERSECT:          cerr << "symal: computing intersect alignment\n";			while(getals(inp,m,a,n,b)) printersect(out,m,a,n,b);			break;      case GROW:     cerr << "symal: computing grow alignment: diagonal ("         << diagonal << ") final ("<< final << ")"          <<  "both-uncovered (" << bothuncovered <<")\n";                   while(getals(inp,m,a,n,b))                printgrow(out,m,a,n,b,diagonal,final,bothuncovered);                  break;		default:			exit(1);	}      delete [] fa; delete [] ea;    for (int i=1;i<=MAX_N;i++) delete [] A[i];   delete [] A;      exit(0);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -