📄 symal.cpp.svn-base
字号:
using namespace std;#include <cassert>#include <iomanip>#include <iostream>#include <fstream>#include <sstream>#include <string>#include <list>#include <vector>#include <set>#include <algorithm>#include "cmd.h"#define MAX_WORD 1000 //maximum lengthsource/target strings #define MAX_M 200 //maximum length of source strings#define MAX_N 200 //maximum length of target strings #define UNION 1#define INTERSECT 2#define GROW 3#define BOOL_YES 1#define BOOL_NO 0#define END_ENUM { (char*)0, 0 }static Enum_T AlignEnum [] = {{ "union", UNION }, { "u", UNION }, { "intersect", INTERSECT},{ "i", INTERSECT},{ "grow", GROW }, { "g", GROW }, END_ENUM};static Enum_T BoolEnum [] = { { "true", BOOL_YES }, { "yes", BOOL_YES }, { "y", BOOL_YES }, { "false", BOOL_NO }, { "no", BOOL_NO }, { "n", BOOL_NO }, END_ENUM};// global variables and constantsint* fa; //counters of covered foreign positionsint* ea; //counters of covered english positionsint** A; //alignment matrix with information symmetric/direct/inverse alignmentsint verbose=0;//read an alignment pair from the input stream. int lc = 0;int getals(fstream& inp,int& m, int *a,int& n, int *b){ char w[MAX_WORD], dummy[10]; int i,j,freq; if (inp >> freq){ ++lc; //target sentence inp >> n; assert(n<MAX_N); for (i=1;i<=n;i++){ inp >> setw(MAX_WORD) >> w; if (strlen(w)>=MAX_WORD-1) { cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1=" << MAX_WORD-1 << endl; assert(strlen(w)<MAX_WORD-1); } } inp >> dummy; //# separator // inverse alignment for (i=1;i<=n;i++) inp >> b[i]; //source sentence inp >> m; assert(m<MAX_M); for (j=1;j<=m;j++){ inp >> setw(MAX_WORD) >> w; if (strlen(w)>=MAX_WORD-1) { cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1=" << MAX_WORD-1 << endl; assert(strlen(w)<MAX_WORD-1); } } inp >> dummy; //# separator // direct alignment for (j=1;j<=m;j++) { inp >> a[j]; assert(0<=a[j] && a[j]<=n); } //check inverse alignemnt for (i=1;i<=n;i++) assert(0<=b[i] && b[i]<=m); return 1; } else return 0;};//compute union alignmentint prunionalignment(fstream& out,int m,int *a,int n,int* b){ ostringstream sout; for (int j=1;j<=m;j++) if (a[j]) sout << j-1 << "-" << a[j]-1 << " "; for (int i=1;i<=n;i++) if (b[i] && a[b[i]]!=i) sout << b[i]-1 << "-" << i-1 << " "; //fix the last " " string str = sout.str(); str.replace(str.length()-1,1,"\n"); out << str; return 1;}//Compute unionalignment Alignmentint printersect(fstream& out,int m,int *a,int n,int* b){ ostringstream sout; for (int j=1;j<=m;j++) if (a[j] && b[a[j]]==j) sout << j-1 << "-" << a[j]-1 << " "; //fix the last " " string str = sout.str(); str.replace(str.length()-1,1,"\n"); out << str; return 1;}//Compute Grow Diagonal Alignment//Nice property: you will never introduce more points//than the unionalignment alignemt. Hence, you will always be able//to represent the grow alignment as the unionalignment of a//directed and inverted alignmentint printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false){ ostringstream sout; vector <pair <int,int> > neighbors; //neighbors pair <int,int> entry; neighbors.push_back(make_pair(-1,-0)); neighbors.push_back(make_pair(0,-1)); neighbors.push_back(make_pair(1,0)); neighbors.push_back(make_pair(0,1)); if (diagonal){ neighbors.push_back(make_pair(-1,-1)); neighbors.push_back(make_pair(-1,1)); neighbors.push_back(make_pair(1,-1)); neighbors.push_back(make_pair(1,1)); } int i,j,o; //covered foreign and english positions memset(fa,0,(m+1)*sizeof(int)); memset(ea,0,(n+1)*sizeof(int)); //matrix to quickly check if one point is in the symmetric //alignment (value=2), direct alignment (=1) and inverse alignment for (int i=1;i<=n;i++) memset(A[i],0,(m+1)*sizeof(int)); set <pair <int,int> > currentpoints; //symmetric alignment set <pair <int,int> > unionalignment; //union alignment pair <int,int> point; //variable to store points set<pair <int,int> >::const_iterator k; //iterator over sets //fill in the alignments for (j=1;j<=m;j++){ if (a[j]){ unionalignment.insert(make_pair(a[j],j)); if (b[a[j]]==j){ fa[j]=1;ea[a[j]]=1; A[a[j]][j]=2; currentpoints.insert(make_pair(a[j],j)); } else A[a[j]][j]=-1; } } for (i=1;i<=n;i++) if (b[i] && a[b[i]]!=i){ //not intersection unionalignment.insert(make_pair(i,b[i])); A[i][b[i]]=1; } int added=1; while (added){ added=0; ///scan the current alignment for (k=currentpoints.begin();k!=currentpoints.end();k++){ //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}"; for (o=0;o<neighbors.size();o++){ //cout << "go over check all neighbors\n"; point.first=k->first+neighbors[o].first; point.second=k->second+neighbors[o].second; //cout << point.second-1 << " " << point.first-1 << "\n"; //check if neighbor is inside 'matrix' if (point.first>0 && point.first <=n && point.second>0 && point.second<=m) //check if neighbor is in the unionalignment alignment if (b[point.first]==point.second || a[point.second]==point.first){ //cout << "In unionalignment ";cout.flush(); //check if it connects at least one uncovered word if (!(ea[point.first] && fa[point.second])) { //insert point in currentpoints! currentpoints.insert(point); A[point.first][point.second]=2; ea[point.first]=1; fa[point.second]=1; added=1; //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush(); } } } } } if (final){ for (k=unionalignment.begin();k!=unionalignment.end();k++) if (A[k->first][k->second]==1) { point.first=k->first;point.second=k->second; //one of the two words is not covered yet //cout << "{" << point.second-1 << "-" << point.first-1 << "} "; if ((bothuncovered && !ea[point.first] && !fa[point.second]) || (!bothuncovered && !(ea[point.first] && fa[point.second]))) { //add it! currentpoints.insert(point); A[point.first][point.second]=2; //keep track of new covered positions ea[point.first]=1;fa[point.second]=1; //added=1; //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n"; } } for (k=unionalignment.begin();k!=unionalignment.end();k++) if (A[k->first][k->second]==-1) { point.first=k->first;point.second=k->second; //one of the two words is not covered yet //cout << "{" << point.second-1 << "-" << point.first-1 << "} "; if ((bothuncovered && !ea[point.first] && !fa[point.second]) || (!bothuncovered && !(ea[point.first] && fa[point.second]))) { //add it! currentpoints.insert(point); A[point.first][point.second]=2; //keep track of new covered positions ea[point.first]=1;fa[point.second]=1; //added=1; //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n"; } } } for (k=currentpoints.begin();k!=currentpoints.end();k++) sout << k->second-1 << "-" << k->first-1 << " "; //fix the last " " string str = sout.str(); str.replace(str.length()-1,1,"\n"); out << str; out.flush(); return 1; return 1;}//Main file hereint main(int argc, char** argv){ int alignment=0; char* input="/dev/stdin";char* output="/dev/stdout";int diagonal=false;int final=false;int bothuncovered=false; DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum, "alignment", CMDENUMTYPE, &alignment, AlignEnum, "d", CMDENUMTYPE, &diagonal, BoolEnum, "diagonal", CMDENUMTYPE, &diagonal, BoolEnum, "f", CMDENUMTYPE, &final, BoolEnum, "final", CMDENUMTYPE, &final, BoolEnum, "b", CMDENUMTYPE, &bothuncovered, BoolEnum, "both", CMDENUMTYPE, &bothuncovered, BoolEnum, "i", CMDSTRINGTYPE, &input, "o", CMDSTRINGTYPE, &output, "v", CMDENUMTYPE, &verbose, BoolEnum, "verbose", CMDENUMTYPE, &verbose, BoolEnum, (char *)NULL); GetParams(&argc, &argv, (char*) NULL); if (alignment==0){ cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n" << "Input file or std must be in .bal format (see script giza2bal.pl).\n"; exit(1); } fstream inp(input,ios::in); fstream out(output,ios::out); if (!inp.is_open()){ cerr << "cannot open " << input << "\n"; exit(1); } if (!out.is_open()){ cerr << "cannot open " << output << "\n"; exit(1); } int a[MAX_M],b[MAX_N],m,n; fa=new int[MAX_M+1]; ea=new int[MAX_N+1]; A=new int *[MAX_N+1]; for (int i=1;i<=MAX_N;i++) A[i]=new int[MAX_M+1]; switch (alignment){ case UNION: cerr << "symal: computing union alignment\n"; while(getals(inp,m,a,n,b)) prunionalignment(out,m,a,n,b); break; case INTERSECT: cerr << "symal: computing intersect alignment\n"; while(getals(inp,m,a,n,b)) printersect(out,m,a,n,b); break; case GROW: cerr << "symal: computing grow alignment: diagonal (" << diagonal << ") final ("<< final << ")" << "both-uncovered (" << bothuncovered <<")\n"; while(getals(inp,m,a,n,b)) printgrow(out,m,a,n,b,diagonal,final,bothuncovered); break; default: exit(1); } delete [] fa; delete [] ea; for (int i=1;i<=MAX_N;i++) delete [] A[i]; delete [] A; exit(0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -