📄 chart.c
字号:
k=add_edge(i,i); } add_singles_stops(i,i); }}void add_singles(int s,int e,int si,int ei){ int i,j,k; edge_type etemp; double prob,probl,probr; int nl,nr; int il,ir; int flaggap; int wh,th,p,ch; for(i=si;i<=ei;i++) if(edges[i].valid==1&&inbeam(i,s,e)&&edges[i].stop==1) for(j=0;j<unary_nums[edges[i].label];j++) if(unaries[edges[i].label][j]!=-1) { etemp=edges[i]; etemp.stop=0; etemp.type=1; etemp.label=unaries[edges[i].label][j]; etemp.headch=i; etemp.headlabel=edges[i].label; if(etemp.label == NT_NPB) etemp.hasverb = 0; clear_args(&etemp.lc); clear_args(&etemp.rc); etemp.lc.nvs=0; etemp.lc.adj=1; etemp.rc.nvs=0; etemp.rc.adj=1; wh=current->wordnos[edges[i].head]; th=edges[i].headtag; p=etemp.label; ch=edges[i].label; prob=get_unary_prob_witheffhash(ch,wh,th,p,&new_hash,&eff_hash)+edges[i].prob; nl=lsubcats_counts[p][ch]; nr=rsubcats_counts[p][ch]; flaggap=(hasgap[etemp.label] && !hasgap[edges[i].label]); for(il=0;il<nl;il++) { if(nl==1) probl=0; else probl=get_subcat_prob_witheffhash(lsubcats[p][ch][il],ch,wh,th,p,0,&new_hash,&eff_hash); find_args(&etemp.lc,lsubcats[p][ch][il]); for(ir=0;ir<nr;ir++) { if(nr==1) probr=0; else probr=get_subcat_prob_witheffhash(rsubcats[p][ch][ir],ch,wh,th,p,1,&new_hash,&eff_hash); find_args(&etemp.rc,rsubcats[p][ch][ir]); etemp.prob=prob+probl+probr; if(flaggap==0) { edges[numedges]=etemp; if((k=add_edge(s,e))!=-1) { edges[k].child1=numchilds; edges[k].numchild=1; childs[numchilds]=i; numchilds++; } } else { edges[numedges].prob=prob+probl+probr; /*case3 -- from child to left*/ edges[numedges]=etemp; edges[numedges].lc.gap=1; edges[numedges].prob+=get_gap_prob_witheffhash(3,ch,wh,th,p,&new_hash,&eff_hash); if((k=add_edge(s,e))!=-1) { edges[k].child1=numchilds; edges[k].numchild=1; childs[numchilds]=i; numchilds++; } /*case4 -- from child to right*/ edges[numedges]=etemp; edges[numedges].rc.gap=1; edges[numedges].prob+=get_gap_prob_witheffhash(4,ch,wh,th,p,&new_hash,&eff_hash);/* printf("CHECK %g %g\n",edges[i].prob,edges[numedges].prob); print_edge(i,0);*/ if((k=add_edge(s,e))!=-1) { edges[k].child1=numchilds; edges[k].numchild=1; childs[numchilds]=i; numchilds++; } } } } }}void print_edge(int e,int off){ int i,j,newoff; int flag; flag=1; if(e==-1) { printf("TRACE T\n"); return; } if( (edges[e].label == NT_NP || edges[e].label == NT_NPA) && edges[e].numchild ==1) { j= childs[edges[e].child1];/* if(edges[j].label == NT_NPB) flag=0;*/ } if(flag) { printf("%s %g ",nts[edges[e].label],edges[e].prob); newoff=off+strlen(nts[edges[e].label])+1; } else newoff=off; /*POS tag case*/ if(edges[e].type==0) { printf("%s\n",current->words[current->wordpos[edges[e].head]]); return; } print_edge(childs[edges[e].child1],newoff); for(i=edges[e].child1+1;i<edges[e].child1+edges[e].numchild;i++) { for(j=0;j<newoff;j++) printf(" "); print_edge(childs[i],newoff); }}int find_childno(int e,int c){ int i; for(i=0;i<edges[e].numchild;i++) { if(childs[edges[e].child1+i] == c) return i+1; } return -1;}void print_wholent(int e){ printf("%s",nts[edges[e].label]); if(edges[e].type==4) { printf("_NA~%d",edges[e].numchild); return; } if(1) { printf("~%s",current->words[current->wordpos[edges[e].head]]); printf("~%d",edges[e].numchild); printf("~%d",find_childno(e,edges[e].headch)); }}void print_edges_flat(int e){ int i,j,w,next; int flag; flag=1; if(e==-1) { printf("T/TRACE "); return; }if(TREEBANKOUTPUTFLAG && (edges[e].label == NT_NP || edges[e].label == NT_NPA) && edges[e].numchild ==1) { j= childs[edges[e].child1]; if(edges[j].label == NT_NPB) flag=0; } /*POS tag case*/ /*POS tag - print preceding punctuation*/ if(edges[e].type==0) { if(edges[e].head==0) for(i=0;i<current->wordpos[edges[e].head];i++) printf("%s/PUNC%s ",current->words[i],current->tags[i]); printf("%s/%s ",current->words[current->wordpos[edges[e].head]] ,nts[edges[e].headtag]); w=current->wordpos[edges[e].head]; if(edges[e].head==current->nws_np-1) next=current->nws; else next=current->wordpos[edges[e].head+1]; for(i=w+1;i<next;i++) printf("%s/PUNC%s ",current->words[i],current->tags[i]); return; } if(flag) { printf("("); print_wholent(e); printf(" "); } print_edges_flat(childs[edges[e].child1]); for(i=edges[e].child1+1;i<edges[e].child1+edges[e].numchild;i++) { print_edges_flat(childs[i]); } if(flag) printf(") ");}void print_chart(){ int i; int dist,subcat; for(i=0;i<numedges;i++) { printf("EDGE %d %g %g %d %d %d ",i,edges[i].prob,edges[i].prob2,edges[i].stop,edges[i].type,edges[i].label); printf(" L "); onectxt_to_dist_subcat(&edges[i].lc,&dist,&subcat,1); printf("%d %d HV%d",dist,subcat,edges[i].hasverb); printf(" R "); onectxt_to_dist_subcat(&edges[i].rc,&dist,&subcat,0); printf("%d %d\n",dist,subcat); print_edge(i,0); printf("\n\n"); }}void join_2_edges_follow(int e1,int s,int m,int e,int *e2s,int ne2s){ int h2_is_verb,e2,i2; int e1hlabel; /*e3,e4,d3,d4 are variables for new edges/distances created when e1/e2 are respectively the heads e3flag,e4flag = 0/1 if e3/e4 haven't/have been created */ edge_type e3; int e3flag=0; int e3flag2=0; int dist,subcat,cc,punc; int wcc,tcc,wpunc,tpunc; int wh,ch,th; int wm,cm,tm; int p; int i,k; double prob,combineprob; int e1c;/*ccprob is used to prefer wide scope premodification of CCed phrases, and narrow scope post modification*/ double ccprob_post; /* d3 is e1 as head, e2 as modifier d4 is e2 as head, e1 as modifier calulate the distance variables and heads - the parent depends on later calls in this routine */ e1c = childs[edges[e1].child1+edges[e1].numchild-1]; if(edges[e1].type==0) e1hlabel=edges[e1].headtag; else e1hlabel=edges[edges[e1].headch].label; if(edges[e1].type==1&&edges[e1].label==NT_NPB) e1hlabel=edges[e1c].label; if(edges[e1].stop==0 && (edges[e1].type==3||edges[e1].type==1))/* &&tablef[edges[e1].label][e1hlabel][edges[e2].label])*/ { for(i2=0;i2<ne2s;i2++) { e2 = e2s[i2]; combineprob=edges[e1].prob+edges[e2].prob; if(combineprob>=pthresh && (sindex[s][e]==-1 || combineprob>=(bestprobs[s][e]-BEAMPROB)) && edges[e2].stop ==1 && tablef[edges[e1].label][e1hlabel][edges[e2].label]) { h2_is_verb=edges[e2].hasverb; if(e3flag2==0) { if(edges[e1].label!=NT_NPB) { onectxt_to_dist_subcat(&edges[e1].rc,&dist,&subcat,0); punc = current->commaats[m]; if(punc) { wpunc=current->commawords[m]; tpunc=current->commatags[m]; } else wpunc=tpunc=0; wh=current->wordnos[edges[e1].head]; th=edges[e1].headtag; ch=edges[e1].label; ch=e1hlabel; } else { wh=current->wordnos[edges[e1c].head]; th=edges[e1c].headtag; ch=edges[e1c].label; wm=current->wordnos[edges[e2].head]; tm=edges[e2].headtag; cm=edges[e2].label; dist = 10; subcat = 0; punc=current->commaats[m]; if(punc) { wpunc=current->commawords[m]; tpunc=current->commatags[m]; } else wpunc=tpunc=0; } wcc=tcc=cc=0; p=edges[e1].label; e3flag2 = 1; } wm=current->wordnos[edges[e2].head]; tm=edges[e2].headtag; cm=edges[e2].label; if(edges[e1].type!=0 && argmap[edges[e1].label]==argmap[edges[edges[e1].headch].label]) ccprob_post=-CCPROBSMALL; else ccprob_post=0; prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,wcc,tcc,punc,wpunc,tpunc,&new_hash,&eff_hash)+combineprob+ccprob_post; if(prob>pthresh) { calc_contexts(&edges[e1].lc,&edges[e1].rc,&edges[e2].lc,&edges[e2].rc,&e3.lc,&e3.rc,1,h2_is_verb,edges[e2].label); e3.hasverb = edges[e1].hasverb || edges[e2].hasverb; if(edges[e1].label==NT_NPB) { e3.lc.nvs=0; e3.lc.adj=1; e3.rc.nvs=0; e3.rc.adj=1; e3.hasverb=0; } if(e3flag==0) { e3.head=edges[e1].head; e3.headtag=edges[e1].headtag; e3.type=3; e3flag=1; e3.headch=edges[e1].headch; e3.label=edges[e1].label; e3.headlabel=edges[e1].headlabel; e3.valid=1; e3.stop=0; e3flag = 1; } e3.prob=prob; edges[numedges]=e3; if((k=add_edge(s,e))!=-1) { edges[k].numchild=edges[e1].numchild+1; edges[k].child1=numchilds; for(i=0;i<edges[e1].numchild;i++) childs[numchilds+i]=childs[edges[e1].child1+i]; childs[numchilds+edges[k].numchild-1]=e2; /* printf("AAAA\n"); print_edge(k,0);*/ numchilds+=edges[k].numchild; } } } } }}void join_2_edges_precede(int e2,int s,int m,int e,int *e1s,int ne1){ int h1_is_verb,e1; int e2hlabel; /*e3,e4,d3,d4 are variables for new edges/distances created when e1/e2 are respectively the heads e3flag,e4flag = 0/1 if e3/e4 haven't/have been created */ edge_type e4; int e4flag=0; int e4flag2=0; int dist,subcat,cc,punc; int wh,ch,th; int wm,cm,tm; int p; int i,i2,k; int wcc,tcc,wpunc,tpunc; double prob,combineprob; int e2c;/*ccprob is used to prefer wide scope premodification of CCed phrases, and narrow scope post modification*/ double ccprob_pre;/* if(edges[e1].label==5) return; if(edges[e2].label==5) return;*/ /* d3 is e1 as head, e2 as modifier d4 is e2 as head, e1 as modifier calulate the distance variables and heads - the parent depends on later calls in this routine */ /*now for the extend case*/ e2c = childs[edges[e2].child1]; if(edges[e2].type==0) e2hlabel=edges[e2].headtag; else e2hlabel=edges[edges[e2].headch].label; if(edges[e2].label == NT_NPB) e2hlabel=edges[e2c].label; /*now the extend case again*/ if(edges[e2].stop==0 && (edges[e2].type==3||edges[e2].type==1))/* &&tablep[edges[e2].label][e2hlabel][edges[e1].label])*/ { for(i2=0;i2<ne1;i2++) { e1 = e1s[i2]; combineprob=edges[e1].prob+edges[e2].prob;/* printf("BBB %g\n",combineprob); print_edge(e1,0); print_edge(e2,0);*/ if(tablep[edges[e2].label][e2hlabel][edges[e1].label] && combineprob>=pthresh && edges[e1].stop ==1 && (sindex[s][e]==-1 || combineprob>=(bestprobs[s][e]-BEAMPROB))) { h1_is_verb=edges[e1].hasverb; if(e4flag2==0) { if(edges[e2].type!=0 && argmap[edges[e2].label]==argmap[edges[edges[e2].headch].label]) ccprob_pre=0; else ccprob_pre=-CCPROBSMALL; if(edges[e2].label!=NT_NPB) { onectxt_to_dist_subcat(&edges[e2].lc,&dist,&subcat,1); punc = current->commaats[m]; if(punc) { wpunc = current->commawords[m]; tpunc = current->commatags[m]; } else wpunc=tpunc=0; wh=current->wordnos[edges[e2].head]; th=edges[e2].headtag; ch=edges[e2].label; ch=e2hlabel; } else { wh=current->wordnos[edges[e2c].head]; th=edges[e2c].headtag; ch=th; cc=0; dist = 110; subcat = 0; punc=current->commaats[m]; if(punc) { wpunc = current->commawords[m]; tpunc = current->commatags[m]; } else wpunc=tpunc=0; } wcc=tcc=cc=0; p=edges[e2].label; e4flag2 = 1; } wm=current->wordnos[edges[e1].head]; tm=edges[e1].headtag; cm=edges[e1].label; prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,wcc,tcc,punc,wpunc,tpunc,&new_hash,&eff_hash)+combineprob+ccprob_pre; if(prob>pthresh) { calc_contexts(&edges[e1].lc,&edges[e1].rc,&edges[e2].lc,&edges[e2].rc,&e4.lc,&e4.rc,2,h1_is_verb,edges[e1].label); e4.hasverb=edges[e1].hasverb || edges[e2].hasverb; if(edges[e2].label==NT_NPB) { e4.lc.nvs=0; e4.lc.adj=1; e4.rc.nvs=0; e4.rc.adj=1; e4.hasverb=0; } if(e4flag==0) { e4.head=edges[e2].head; e4.headtag=edges[e2].headtag; e4.type=3; e4.headch=edges[e2].headch; e4.label=edges[e2].label; e4.headlabel=edges[e2].headlabel; e4.valid=1; e4.stop=0; e4flag=1; } e4.prob=prob; edges[numedges]=e4; if((k=add_edge(s,e))!=-1) { edges[k].numchild=edges[e2].numchild+1; edges[k].child1=numchilds; for(i=0;i<edges[e2].numchild;i++) childs[numchilds+i+1]=childs[edges[e2].child1+i]; childs[numchilds]=e1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -