⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chart.c

📁 中心词驱动的短语结构句法分析器。该模型考虑了跟随介词短语的名词短语的中心词的作用。由MIT大学的Collins开发
💻 C
📖 第 1 页 / 共 3 页
字号:
	    k=add_edge(i,i);	  }      add_singles_stops(i,i);    }}void add_singles(int s,int e,int si,int ei){  int i,j,k;  edge_type etemp;  double prob,probl,probr;  int nl,nr;  int il,ir;  int flaggap;  int wh,th,p,ch;  for(i=si;i<=ei;i++)    if(edges[i].valid==1&&inbeam(i,s,e)&&edges[i].stop==1)      for(j=0;j<unary_nums[edges[i].label];j++)	if(unaries[edges[i].label][j]!=-1)	{	  etemp=edges[i];	  etemp.stop=0;	  etemp.type=1;	  etemp.label=unaries[edges[i].label][j];	  etemp.headch=i;	  etemp.headlabel=edges[i].label;	  if(etemp.label == NT_NPB)	    etemp.hasverb = 0;	  clear_args(&etemp.lc);	  clear_args(&etemp.rc);	  	  etemp.lc.nvs=0;	  etemp.lc.adj=1;	  	  etemp.rc.nvs=0;	  etemp.rc.adj=1;	  wh=current->wordnos[edges[i].head];	  th=edges[i].headtag;	  p=etemp.label;	  ch=edges[i].label;	  prob=get_unary_prob_witheffhash(ch,wh,th,p,&new_hash,&eff_hash)+edges[i].prob;	  nl=lsubcats_counts[p][ch];	  nr=rsubcats_counts[p][ch];	  flaggap=(hasgap[etemp.label] && !hasgap[edges[i].label]);	  for(il=0;il<nl;il++)	    {	      if(nl==1)		probl=0;	      else		probl=get_subcat_prob_witheffhash(lsubcats[p][ch][il],ch,wh,th,p,0,&new_hash,&eff_hash);	      find_args(&etemp.lc,lsubcats[p][ch][il]);	      for(ir=0;ir<nr;ir++)		{		  if(nr==1)		    probr=0;		  else		    probr=get_subcat_prob_witheffhash(rsubcats[p][ch][ir],ch,wh,th,p,1,&new_hash,&eff_hash);		  find_args(&etemp.rc,rsubcats[p][ch][ir]);		  etemp.prob=prob+probl+probr;		  if(flaggap==0)		    {		      edges[numedges]=etemp;		      if((k=add_edge(s,e))!=-1)			{			  edges[k].child1=numchilds;			  edges[k].numchild=1;			  childs[numchilds]=i;			  numchilds++;			}		    }		  else		    {		      edges[numedges].prob=prob+probl+probr;		      /*case3 -- from child to left*/		      edges[numedges]=etemp;		      edges[numedges].lc.gap=1;		      edges[numedges].prob+=get_gap_prob_witheffhash(3,ch,wh,th,p,&new_hash,&eff_hash);		      if((k=add_edge(s,e))!=-1)			{			  edges[k].child1=numchilds;			  edges[k].numchild=1;			  
childs[numchilds]=i;			  numchilds++;			}		      /*case4 -- from child to right*/		      edges[numedges]=etemp;		      edges[numedges].rc.gap=1;		      edges[numedges].prob+=get_gap_prob_witheffhash(4,ch,wh,th,p,&new_hash,&eff_hash);/*		      printf("CHECK %g %g\n",edges[i].prob,edges[numedges].prob);		      print_edge(i,0);*/		      if((k=add_edge(s,e))!=-1)			{			  edges[k].child1=numchilds;			  edges[k].numchild=1;			  childs[numchilds]=i;			  numchilds++;			}		    }		}	    }	}}void print_edge(int e,int off){  int i,j,newoff;  int flag;  flag=1;  if(e==-1)    {      printf("TRACE T\n");      return;    }  if( (edges[e].label == NT_NP || edges[e].label == NT_NPA)     && edges[e].numchild ==1)    {      j= childs[edges[e].child1];/*      if(edges[j].label == NT_NPB)	flag=0;*/    }      if(flag)    {      printf("%s %g ",nts[edges[e].label],edges[e].prob);      newoff=off+strlen(nts[edges[e].label])+1;    }  else    newoff=off;  /*POS tag case*/  if(edges[e].type==0)    {      printf("%s\n",current->words[current->wordpos[edges[e].head]]);      return;    }  print_edge(childs[edges[e].child1],newoff);  for(i=edges[e].child1+1;i<edges[e].child1+edges[e].numchild;i++)    {          for(j=0;j<newoff;j++)	printf(" ");      print_edge(childs[i],newoff);    }}int find_childno(int e,int c){  int i;  for(i=0;i<edges[e].numchild;i++)    {      if(childs[edges[e].child1+i] == c)	return i+1;    }  return -1;}void print_wholent(int e){  printf("%s",nts[edges[e].label]);  if(edges[e].type==4)    {      printf("_NA~%d",edges[e].numchild);      return;    }    if(1)    {      printf("~%s",current->words[current->wordpos[edges[e].head]]);      printf("~%d",edges[e].numchild);      printf("~%d",find_childno(e,edges[e].headch));    }}void print_edges_flat(int e){  int i,j,w,next;  int flag;  flag=1;  if(e==-1)    {      printf("T/TRACE ");      return;    }if(TREEBANKOUTPUTFLAG &&   (edges[e].label == NT_NP || edges[e].label == NT_NPA)     && edges[e].numchild ==1)    {      j= 
childs[edges[e].child1];      if(edges[j].label == NT_NPB)	flag=0;    }  /*POS tag case*/  /*POS tag - print preceding punctuation*/  if(edges[e].type==0)    {      if(edges[e].head==0)	for(i=0;i<current->wordpos[edges[e].head];i++)	  printf("%s/PUNC%s ",current->words[i],current->tags[i]);      printf("%s/%s ",current->words[current->wordpos[edges[e].head]]	     ,nts[edges[e].headtag]);      w=current->wordpos[edges[e].head];      if(edges[e].head==current->nws_np-1)	next=current->nws;      else	next=current->wordpos[edges[e].head+1];      for(i=w+1;i<next;i++)	printf("%s/PUNC%s ",current->words[i],current->tags[i]);      return;    }  if(flag)    {      printf("(");      print_wholent(e);      printf(" ");    }  print_edges_flat(childs[edges[e].child1]);  for(i=edges[e].child1+1;i<edges[e].child1+edges[e].numchild;i++)    {          print_edges_flat(childs[i]);    }  if(flag)    printf(") ");}void print_chart(){  int i;  int dist,subcat;  for(i=0;i<numedges;i++)    {      printf("EDGE %d %g %g %d %d %d ",i,edges[i].prob,edges[i].prob2,edges[i].stop,edges[i].type,edges[i].label);      printf(" L ");      onectxt_to_dist_subcat(&edges[i].lc,&dist,&subcat,1);      printf("%d %d HV%d",dist,subcat,edges[i].hasverb);      printf(" R ");      onectxt_to_dist_subcat(&edges[i].rc,&dist,&subcat,0);      printf("%d %d\n",dist,subcat);      print_edge(i,0);      printf("\n\n");    }}void join_2_edges_follow(int e1,int s,int m,int e,int *e2s,int ne2s){  int h2_is_verb,e2,i2;  int e1hlabel;  /*e3,e4,d3,d4 are variables for new edges/distances created when    e1/e2 are respectively the heads    e3flag,e4flag = 0/1 if e3/e4 haven't/have been created  */  edge_type e3;  int e3flag=0;  int e3flag2=0;  int dist,subcat,cc,punc;  int wcc,tcc,wpunc,tpunc;  int wh,ch,th;  int wm,cm,tm;  int p;  int i,k;  double prob,combineprob;  int e1c;/*ccprob is used to prefer wide scope premodification of CCed phrases,    and narrow scope post modification*/  double ccprob_post;  /* d3 is e1 as 
head, e2 as modifier     d4 is e2 as head, e1 as modifier          calulate the distance variables and heads - the parent depends on      later calls in this routine  */  e1c = childs[edges[e1].child1+edges[e1].numchild-1];  if(edges[e1].type==0)    e1hlabel=edges[e1].headtag;  else    e1hlabel=edges[edges[e1].headch].label;  if(edges[e1].type==1&&edges[e1].label==NT_NPB)    e1hlabel=edges[e1c].label;  if(edges[e1].stop==0 &&     (edges[e1].type==3||edges[e1].type==1))/*     &&tablef[edges[e1].label][e1hlabel][edges[e2].label])*/    {      for(i2=0;i2<ne2s;i2++)	{	  e2 = e2s[i2];	  combineprob=edges[e1].prob+edges[e2].prob;  	  if(combineprob>=pthresh &&	     (sindex[s][e]==-1 || combineprob>=(bestprobs[s][e]-BEAMPROB)) &&	     edges[e2].stop ==1 &&	     tablef[edges[e1].label][e1hlabel][edges[e2].label])	    {	      h2_is_verb=edges[e2].hasverb;	      if(e3flag2==0)		{		  if(edges[e1].label!=NT_NPB)		    {		      onectxt_to_dist_subcat(&edges[e1].rc,&dist,&subcat,0);		      punc = current->commaats[m];		      if(punc)			{			  wpunc=current->commawords[m];			  tpunc=current->commatags[m];			}		      else			wpunc=tpunc=0;		      wh=current->wordnos[edges[e1].head];		      th=edges[e1].headtag;		      ch=edges[e1].label;		      ch=e1hlabel;		    }		  else		    {		      wh=current->wordnos[edges[e1c].head];		      th=edges[e1c].headtag;		      ch=edges[e1c].label;		      		      wm=current->wordnos[edges[e2].head];		      tm=edges[e2].headtag;		      cm=edges[e2].label;		      dist = 10;		      subcat = 0;		      punc=current->commaats[m];		      if(punc)			{			  wpunc=current->commawords[m];			  tpunc=current->commatags[m];			}		      else			wpunc=tpunc=0;		    }		  		  wcc=tcc=cc=0;		  		  p=edges[e1].label;		  e3flag2 = 1;		}	  	      wm=current->wordnos[edges[e2].head];	      tm=edges[e2].headtag;	      cm=edges[e2].label;	      if(edges[e1].type!=0 && argmap[edges[e1].label]==argmap[edges[edges[e1].headch].label])		ccprob_post=-CCPROBSMALL;	      else		
ccprob_post=0;	      	      prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,wcc,tcc,punc,wpunc,tpunc,&new_hash,&eff_hash)+combineprob+ccprob_post;	      if(prob>pthresh)		{		  calc_contexts(&edges[e1].lc,&edges[e1].rc,&edges[e2].lc,&edges[e2].rc,&e3.lc,&e3.rc,1,h2_is_verb,edges[e2].label);		  e3.hasverb = edges[e1].hasverb || edges[e2].hasverb;		  if(edges[e1].label==NT_NPB)		    {		      e3.lc.nvs=0;		      e3.lc.adj=1;		      e3.rc.nvs=0;		      		      e3.rc.adj=1;		      e3.hasverb=0;		    }		  		  if(e3flag==0)		    {		      e3.head=edges[e1].head;		      e3.headtag=edges[e1].headtag;		      e3.type=3;		      e3flag=1;		      e3.headch=edges[e1].headch;		      e3.label=edges[e1].label;		      e3.headlabel=edges[e1].headlabel;		      e3.valid=1;		      e3.stop=0;		      e3flag = 1;		    }		  e3.prob=prob;		  		  edges[numedges]=e3;		  		  if((k=add_edge(s,e))!=-1)		    {		      edges[k].numchild=edges[e1].numchild+1;		      edges[k].child1=numchilds;		      for(i=0;i<edges[e1].numchild;i++)			childs[numchilds+i]=childs[edges[e1].child1+i];		      childs[numchilds+edges[k].numchild-1]=e2;		      /*	      printf("AAAA\n");				      print_edge(k,0);*/		      numchilds+=edges[k].numchild;		    }		}	    }	}    }}void join_2_edges_precede(int e2,int s,int m,int e,int *e1s,int ne1){  int h1_is_verb,e1;  int e2hlabel;  /*e3,e4,d3,d4 are variables for new edges/distances created when    e1/e2 are respectively the heads    e3flag,e4flag = 0/1 if e3/e4 haven't/have been created  */  edge_type e4;  int e4flag=0;  int e4flag2=0;  int dist,subcat,cc,punc;  int wh,ch,th;  int wm,cm,tm;  int p;  int i,i2,k;  int wcc,tcc,wpunc,tpunc;  double prob,combineprob;  int e2c;/*ccprob is used to prefer wide scope premodification of CCed phrases,    and narrow scope post modification*/  double ccprob_pre;/*  if(edges[e1].label==5) return;  if(edges[e2].label==5) return;*/  /* d3 is e1 as head, e2 as modifier     d4 is e2 as head, e1 as modifier          calulate 
the distance variables and heads - the parent depends on      later calls in this routine  */  /*now for the extend case*/  e2c = childs[edges[e2].child1];    if(edges[e2].type==0)    e2hlabel=edges[e2].headtag;  else    e2hlabel=edges[edges[e2].headch].label;  if(edges[e2].label == NT_NPB)    e2hlabel=edges[e2c].label;  /*now the extend case again*/  if(edges[e2].stop==0 &&      (edges[e2].type==3||edges[e2].type==1))/*      &&tablep[edges[e2].label][e2hlabel][edges[e1].label])*/    {      for(i2=0;i2<ne1;i2++)	{	  e1 = e1s[i2];	  	  combineprob=edges[e1].prob+edges[e2].prob;/*	  printf("BBB %g\n",combineprob);	  	  print_edge(e1,0);	  print_edge(e2,0);*/	  	  if(tablep[edges[e2].label][e2hlabel][edges[e1].label] &&	     combineprob>=pthresh &&	     edges[e1].stop ==1 &&	     (sindex[s][e]==-1 || combineprob>=(bestprobs[s][e]-BEAMPROB)))	    {	      h1_is_verb=edges[e1].hasverb;	      if(e4flag2==0)		{		  if(edges[e2].type!=0 &&		     argmap[edges[e2].label]==argmap[edges[edges[e2].headch].label])		    ccprob_pre=0;		  else		    ccprob_pre=-CCPROBSMALL;		  if(edges[e2].label!=NT_NPB)		    {		      onectxt_to_dist_subcat(&edges[e2].lc,&dist,&subcat,1);		      punc = current->commaats[m];		      if(punc)			{			  wpunc = current->commawords[m];			  tpunc = current->commatags[m];			}		      else			wpunc=tpunc=0;		      		      wh=current->wordnos[edges[e2].head];		      th=edges[e2].headtag;		      ch=edges[e2].label;		      ch=e2hlabel;		    }		  else		    {		      wh=current->wordnos[edges[e2c].head];		      th=edges[e2c].headtag;		      ch=th;		      cc=0;		      		      dist = 110;		      subcat = 0;		      punc=current->commaats[m];		      if(punc)			{			  wpunc = current->commawords[m];			  tpunc = current->commatags[m];			}		      else			wpunc=tpunc=0;		    }		  wcc=tcc=cc=0;		  		  p=edges[e2].label;		  		  		  e4flag2 = 1;		}	      wm=current->wordnos[edges[e1].head];	      tm=edges[e1].headtag;	      cm=edges[e1].label;	      	      	      
prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,wcc,tcc,punc,wpunc,tpunc,&new_hash,&eff_hash)+combineprob+ccprob_pre;	      if(prob>pthresh)		{		  calc_contexts(&edges[e1].lc,&edges[e1].rc,&edges[e2].lc,&edges[e2].rc,&e4.lc,&e4.rc,2,h1_is_verb,edges[e1].label);		  e4.hasverb=edges[e1].hasverb || edges[e2].hasverb;		  if(edges[e2].label==NT_NPB)		    {		      e4.lc.nvs=0;		      e4.lc.adj=1;		      e4.rc.nvs=0;		      e4.rc.adj=1;		      e4.hasverb=0;		    }		  if(e4flag==0)		    {		      e4.head=edges[e2].head;		      e4.headtag=edges[e2].headtag;		      e4.type=3;		      e4.headch=edges[e2].headch;		      e4.label=edges[e2].label;		      e4.headlabel=edges[e2].headlabel;		      e4.valid=1;		      e4.stop=0;		      		      e4flag=1;		    }		  e4.prob=prob;		  edges[numedges]=e4;		  		  if((k=add_edge(s,e))!=-1)		    {		      edges[k].numchild=edges[e2].numchild+1;		      edges[k].child1=numchilds;		      for(i=0;i<edges[e2].numchild;i++)			childs[numchilds+i+1]=childs[edges[e2].child1+i];		      childs[numchilds]=e1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -