⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chart.c

📁 中心词驱动的短语结构句法分析器。该模型考虑了跟随介词短语的名词短语的中心词的作用。由 MIT 的 Collins 开发
💻 C
📖 第 1 页 / 共 3 页
字号:
		      /* NOTE(review): the lines down to the first lone closing brace are the
			 tail of a function whose beginning lies outside this view; they are
			 kept unchanged. */
		      /*printf("AAAA\n");
			print_edge(k,0);*/
		      numchilds+=edges[k].numchild;
		    }
		}
	    }
	}
    }
}

/*
 * complete(s,e): build the edges for the span [s,e] by combining edges of the
 * two adjacent sub-spans [s,i] and [i+1,e] for every split point i in [s,e).
 *
 * The edges of a span [a,b] are visited as the index range
 * sindex[a][b]..eindex[a][b] into the global edges[] array, filtered through
 * inbeam2().
 *
 * Returns immediately (adding nothing) once numedges or numchilds exceeds two
 * thirds of PMAXEDGES / PMAXCHILDS.
 *
 * NOTE(review): t2/t3/t5 are three 100000-int automatic arrays (about 1.2 MB
 * of stack with 4-byte int) and are filled without any bounds check; confirm
 * that a span can never hold more than 100000 in-beam edges.
 */
void complete(int s,int e)
{
  int i,j,k,k2;
  int t2[100000],t3[100000],t5[100000];
  int nt2,nt3,nt5;

  if(numedges>(PMAXEDGES*(2.0/3.0))
     || numchilds > (PMAXCHILDS*(2.0/3.0)))
    return;

  init_saments();

  for(i=s;i<e;i++)
    /* First branch: PUNC_FLAG is off, or the split point passes the comma test
       (no commaats2 entry at i, or e is the last word, or commaats[e] is set).
       All three joins are attempted. */
    if( !PUNC_FLAG || (!current->commaats2[i]||e==(current->nws_np-1)||current->commaats[e]) )
      {
        nt2=nt3=nt5=0;

        /* Partition the in-beam edges of the right sub-span [i+1,e]:
           t2 collects those with stop==1, t3 those with stop!=1. */
        for(k=sindex[i+1][e];k<=eindex[i+1][e];k++)
          {
            if(inbeam2(k,i+1,e))
              if(edges[k].stop==1)
                {
                  t2[nt2] = k;
                  nt2++;
                }
              else
                {
                  t3[nt3] = k;
                  nt3++;
                }
          }

        /* In-beam edges of the left sub-span [s,i]: those with stop==0 are
           joined with the t2 list right away; the others are collected in t5. */
        for(j=sindex[s][i];j<=eindex[s][i];j++)
          if(inbeam2(j,s,i))
            if(edges[j].stop==0)
              {
                join_2_edges_follow(j,s,i,e,t2,nt2);
              }
            else
              {
                t5[nt5] = j;
                nt5++;
              }

        /* Each right-hand edge from t3 is joined with the t5 list. */
        for(k2=0;k2<nt3;k2++)
          {
            k = t3[k2];
            join_2_edges_precede(k,s,i,e,t5,nt5);
          }

        /* If the tag at position i+1 is NT_CC and at least one word follows it
           inside the span, pair every in-beam edge over [s,i] with every
           in-beam edge over [i+2,e]. */
        if(current->tagnos[i+1]==NT_CC&&i<e-1)
          for(j=sindex[s][i];j<=eindex[s][i];j++)
            if(inbeam2(j,s,i))
              for(k=sindex[i+2][e];k<=eindex[i+2][e];k++)
                if(inbeam2(k,i+2,e))
                  join_2_edges_cc(j,k,s,i,e);
      }
    else
      {
        /* Restricted branch: the "follow" join is disabled (commented out
           below) and the "precede" join is applied only to right-hand edges
           labelled NT_NPB.  The partitioning itself is the same as above. */
        nt2=nt3=nt5=0;

        for(k=sindex[i+1][e];k<=eindex[i+1][e];k++)
          {
            if(inbeam2(k,i+1,e))
              if(edges[k].stop==1)
                {
                  t2[nt2] = k;
                  nt2++;
                }
              else
                {
                  t3[nt3] = k;
                  nt3++;
                }
          }

        for(j=sindex[s][i];j<=eindex[s][i];j++)
          if(inbeam2(j,s,i))
            if(edges[j].stop==0)
              {
                ;
/*		join_2_edges_follow(j,s,i,e,t2,nt2);*/
              }
            else
              {
                t5[nt5] = j;
                nt5++;
              }

        for(k2=0;k2<nt3;k2++)
          {
            k = t3[k2];
            if(edges[k].label == NT_NPB)
              join_2_edges_precede(k,s,i,e,t5,nt5);
          }
      }

    /* Runs once, after all split points have been processed. */
    add_singles_stops(s,e);
}

/*
 * print_best_parse(): among the edges spanning the whole sentence
 * [0, current->nws_np-1] that have label 43 and stop==0, pick the one with the
 * highest prob (the first one wins ties), print it, and return its index.
 * Returns -1, printing nothing, when no such edge exists.
 *
 * NOTE(review): 43 is a hard-coded label number; presumably the top-level
 * non-terminal -- confirm against the label table and replace with a named
 * constant.
 */
int print_best_parse()
{
  int i,best;

  best=-1;

  for(i=sindex[0][current->nws_np-1];i<=eindex[0][current->nws_np-1];i++)
    if(edges[i].label==43 && edges[i].stop==0)
      {
/*	printf("prob %g\n",edges[i].prob);
	print_edge(i,0);*/
        if(best==-1)
          best=i;
        else
          if(edges[i].prob>edges[best].prob)
            best=i;
      }

  if(best!=-1)
    {
      printf("PROB %d %g %d \n",best,edges[best].prob,edges[best].stop);
      print_edge(best,0);
      print_edges_flat(best);
      printf("\n");
    }

  return best;
}

/*
 * join_2_edges_cc(e1,e2,s,m,e): try to build one new edge over [s,e] from
 * edge e1 (over [s,m]) and edge e2 (over [m+2,e]), with the word at position
 * m+1 in between (the caller only invokes this when that word's tag is NT_CC).
 *
 * e1 supplies the head, head tag, head child and label of the new edge; e2 is
 * attached as a modifier.  The new edge's children are e1's children followed
 * by the edge at sindex[m+1][m+1] and then e2.
 *
 * Nothing is added when the combined probability is below pthresh or below
 * the beam for [s,e], when the tablef / type / stop preconditions fail, or
 * when add_edge() returns -1.
 */
void join_2_edges_cc(int e1,int e2,int s,int m,int e)
{
  int h2_is_verb;
  int e1hlabel;

  /*e3,d3,are variables for new edges/distances created when
    e1 is the head

    e3flag = 0/1 if e3 hasn't/has been created
  */
  /* NOTE(review): e3 is only partially filled in before being copied into
     edges[numedges]; the remaining fields are indeterminate unless add_edge()
     sets them -- confirm. */
  edge_type e3;
  int e3flag=0;

  int dist,subcat,cc,punc;
  int wh,ch,th;
  int wm,cm,tm;
  int p;
  int wcc,tcc,wpunc,tpunc;

  int i,k;

  double prob,combineprob;

  /* Cheap pruning on the sum of the two input probabilities before any model
     lookup is done. */
  combineprob=edges[e1].prob+edges[e2].prob;

  if(combineprob<pthresh) return;

  /* Only compare against the beam if the span already has edges. */
  if(sindex[s][e]!=-1)
    if( combineprob < (bestprobs[s][e]-BEAMPROB) )
      return;

  h2_is_verb=edges[e2].hasverb;

  /* d3 is e1 as head, e2 as modifier

     calculate the distance variables and heads - the parent depends on
     later calls in this routine
  */

  onectxt_to_dist_subcat(&edges[e1].rc,&dist,&subcat,0);

  /* Punctuation recorded at m takes precedence over punctuation at m+1. */
  punc = current->commaats[m]||current->commaats[m+1];
  if(current->commaats[m])
    {
      wpunc = current->commawords[m];
      tpunc = current->commatags[m];
    }
  else if(current->commaats[m+1])
    {
      wpunc = current->commawords[m+1];
      tpunc = current->commatags[m+1];
    }
  else
    wpunc=tpunc=0;

  wh=current->wordnos[edges[e1].head];
  th=edges[e1].headtag;

  wm=current->wordnos[edges[e2].head];
  tm=edges[e2].headtag;

  ch=edges[e1].label;
  cm=edges[e2].label;

  /* cc=1 together with the word/tag at m+1 is passed to the dependency
     model. */
  cc=1;
  wcc=current->wordnos[m+1];
  tcc=current->tagnos[m+1];

  /*now for the extend case*/

  /* Label of e1's head child: the head tag itself for type-0 edges, otherwise
     the label of the edge stored in headch. */
  if(edges[e1].type==0)
    e1hlabel=edges[e1].headtag;
  else
    e1hlabel=edges[edges[e1].headch].label;

  if((edges[e1].type==3||edges[e1].type==1)
     &&tablef[edges[e1].label][e1hlabel][edges[e2].label]
     &&edges[e1].stop==0 && edges[e2].stop==1)
    {
      p=edges[e1].label;
      ch=e1hlabel;

      prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,wcc,tcc,punc,wpunc,tpunc,&new_hash,&eff_hash)+combineprob;

      if(prob>pthresh)
        {
          calc_contexts(&edges[e1].lc,&edges[e1].rc,&edges[e2].lc,&edges[e2].rc,&e3.lc,&e3.rc,1,h2_is_verb,edges[e2].label);
          e3.hasverb = edges[e1].hasverb || edges[e2].hasverb;

          /* e3flag is 0 on entry and this code runs at most once per call, so
             this branch is always taken when reached. */
          if(e3flag==0)
            {
              e3.head=edges[e1].head;
              e3.headtag=edges[e1].headtag;
              e3.type=3;

              e3flag=1;
            }

          e3.headch=edges[e1].headch;
          e3.prob=prob;
          e3.label=edges[e1].label;
          e3.headlabel=edges[e2].label;
          e3.valid=1;
          e3.stop=0;

          /* The candidate is staged in the slot at numedges; add_edge() then
             reports the index where it was kept, or -1. */
          edges[numedges]=e3;

          if((k=add_edge(s,e))!=-1)
            {
              /* Children: copies of e1's children, then the edge for the word
                 at m+1, then e2. */
              edges[k].numchild=edges[e1].numchild+2;
              edges[k].child1=numchilds;
              for(i=0;i<edges[e1].numchild;i++)
                childs[numchilds+i]=childs[edges[e1].child1+i];
              childs[numchilds+edges[k].numchild-2]=sindex[m+1][m+1];
              childs[numchilds+edges[k].numchild-1]=e2;
              numchilds+=edges[k].numchild;
            }
        }
    }
}

/* Upper bound on the number of extra add_singles/add_stops rounds performed
   by add_singles_stops(). */
#define MAXSINGLES 5

/* continue to add singles and stops until no more edges are being added */
/*
 * add_singles_stops(s,e): run add_singles(), add_traces() and add_stops() over
 * the edges of span [s,e], then repeat on the newly recorded edges while
 * ADDFLAG is set again, for at most MAXSINGLES further rounds.
 *
 * The globals adds/numadds are pointed alternately at adds1/numadds1 and
 * adds2/numadds2: in each round add_singles() is fed the entries of adds1
 * while the active list is adds2, and add_stops() is fed the entries of adds2
 * while the active list is adds1.
 * NOTE(review): presumably add_edge() (not visible here) appends new edge
 * indices to the active list and sets ADDFLAG -- confirm.
 */
void add_singles_stops(int s,int e)
{
  int i,n;

  ADDFLAG=0;

  /* Initial pass over every edge currently in the span. */
  add_singles(s,e,sindex[s][e],eindex[s][e]);
  add_traces(s,e);

  numadds = &numadds1;
  adds = adds1;
  numadds1 = 0;

  add_stops(s,e,sindex[s][e],eindex[s][e]);

/*  printf("ADDS");
  for(i=0;i<numadds;i++)
    printf(" %d",adds[i]);
  printf("\n");*/

  n=1;
  while(ADDFLAG==1&&n<=MAXSINGLES)
    {
      ADDFLAG=0;

      /* Singles pass: input is adds1, active list is adds2. */
      numadds = &numadds2;
      adds = adds2;
      numadds2 = 0;

      for(i=0;i<numadds1;i++)
        add_singles(s,e,adds1[i],adds1[i]);

      add_traces(s,e);

      /* Stops pass: input is adds2, active list is adds1. */
      numadds = &numadds1;
      adds = adds1;
      numadds1 = 0;

      for(i=0;i<numadds2;i++)
        add_stops(s,e,adds2[i],adds2[i]);

      n++;
    }

  /* Leave both lists empty for the next span. */
  numadds1=numadds2=0;
}

/*
 * empty_ctxt(c): return 1 when none of the counters np, s, sbar, vp, other
 * and gap of *c is positive, 0 otherwise.
 */
int empty_ctxt(ctxt_type *c)
{
  if(c->np > 0 ||
     c->s > 0 ||
     c->sbar > 0 ||
     c->vp > 0 ||
     c->other > 0 ||
     c->gap > 0 )
    return 0;

  return 1;
}

/*
 * add_stops(s,e,si,ei): for every edge i in [si,ei] that has stop==0, is in
 * the beam for [s,e], is valid and has empty left and right contexts, propose
 * a copy with stop=1.
 *
 * The copy's prob is the original prob plus two dependency-model terms for a
 * STOP modifier (STOPWORD/STOPNT): one computed from the right context
 * (direction argument 0) and one from the left context (direction argument 1).
 * If add_edge() keeps the copy, it receives its own copy of the original's
 * child list.
 */
void add_stops(int s,int e,int si,int ei)
{
  int i,j,k;

  int dist,subcat,cc,punc;
  int wh,ch,th;
  int wm,cm,tm;
  int p;
  int ec;
  int ehlabel;

  for(i=si;i<=ei;i++)
    if(edges[i].stop==0&&inbeam(i,s,e)&&edges[i].valid==1 &&
       empty_ctxt(&edges[i].lc) && empty_ctxt(&edges[i].rc))
      {
        /* Stage the stopped copy in the slot at numedges. */
        edges[numedges]=edges[i];
        edges[numedges].stop=1;

        wm=STOPWORD;
        tm=STOPNT;
        cm=STOPNT;

        cc=0;

        /* ---- first term: STOP on the right ---- */

        /* ec = last child of edge i. */
        ec = childs[edges[i].child1+edges[i].numchild-1];

        if(edges[i].type==0)
          ehlabel=edges[i].headtag;
        else
          ehlabel=edges[edges[i].headch].label;

        /* For NT_NPB edges the last child supplies head label, word and tag,
           and fixed values subcat=0, dist=10 are used instead of the right
           context. */
        if(edges[i].label == NT_NPB)
          {
            ehlabel=edges[ec].label;
            wh=current->wordnos[edges[ec].head];
            th=edges[ec].headtag;
            subcat = 0;
            dist = 10;
          }
        else
          {
            wh=current->wordnos[edges[i].head];
            th=edges[i].headtag;
            onectxt_to_dist_subcat(&edges[i].rc,&dist,&subcat,0);
          }

        punc = 0;
        p=edges[i].label;
        ch=ehlabel;
        /*	printf("EDGE1 %d %g %g %d %d %d ",i,edges[i].prob,edges[i].prob2,edges[i].stop,edges[i].type,edges[i].label);
	printf(" L ");
	onectxt_to_dist_subcat(&edges[i].lc,&dist2,&subcat2,1);
	printf("%d %d",dist2,subcat2);
	printf(" R ");
	onectxt_to_dist_subcat(&edges[i].rc,&dist2,&subcat2,0);
	printf("%d %d\n",dist2,subcat2);
	print_edge(i,0);
	printf("\n\n");
	printf("PROB1 %g\n",get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc,&new_hash,&eff_hash));*/

        edges[numedges].prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,0,0,punc,0,0,&new_hash,&eff_hash)+edges[i].prob;

        /* ---- second term: STOP on the left ---- */

        /* ec = first child of edge i. */
        ec = childs[edges[i].child1];

        if(edges[i].type==0)
          ehlabel=edges[i].headtag;
        else
          ehlabel=edges[edges[i].headch].label;

        /* NT_NPB again uses the adjacent (here: first) child, with fixed
           values subcat=0, dist=110. */
        if(edges[i].label == NT_NPB)
          {
            ehlabel=edges[ec].label;
            wh=current->wordnos[edges[ec].head];
            th=edges[ec].headtag;
            subcat = 0;
            dist = 110;
          }
        else
          {
            wh=current->wordnos[edges[i].head];
            th=edges[i].headtag;
            onectxt_to_dist_subcat(&edges[i].lc,&dist,&subcat,1);
          }

        punc = 0;
        p=edges[i].label;
        ch=ehlabel;
        /*	printf("PROB2 %g\n",get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc,&new_hash,&eff_hash));*/

        edges[numedges].prob+=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,0,0,punc,0,0,&new_hash,&eff_hash);

        if((k=add_edge(s,e))!=-1) /* edge is added to the chart, so add */
          {			/* its children */
            edges[k].child1=numchilds;
            edges[k].numchild=edges[i].numchild;
            for(j=0;j<edges[i].numchild;j++)
              childs[numchilds+j]=childs[edges[i].child1+j];
            numchilds+=edges[k].numchild;
          }
      }
}

/*
 * calc_prob2(edge): set edges[edge].prob2.
 * Type-0 edges get prob2 = 0; every other edge gets prob plus the value
 * returned by get_prior_prob_witheffhash() for its label, head word and head
 * tag.
 */
void calc_prob2(int edge)
{
  int ch,wh,th;
  double prob;

  if(edges[edge].type==0)
    {
      edges[edge].prob2 = 0;
      return;
    }

  ch=edges[edge].label;
  wh=current->wordnos[edges[edge].head];
  th=edges[edge].headtag;

  prob= get_prior_prob_witheffhash(ch,wh,th,&new_hash,edges[edge].head,edges[edge].headtag);

  edges[edge].prob2 = edges[edge].prob + prob;
}

/*
 * inbeam(edge,s,e): return 1 if the edge's prob2 is within the beam of the
 * best score recorded for span [s,e], 0 otherwise.
 *
 * The threshold is bestprobs[s][e]-BEAMPROB, except for type-1 edges labelled
 * NT_NP or NT_NPA, which get 3 units of extra slack.
 *
 * NOTE(review): the indentation of the final "else" is misleading -- it binds
 * to the "if(label == NT_NP || ...)" test, i.e. it covers type-1 edges with
 * any other label.
 */
int inbeam(int edge,int s,int e)
{
  if(edges[edge].type!=1)
    {
      if(edges[edge].prob2 < (bestprobs[s][e]-BEAMPROB) )
        return 0;
    }
  else
    if(edges[edge].label == NT_NP || edges[edge].label == NT_NPA)
      {
        if(edges[edge].prob2 < (bestprobs[s][e]-BEAMPROB - 3) )
          return 0;
      }
  else
    if(edges[edge].prob2 < (bestprobs[s][e]-BEAMPROB) )
        return 0;

  return 1;
}

/*
 * inbeam2(edge,s,e): same test as inbeam(), but the verdict is cached in
 * edges[edge].inbeam.  A stored value of 0 or 1 (anything <= 1) is returned
 * directly; otherwise the test is evaluated and its result stored.
 *
 * NOTE(review): a cached verdict is never re-evaluated here, even if
 * bestprobs[s][e] changes afterwards; presumably the field is reset to a
 * value > 1 elsewhere -- confirm.
 */
int inbeam2(int edge,int s,int e)
{
  if(edges[edge].inbeam<=1) return edges[edge].inbeam;

  if(edges[edge].type!=1)
    {
      if(edges[edge].prob2 < (bestprobs[s][e]-BEAMPROB) )
        {edges[edge].inbeam=0;return 0;}
    }
  else
    if(edges[edge].label == NT_NP || edges[edge].label == NT_NPA)
      {
        if(edges[edge].prob2 < (bestprobs[s][e]-BEAMPROB - 3) )
          {edges[edge].inbeam=0;return 0;}
      }
  else
    if(edges[edge].prob2 < (bestprobs[s][e]-BEAMPROB) )
        {edges[edge].inbeam=0;return 0;}

  edges[edge].inbeam=1;
  return 1;
}

/*
 * add_traces(s,e): for every valid, in-beam edge with stop==0 in span [s,e]
 * whose right context has both gap>0 and np>0 -- or, failing that, whose left
 * context has both -- propose a copy in which that context's gap and np
 * counters are decremented and its adj field cleared.
 *
 * The copy's prob is the original prob plus one dependency-model term for a
 * TRACE modifier (TRACEWORD/TRACETAG/TRACENT).  If add_edge() keeps the copy,
 * its child list is the original's with an extra entry -1 appended (right
 * case) or prepended (left case).
 *
 * Only the edges present in the span on entry are visited (si/ei are read
 * once).
 *
 * NOTE(review): the local "tl" is declared but never used.
 */
void add_traces(int s,int e)
{
  int i,j,k,si,ei;
  char *tl;
  int ehlabel;

  int dist,subcat,cc,punc;
  int wh,ch,th;
  int wm,cm,tm;
  int p;
  int ec;

  si=sindex[s][e];
  ei=eindex[s][e];

  for(i=si;i<=ei;i++)
    if(edges[i].rc.gap>0 && edges[i].rc.np>0 &&
       edges[i].stop==0&&inbeam(i,s,e)&&edges[i].valid==1)
      {
        /* ---- trace on the right ---- */
        edges[numedges]=edges[i];
        /*	onectxt_to_dist_subcat(&edges[i].rc,&dist,0,1,e<current->nws_np-1&&current->commaats[e],0);*/
        edges[numedges].rc.gap--;
        edges[numedges].rc.np--;
        edges[numedges].rc.adj=0;

        wm=TRACEWORD;
        tm=TRACETAG;
        cm=TRACENT;

        cc=0;

        /* ec = last child of edge i. */
        ec = childs[edges[i].child1+edges[i].numchild-1];

        if(edges[i].type==0)
          ehlabel=edges[i].headtag;
        else
          ehlabel=edges[edges[i].headch].label;

        /* NT_NPB: head information comes from the last child, with fixed
           subcat=0, dist=10. */
        if(edges[i].label == NT_NPB)
          {
            ehlabel=edges[ec].label;
            wh=current->wordnos[edges[ec].head];
            th=edges[ec].headtag;
            subcat = 0;
            dist = 10;
          }
        else
          {
            wh=current->wordnos[edges[i].head];
            th=edges[i].headtag;
            /* Distance/subcat are taken from the ORIGINAL right context, i.e.
               before the decrement applied to the copy above. */
            onectxt_to_dist_subcat(&edges[i].rc,&dist,&subcat,0);
          }

        punc = 0;
        p=edges[i].label;
        ch=ehlabel;

        edges[numedges].prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,0,0,punc,0,0,&new_hash,&eff_hash)+edges[i].prob;

        if((k=add_edge(s,e))!=-1) /* edge is added to the chart, so add */
          {			  /* its children */
            edges[k].child1=numchilds;
            edges[k].numchild=edges[i].numchild+1;
            for(j=0;j<edges[i].numchild;j++)
              childs[numchilds+j]=childs[edges[i].child1+j];
            /* The trace is stored as child index -1, placed last. */
            childs[numchilds+edges[i].numchild]=-1;
            numchilds+=edges[k].numchild;
          }
      }
    else if(edges[i].lc.gap>0 && edges[i].lc.np>0 &&
       edges[i].stop==0&&inbeam(i,s,e)&&edges[i].valid==1)
      {
        /* ---- trace on the left ---- */
        edges[numedges]=edges[i];
        edges[numedges].lc.gap--;
        edges[numedges].lc.np--;
        edges[numedges].lc.adj=0;

        wm=TRACEWORD;
        tm=TRACETAG;
        cm=TRACENT;

        cc = 0;

        /* ec = first child of edge i. */
        ec = childs[edges[i].child1];

        if(edges[i].type==0)
          ehlabel=edges[i].headtag;
        else
          ehlabel=edges[edges[i].headch].label;

        /* NT_NPB: head information comes from the first child, with fixed
           subcat=0, dist=110. */
        if(edges[i].label == NT_NPB)
          {
            ehlabel=edges[ec].label;
            wh=current->wordnos[edges[ec].head];
            th=edges[ec].headtag;
            subcat = 0;
            dist = 110;
          }
        else
          {
            wh=current->wordnos[edges[i].head];
            th=edges[i].headtag;
            onectxt_to_dist_subcat(&edges[i].lc,&dist,&subcat,1);
          }

        punc = 0;
        p=edges[i].label;
        ch=ehlabel;

        edges[numedges].prob=get_dependency_prob_witheffhash(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,0,0,punc,0,0,&new_hash,&eff_hash)+edges[i].prob;

        if((k=add_edge(s,e))!=-1) /* edge is added to the chart, so add */
          {			/* its children */
            edges[k].child1=numchilds;
            edges[k].numchild=edges[i].numchild+1;
            /* Original children are shifted up by one slot ... */
            for(j=0;j<edges[i].numchild;j++)
              childs[numchilds+j+1]=childs[edges[i].child1+j];
/*	    childs[numchilds]=childs[edges[i].child1];*/
            /* ... and the trace (-1) is placed first. */
            childs[numchilds]=-1;
            numchilds+=edges[k].numchild;
          }
      }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -