⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prob.c

📁 中心词驱动的短语结构句法分析器。该模型考虑了跟随介词短语的名词短语的中心词的作用。由 MIT 的 Collins 开发
💻 C
📖 第 1 页 / 共 2 页
字号:
/* This code is the statistical natural language parser described in   M. Collins. 1999.  Head-Driven   Statistical Models for Natural Language Parsing. PhD Dissertation,   University of Pennsylvania.   Copyright (C) 1999 Michael Collins    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/#include <assert.h>#include "prob.h"#define START_NT 43/*dependency parameters: D1 = generating nt,tag, D2 = generating word*/#define D1PROBTYPE 0#define D1OLEN 4int D1BACKOFFS[] = {3,9,7,6};#define D2PROBTYPE 1#define D2OLEN 2int D2BACKOFFS[] = {3,13,11,1};int D2BACKOFFS_NOTAG[] = {2,13,11};/*unary parameters */#define UPROBTYPE 2#define UOLEN 1int UBACKOFFS[] = {3,4,2,1};/*start parameters: S1 = generating nt,tag, S2 = generating word*/#define S1PROBTYPE 3#define S1OLEN 2int S1BACKOFFS[] = {1,1};#define S2PROBTYPE 4#define S2OLEN 2int S2BACKOFFS[] = {2,3,1};int S2BACKOFFS_NOTAG[] = {1,3};/*subcategorisation parameters */#define SCPROBTYPE 5#define SCOLEN 3int SCBACKOFFS[] = {3,6,4,3};/*prior parameters: P1 = generating nt,tag, P2 = generating word*/#define P1PROBTYPE 6#define P1OLEN 1int P1BACKOFFS[] = {3,4,2,1};#define P2PROBTYPE 7#define P2OLEN 3int P2BACKOFFS[] = {1,1};/*coordination/punc parameters: CP1 generates tag, CP2 generates the word*/#define CP1PROBTYPE 8#define CP1OLEN 1int CP1BACKOFFS[] = {3,9,6,1};#define CP2PROBTYPE 
9#define CP2OLEN 2int CP2BACKOFFS[] = {3,10,7,1};int CP2BACKOFFS_NOTAG[] = {2,10,7};/*gap parameters */#define GPROBTYPE 10#define GOLEN 1int GBACKOFFS[] = {3,5,3,2};/*make string for P(cm,t | context) */void make_dep1_string(unsigned char *string,int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int punc);/*make string for P(wm | cm,t, context) */void make_dep2_string(unsigned char *string,int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int punc);/*make string for P(ch | p,th,wh) */void make_unary_string(unsigned char *string,int ch,int wh,int th,int p);/*make string for P(ch,th | p==TOP) */void make_s1_string(unsigned char *string,int ch,int wh,int th,int p);/*make string for P(wh | ch,th,p==TOP) */void make_s2_string(unsigned char *string,int ch,int wh,int th,int p);/*make string for P(subcat | ch,th,p,dir) */void make_subcat_string(unsigned char *string,int subcat,int ch,int wh,int th,int p,int dir);/*make strings for P(ch,th | anything) P(wh | ch,th,anything)*/void make_prior1_string(unsigned char *string,int ch,int wh,int th);void make_prior2_string(unsigned char *string,int ch,int wh,int th);/*make strings for P(ccword,cctag | ...) or P(pword,ptag | ...)  
type is 0 for coordination 1 for punctuation */void make_pcc1_string(unsigned char *string,int t,int p,int ch,int th,int wh,int cm,int tm,int wm,int type);void make_pcc2_string(unsigned char *string,int w,int t,int p,int ch,int th,int wh,int cm,int tm,int wm,int type);/*make string for P(gap | wh,ch,th,p) */void make_gap_string(unsigned char *string,int gap,int ch,int wh,int th,int p);void add_dependency_counts(int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int wcc,int tcc,int punc,int wpunc,int tpunc,hash_table *hash){  unsigned char buffer[1000];  if(cm != STOPNT)    add_prior_counts(cm,wm,tm,hash);  wm = fwords[wm];  p  = argmap[p];  ch = gapmap[ch];  make_dep1_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc);  add_counts(buffer,D1OLEN,D1BACKOFFS,D1PROBTYPE,hash);  make_dep2_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc);  add_counts(buffer,D2OLEN,D2BACKOFFS_NOTAG,D2PROBTYPE,hash);  if(cc==1)    {      make_pcc1_string(buffer,tcc,p,ch,th,wh,cm,tm,wm,0);      add_counts(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,hash);      make_pcc2_string(buffer,wcc,tcc,p,ch,th,wh,cm,tm,wm,0);      add_counts(buffer,CP2OLEN,CP2BACKOFFS_NOTAG,CP2PROBTYPE,hash);    }  if(punc==1)    {      make_pcc1_string(buffer,tpunc,p,ch,th,wh,cm,tm,wm,1);      add_counts(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,hash);      make_pcc2_string(buffer,wpunc,tpunc,p,ch,th,wh,cm,tm,wm,1);      add_counts(buffer,CP2OLEN,CP2BACKOFFS_NOTAG,CP2PROBTYPE,hash);    }}double get_dependency_prob(int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int wcc,int tcc,int punc,int wpunc,int tpunc,hash_table *hash){  unsigned char buffer[1000];  double p1,p2,p3,p4;  wm = fwords[wm];  p  = argmap[p];  ch = gapmap[ch];  make_dep1_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc);  p1 = get_prob(buffer,D1OLEN,D1BACKOFFS,D1PROBTYPE,0,5,hash);  if(cm != STOPNT)    {      make_dep2_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc);      p2 = 
get_prob(buffer,D2OLEN,D2BACKOFFS,D2PROBTYPE,0,5,hash);    }  else    p2 = 1;  if(cc==1)    {      make_pcc1_string(buffer,tcc,p,ch,th,wh,cm,tm,wm,0);      p3 = get_prob(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,0,5,hash);/*      printf("P3 %g ",p3);*/      make_pcc2_string(buffer,wcc,tcc,p,ch,th,wh,cm,tm,wm,0);      p3 *= get_prob(buffer,CP2OLEN,CP2BACKOFFS,CP2PROBTYPE,0,5,hash);/*      printf("%g\n",p3);*/    }  else    p3 = 1;  if(punc==1)    {      make_pcc1_string(buffer,tpunc,p,ch,th,wh,cm,tm,wm,1);      p4 = get_prob(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,0,5,hash);/*      printf("P4 %g ",p4);*/      make_pcc2_string(buffer,wpunc,tpunc,p,ch,th,wh,cm,tm,wm,1);      p4 *= get_prob(buffer,CP2OLEN,CP2BACKOFFS,CP2PROBTYPE,0,5,hash);/*      printf("%g\n",p4);*/    }  else    p4 = 1;/*  printf("DEP %d %d %d %d %d %d %d %d %d %d %d %g %g %g\n",	 wm,tm,cm,	 wh,th,p,	 ch,dist,subcat,cc,punc,p1,p2,log(p1*p2));*/  return log(p1*p2*p3*p4);}void add_prior_counts(int ch,int wh,int th,hash_table *hash){  unsigned char buffer[1000];  ch = gapmap[ch];  wh = fwords[wh];    make_prior1_string(buffer,ch,wh,th);  add_counts(buffer,P1OLEN,P1BACKOFFS,P1PROBTYPE,hash);    make_prior2_string(buffer,ch,wh,th);  add_counts(buffer,P2OLEN,P2BACKOFFS,P2PROBTYPE,hash);}double get_prior_prob(int ch,int wh,int th,hash_table *hash){  unsigned char buffer[1000];  double p1,p2;/*  printf("PRIOR %d %d %d\n",ch,wh,th);*/  ch = gapmap[ch];  wh = fwords[wh];    make_prior1_string(buffer,ch,wh,th);  p1=get_prob(buffer,P1OLEN,P1BACKOFFS,P1PROBTYPE,0,5,hash);    make_prior2_string(buffer,ch,wh,th);  p2=get_prob(buffer,P2OLEN,P2BACKOFFS,P2PROBTYPE,1,0,hash);  return log(p1*p2);}void add_unary_counts(int ch,int wh,int th,int p,hash_table *hash){  unsigned char buffer[1000];  add_prior_counts(ch,wh,th,hash);  if(p==START_NT)    {      wh = fwords[wh];      make_s1_string(buffer,ch,wh,th,p);      add_counts(buffer,S1OLEN,S1BACKOFFS,S1PROBTYPE,hash);      make_s2_string(buffer,ch,wh,th,p);      
add_counts(buffer,S2OLEN,S2BACKOFFS_NOTAG,S2PROBTYPE,hash);    }  else    {      make_unary_string(buffer,ch,wh,th,p);      add_counts(buffer,UOLEN,UBACKOFFS,UPROBTYPE,hash);    }}double get_unary_prob(int ch,int wh,int th,int p,hash_table *hash){  unsigned char buffer[1000];  double p1,p2;/*  printf("UNARY %d %d %d %d\n",ch,wh,th,p);*/  if(p==START_NT)    {      wh = fwords[wh];      make_s1_string(buffer,ch,wh,th,p);      p1=get_prob(buffer,S1OLEN,S1BACKOFFS,S1PROBTYPE,0,5,hash);      make_s2_string(buffer,ch,wh,th,p);      p2=get_prob(buffer,S2OLEN,S2BACKOFFS,S2PROBTYPE,0,5,hash);      return log(p1*p2);    }  else    {      make_unary_string(buffer,ch,wh,th,p);      return log(get_prob(buffer,UOLEN,UBACKOFFS,UPROBTYPE,0,5,hash));    }}/*subcat: dir=0 means left, dir=1 means right*/void add_subcat_counts(int subcat,int ch,int wh,int th,int p,int dir,hash_table *hash){  unsigned char buffer[1000];  p=argmap[p];  ch=argmap[ch];  make_subcat_string(buffer,subcat,ch,wh,th,p,dir);  add_counts(buffer,SCOLEN,SCBACKOFFS,SCPROBTYPE,hash);}double get_subcat_prob(int subcat,int ch,int wh,int th,int p,int dir,hash_table *hash){  unsigned char buffer[1000];  p=argmap[p];  ch=argmap[ch];  make_subcat_string(buffer,subcat,ch,wh,th,p,dir);  return log(get_prob(buffer,SCOLEN,SCBACKOFFS,SCPROBTYPE,5,0,hash));}void add_gap_counts(int gap,int ch,int wh,int th,int p,hash_table *hash){  unsigned char buffer[1000];  p=argmap[p];  ch=argmap[ch];  make_gap_string(buffer,gap,ch,wh,th,p);  add_counts(buffer,GOLEN,GBACKOFFS,GPROBTYPE,hash);}double get_gap_prob(int gap,int ch,int wh,int th,int p,hash_table *hash){  unsigned char buffer[1000];  p=argmap[p];  ch=argmap[ch];  make_gap_string(buffer,gap,ch,wh,th,p);  return log(get_prob(buffer,GOLEN,GBACKOFFS,GPROBTYPE,5,0,hash));}/*make string for P(cm,t | context)   position    element  3           tm  4           cm

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -