📄 prob.c
字号:
/* This code is the statistical natural language parser described in M. Collins. 1999. Head-Driven Statistical Models for Natural Language Parsing. PhD Dissertation, University of Pennsylvania. Copyright (C) 1999 Michael Collins This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/#include <assert.h>#include "prob.h"#define START_NT 43/*dependency parameters: D1 = generating nt,tag, D2 = generating word*/#define D1PROBTYPE 0#define D1OLEN 4int D1BACKOFFS[] = {3,9,7,6};#define D2PROBTYPE 1#define D2OLEN 2int D2BACKOFFS[] = {3,13,11,1};int D2BACKOFFS_NOTAG[] = {2,13,11};/*unary parameters */#define UPROBTYPE 2#define UOLEN 1int UBACKOFFS[] = {3,4,2,1};/*start parameters: S1 = generating nt,tag, S2 = generating word*/#define S1PROBTYPE 3#define S1OLEN 2int S1BACKOFFS[] = {1,1};#define S2PROBTYPE 4#define S2OLEN 2int S2BACKOFFS[] = {2,3,1};int S2BACKOFFS_NOTAG[] = {1,3};/*subcategorisation parameters */#define SCPROBTYPE 5#define SCOLEN 3int SCBACKOFFS[] = {3,6,4,3};/*prior parameters: P1 = generating nt,tag, P2 = generating word*/#define P1PROBTYPE 6#define P1OLEN 1int P1BACKOFFS[] = {3,4,2,1};#define P2PROBTYPE 7#define P2OLEN 3int P2BACKOFFS[] = {1,1};/*coordination/punc parameters: CP1 generates tag, CP2 generates the word*/#define CP1PROBTYPE 8#define CP1OLEN 1int CP1BACKOFFS[] = {3,9,6,1};#define CP2PROBTYPE 9#define CP2OLEN 2int CP2BACKOFFS[] = {3,10,7,1};int CP2BACKOFFS_NOTAG[] = {2,10,7};/*gap parameters */#define GPROBTYPE 10#define GOLEN 1int GBACKOFFS[] = {3,5,3,2};/*make string for P(cm,t | context) */void make_dep1_string(unsigned char *string,int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int punc);/*make string for P(wm | cm,t, context) */void make_dep2_string(unsigned char *string,int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int punc);/*make string for P(ch | p,th,wh) */void make_unary_string(unsigned char *string,int ch,int wh,int th,int p);/*make string for P(ch,th | p==TOP) */void make_s1_string(unsigned char *string,int ch,int wh,int th,int p);/*make string for P(wh | ch,th,p==TOP) */void make_s2_string(unsigned char *string,int ch,int wh,int th,int p);/*make string for P(subcat | ch,th,p,dir) */void make_subcat_string(unsigned char *string,int subcat,int ch,int wh,int th,int p,int dir);/*make strings for P(ch,th | anything) P(wh | ch,th,anything)*/void make_prior1_string(unsigned char *string,int ch,int wh,int th);void make_prior2_string(unsigned char *string,int ch,int wh,int th);/*make strings for P(ccword,cctag | ...) or P(pword,ptag | ...) type is 0 for coordination 1 for punctuation */void make_pcc1_string(unsigned char *string,int t,int p,int ch,int th,int wh,int cm,int tm,int wm,int type);void make_pcc2_string(unsigned char *string,int w,int t,int p,int ch,int th,int wh,int cm,int tm,int wm,int type);/*make string for P(gap | wh,ch,th,p) */void make_gap_string(unsigned char *string,int gap,int ch,int wh,int th,int p);void add_dependency_counts(int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int wcc,int tcc,int punc,int wpunc,int tpunc,hash_table *hash){ unsigned char buffer[1000]; if(cm != STOPNT) add_prior_counts(cm,wm,tm,hash); wm = fwords[wm]; p = argmap[p]; ch = gapmap[ch]; make_dep1_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc); add_counts(buffer,D1OLEN,D1BACKOFFS,D1PROBTYPE,hash); make_dep2_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc); add_counts(buffer,D2OLEN,D2BACKOFFS_NOTAG,D2PROBTYPE,hash); if(cc==1) { make_pcc1_string(buffer,tcc,p,ch,th,wh,cm,tm,wm,0); add_counts(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,hash); make_pcc2_string(buffer,wcc,tcc,p,ch,th,wh,cm,tm,wm,0); add_counts(buffer,CP2OLEN,CP2BACKOFFS_NOTAG,CP2PROBTYPE,hash); } if(punc==1) { make_pcc1_string(buffer,tpunc,p,ch,th,wh,cm,tm,wm,1); add_counts(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,hash); make_pcc2_string(buffer,wpunc,tpunc,p,ch,th,wh,cm,tm,wm,1); add_counts(buffer,CP2OLEN,CP2BACKOFFS_NOTAG,CP2PROBTYPE,hash); }}double get_dependency_prob(int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int wcc,int tcc,int punc,int wpunc,int tpunc,hash_table *hash){ unsigned char buffer[1000]; double p1,p2,p3,p4; wm = fwords[wm]; p = argmap[p]; ch = gapmap[ch]; make_dep1_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc); p1 = get_prob(buffer,D1OLEN,D1BACKOFFS,D1PROBTYPE,0,5,hash); if(cm != STOPNT) { make_dep2_string(buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,punc); p2 = get_prob(buffer,D2OLEN,D2BACKOFFS,D2PROBTYPE,0,5,hash); } else p2 = 1; if(cc==1) { make_pcc1_string(buffer,tcc,p,ch,th,wh,cm,tm,wm,0); p3 = get_prob(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,0,5,hash);/* printf("P3 %g ",p3);*/ make_pcc2_string(buffer,wcc,tcc,p,ch,th,wh,cm,tm,wm,0); p3 *= get_prob(buffer,CP2OLEN,CP2BACKOFFS,CP2PROBTYPE,0,5,hash);/* printf("%g\n",p3);*/ } else p3 = 1; if(punc==1) { make_pcc1_string(buffer,tpunc,p,ch,th,wh,cm,tm,wm,1); p4 = get_prob(buffer,CP1OLEN,CP1BACKOFFS,CP1PROBTYPE,0,5,hash);/* printf("P4 %g ",p4);*/ make_pcc2_string(buffer,wpunc,tpunc,p,ch,th,wh,cm,tm,wm,1); p4 *= get_prob(buffer,CP2OLEN,CP2BACKOFFS,CP2PROBTYPE,0,5,hash);/* printf("%g\n",p4);*/ } else p4 = 1;/* printf("DEP %d %d %d %d %d %d %d %d %d %d %d %g %g %g\n", wm,tm,cm, wh,th,p, ch,dist,subcat,cc,punc,p1,p2,log(p1*p2));*/ return log(p1*p2*p3*p4);}void add_prior_counts(int ch,int wh,int th,hash_table *hash){ unsigned char buffer[1000]; ch = gapmap[ch]; wh = fwords[wh]; make_prior1_string(buffer,ch,wh,th); add_counts(buffer,P1OLEN,P1BACKOFFS,P1PROBTYPE,hash); make_prior2_string(buffer,ch,wh,th); add_counts(buffer,P2OLEN,P2BACKOFFS,P2PROBTYPE,hash);}double get_prior_prob(int ch,int wh,int th,hash_table *hash){ unsigned char buffer[1000]; double p1,p2;/* printf("PRIOR %d %d %d\n",ch,wh,th);*/ ch = gapmap[ch]; wh = fwords[wh]; make_prior1_string(buffer,ch,wh,th); p1=get_prob(buffer,P1OLEN,P1BACKOFFS,P1PROBTYPE,0,5,hash); make_prior2_string(buffer,ch,wh,th); p2=get_prob(buffer,P2OLEN,P2BACKOFFS,P2PROBTYPE,1,0,hash); return log(p1*p2);}void add_unary_counts(int ch,int wh,int th,int p,hash_table *hash){ unsigned char buffer[1000]; add_prior_counts(ch,wh,th,hash); if(p==START_NT) { wh = fwords[wh]; make_s1_string(buffer,ch,wh,th,p); add_counts(buffer,S1OLEN,S1BACKOFFS,S1PROBTYPE,hash); make_s2_string(buffer,ch,wh,th,p); add_counts(buffer,S2OLEN,S2BACKOFFS_NOTAG,S2PROBTYPE,hash); } else { make_unary_string(buffer,ch,wh,th,p); add_counts(buffer,UOLEN,UBACKOFFS,UPROBTYPE,hash); }}double get_unary_prob(int ch,int wh,int th,int p,hash_table *hash){ unsigned char buffer[1000]; double p1,p2;/* printf("UNARY %d %d %d %d\n",ch,wh,th,p);*/ if(p==START_NT) { wh = fwords[wh]; make_s1_string(buffer,ch,wh,th,p); p1=get_prob(buffer,S1OLEN,S1BACKOFFS,S1PROBTYPE,0,5,hash); make_s2_string(buffer,ch,wh,th,p); p2=get_prob(buffer,S2OLEN,S2BACKOFFS,S2PROBTYPE,0,5,hash); return log(p1*p2); } else { make_unary_string(buffer,ch,wh,th,p); return log(get_prob(buffer,UOLEN,UBACKOFFS,UPROBTYPE,0,5,hash)); }}/*subcat: dir=0 means left, dir=1 means right*/void add_subcat_counts(int subcat,int ch,int wh,int th,int p,int dir,hash_table *hash){ unsigned char buffer[1000]; p=argmap[p]; ch=argmap[ch]; make_subcat_string(buffer,subcat,ch,wh,th,p,dir); add_counts(buffer,SCOLEN,SCBACKOFFS,SCPROBTYPE,hash);}double get_subcat_prob(int subcat,int ch,int wh,int th,int p,int dir,hash_table *hash){ unsigned char buffer[1000]; p=argmap[p]; ch=argmap[ch]; make_subcat_string(buffer,subcat,ch,wh,th,p,dir); return log(get_prob(buffer,SCOLEN,SCBACKOFFS,SCPROBTYPE,5,0,hash));}void add_gap_counts(int gap,int ch,int wh,int th,int p,hash_table *hash){ unsigned char buffer[1000]; p=argmap[p]; ch=argmap[ch]; make_gap_string(buffer,gap,ch,wh,th,p); add_counts(buffer,GOLEN,GBACKOFFS,GPROBTYPE,hash);}double get_gap_prob(int gap,int ch,int wh,int th,int p,hash_table *hash){ unsigned char buffer[1000]; p=argmap[p]; ch=argmap[ch]; make_gap_string(buffer,gap,ch,wh,th,p); return log(get_prob(buffer,GOLEN,GBACKOFFS,GPROBTYPE,5,0,hash));}/*make string for P(cm,t | context) position element 3 tm 4 cm
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -