edges.h
来自「中心词驱动的短语结构句法分析器。该模型考虑了跟随介词短语的名词短语的中心词的作用」· C头文件 代码 · 共 181 行
H
181 行
/* This code is the statistical natural language parser described in M. Collins. 1999. Head-Driven Statistical Models for Natural Language Parsing. PhD Dissertation, University of Pennsylvania. Copyright (C) 1999 Michael Collins This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/#ifndef EDGES_H#define EDGES_H#include <stdio.h>#include <string.h>#include <math.h>#include <stdlib.h>#include <time.h>#include "grammar.h"/* the basic context type nvs - number of verbs adj - 0/1 if head isn't/is at the end of the edge np/s/sbar/vp/other/gap are number of np/s/sbar/vp/other complements/gaps in the subcat bag*/typedef struct { char nvs; char adj; char np; char s; char sbar; char vp; char other; char gap;} ctxt_type;/* basic data structure for an edge head - an index into the sentence being parsed, identifying the head word headtag - the tag of the head word (the actual value, _not_ a pointer into the sentence) headch - an index into the edges,identifying edge which is the head. if type=0, headch=-1 prob - the probability of the edge prob2 - prob with added 'prior' prob cost child1 - index into childsarray, the first child of the non-terminal numchild - number of children next - next edge of same nt, start,end in the chart; we store a linked list of edges with the same non-terminal for efficiency label - non-terminal label of the edge headlabel - label of the head child (needed for dynamic programming) lc/rc - contexts to the left and right of the head hasverb - 1 if the edge contains a verb somewhere in its surface string (but not counting verbs within basenps) type - 0=POS tag, 1=non-terminal which is not a POS, 3 for a unary valid - 0/1 for false/true. Only a valid edge can be built on, a non-valid edge can be extended though stop - 0/1 if edge doesn't/does have the stop probability added inbeam - 2 means it's not yet calculate whether the edge is in the beam or not, 1/0 means it is/isn't in the beam (2 value is for efficiency reasons, effectively we make sure the value of inbeam is only calculated once)*/typedef struct { int head; char headtag; int headch; int child1; char numchild; int next; double prob; double prob2; char label; ctxt_type lc; ctxt_type rc; int hasverb; char type; char valid; char stop; char headlabel; char inbeam;} edge_type;/*returns 1 if contexts are equal as far as dynamic programming is concerned*/int equal_ctxts(ctxt_type *c1,ctxt_type *c2);/*returns 1 if edges are equal as far as dynamic programming is concerned*/int equal_edges(edge_type *e1,edge_type *e2);/* calc_contexts is used when joining two edges in the chart, e1 e2 -> e3 to calculate the left and right contexts of the new edge e3 l1,r1 l2,r2 are left and right contexts of the two edges being joined l3,r3 will be the left and right contexts of the edge resulting from the join head==1 if edge1 contributes the head, 2 if edge2 contributes the head verb==1 if the modifying edge has a verb as its head modnt is the label of the modifying non-terminal, used to calculate the subcat frame of the new edge (any complements must be removed from the subcat requirements) */void calc_contexts(ctxt_type *l1,ctxt_type *r1,ctxt_type *l2,ctxt_type *r2,ctxt_type *l3,ctxt_type *r3,char head,char verb,int modnt);/* onectxt_to_dist_subcat converts a context to a distance and subcat value which can be used in probability calculations l is the subcat frame drc is 1 if direction is preceding, 0 for following this function converts l into the distance "dist" and "subcat" value given the context and direction*/void onectxt_to_dist_subcat(ctxt_type *l,int *dist,int *subcat,char drc);/* puts all the subcat values to 0 for c*/void clear_args(ctxt_type *c);/* converts an integer subcat value "sub" to a context "c" */void find_args(ctxt_type *c,int sub);#endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?