⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svm_struct_api.c

📁 SVMcfg: Learns a weighted context free grammar from examples. Training examples (e.g. for natural la
💻 C
📖 第 1 页 / 共 4 页
字号:

void        write_label(FILE *fp, LABEL y)
{
  /* Writes label y to file handle fp. */
  tree t = remove_parent_annotation(y.parse, y.si); 
  if(y.parse)
    write_tree(fp, t, y.si);
  else 
    fprintf(fp,"NULL");
  fprintf(fp,"\n");
  free_tree(t);
} 

void        free_pattern(PATTERN x) {
  /* Frees the memory of x. */
  free(x.sentence.e);
}

void        free_label(LABEL y) {
  /* Frees the memory of y. */
  if(y.parse) free_tree(y.parse);
}

void        free_struct_model(STRUCTMODEL sm) 
{
  /* Frees the memory of model. */
  /* if(sm.w) free(sm.w); */ /* this is free'd in free_model */
  if(sm.svm_model) free_model(sm.svm_model,1);
  /* add free calls for user defined data here */
  free_grammar(sm.grammar);
  free_vihashl(sm.weightid_ht);
  si_free(sm.si);
}

void        free_struct_sample(SAMPLE s)
{
  /* Frees the memory of sample s. */
  int i;
  for(i=0;i<s.n;i++) { 
    free_pattern(s.examples[i].x);
    free_label(s.examples[i].y);
  }
  free(s.examples);
}

void        print_struct_help()
{
  /* Prints a help text that is appended to the common help text of
     svm_struct_learn. */
  printf("         --s [1..]   -> Maximum length of sentences to use (default 10)\n");
  printf("         --p {0,1}   -> Use parent annotation for rules (default 0)\n");
  printf("         --a {0,1}   -> Use borders as feature (default 0)\n");
  printf("         --b {0,1}   -> Use length of parent span as feature (default 0)\n");
  printf("         --c {0,1}   -> Use length of childrens spans as features (default 0)\n");
  printf("         --d {0,1}   -> Use difference between children spans as feature\n");
  printf("                        (default 0)\n");
  printf("         -l {0,1}    NOTE: Added loss type '1' in addition to the 0/1-loss '0'.\n");
  printf("                     Loss type '1' uses (1-F_1) as the loss as in [2].\n");
  printf("\n");
  printf("The CKY parser is based on the implementation of Mark Johnson described in:\n");
  printf("   Johnson, M. (1999). PCFG models of linguistic tree representations.\n");
  printf("   Computational Linguistics.\n");
}
 
void         parse_struct_parameters(STRUCT_LEARN_PARM *sparm)
{
  /* Parses the command line parameters that start with -- */
  int i;

  sparm->maxsentlen=10;
  sparm->parent_annotation=0;
  sparm->feat_borders=0;
  sparm->feat_parent_span_length=0;
  sparm->feat_children_span_length=0;
  sparm->feat_diff_children_length=0;
  sparm->si=NULL;

  for(i=0;(i<sparm->custom_argc) && ((sparm->custom_argv[i])[0] == '-');i++) {
    switch ((sparm->custom_argv[i])[2]) 
      { 
      case 's': i++; sparm->maxsentlen=atol(sparm->custom_argv[i]); break;
      case 'p': i++; sparm->parent_annotation=atol(sparm->custom_argv[i]); break;
      case 'a': i++; sparm->feat_borders=atol(sparm->custom_argv[i]); break;
      case 'b': i++; sparm->feat_parent_span_length=atol(sparm->custom_argv[i]); break;
      case 'c': i++; sparm->feat_children_span_length=atol(sparm->custom_argv[i]); break;
      case 'd': i++; sparm->feat_diff_children_length=atol(sparm->custom_argv[i]); break;
      default: printf("\nUnrecognized option %s!\n\n",sparm->custom_argv[i]);
	       exit(0);
      }
  }
}

void        print_struct_help_classify()
{
  /* Prints a help text that is appended to the common help text of
     svm_struct_classify. */
  printf("         --* string -> custom parameters that can be adapted for struct\n");
  printf("                       learning. The * can be replaced by any character\n");
  printf("                       and there can be multiple options starting with --.\n");
}

void         parse_struct_parameters_classify(char *attribute, char *value)
{
  /* Parses one command line parameters that start with -- . The name
     of the parameter is given in attribute, the value is given in
     value. */

  switch (attribute[2]) 
    { 
      /* case 'x': strcpy(xvalue,value); break; */
      default: printf("\nUnrecognized option %s!\n\n",attribute);
	       exit(0);
    }
}

/* ---------------------------------------------------------------- */
  
SVECTOR *collect_phi(bintree parse, STRUCTMODEL *sm, 
		     size_t lpos, size_t *rpos, size_t start, size_t end)
{
  SVECTOR *vecleft=NULL, *vecright=NULL, *sum, *vec;
  long    weightid, mpos=0;
  vindex  vi;

  vi=make_vindex(4);
  vi->e[0]=parse->label;
  vi->n=1;

  if(parse->left) {   /* construct binary rule from tree */
    vi->e[1]=parse->left->label;
    vi->n=2;
    vecleft=collect_phi(parse->left,sm,lpos,rpos,start,end);
    if(parse->right) {
      vi->e[2]=parse->right->label;
      vi->n=3;
      mpos=*rpos;
      vecright=collect_phi(parse->right,sm,mpos,rpos,start,end);
    }
  }
  else {
    *rpos=lpos+1;
  }

  if(parse->left && parse->right) { /* binary rule */
    weightid=vihashl_ref(sm->weightid_ht,vi);
    assert(weightid);
    vec=phi_brule(weightid,parse->left,parse->right,sm,lpos,mpos,
		  *rpos,start,end);
    vec->next=vecleft;                /* prepend vec to vec of left tree */
    append_svector_list(vec,vecright);/* concatenate all for preorder list */
    sum=vec;
  }
  else if(parse->left) {           /* unary rule */
    weightid=vihashl_ref(sm->weightid_ht,vi);
    assert(weightid);
    vec=phi_urule(weightid,parse->left,sm,lpos,*rpos,start,end);
    vec->next=vecleft;                /* prepend vec to vec of left tree */
    sum=vec;
  }
  else {                           /* terminal */
    sum=NULL;
  }

  vindex_free(vi);

  return(sum);
}


SVECTOR *phi_urule(long weightid, bintree child, STRUCTMODEL *sm, 
		   int lpos, int rpos, int start, int end)
{
  WORD    *feat;
  SVECTOR *vec;
  long    basefnum;
  long    pos;
  WORD    empty[2];

  assert(weightid != 0);
  if(weightid <= 0) {
    empty[0].wnum=0;
    return(create_svector(empty,"",1.0));
  }

  basefnum=weightid;
  feat=(WORD *)MALLOC(sizeof(WORD)*MAXFEAT);
  feat[0].wnum=0;
  pos=0;
  
  /* base feature */
  pos=add_feature(feat,pos,basefnum+0,1.0);

  /* borders beginning or end */
  if(sm->sparm->feat_borders) {
    if(lpos == start) 
      pos=add_feature(feat,pos,basefnum+1,1.0);
    if(rpos == end) 
      pos=add_feature(feat,pos,basefnum+2,1.0);
  }

  /* length of span of whole rule */
  if((sm->sparm->feat_parent_span_length) 
     || (sm->sparm->feat_children_span_length))
     pos=encode_number(feat,pos,basefnum+3,rpos-lpos,1.0,2,3,4,5,7,9);

  assert(pos<MAXFEAT);

  vec=create_svector(feat,"",1.0);
  free(feat);

  return(vec);
}


SVECTOR *phi_brule(long weightid, bintree left, bintree right, STRUCTMODEL *sm,
		   int lpos, int mpos, int rpos, int start, int end)
{
  WORD *feat;
  SVECTOR *vec;
  long basefnum;
  long pos;
  WORD    empty[2];

  assert(weightid != 0);
  if(weightid <= 0) {
    empty[0].wnum=0;
    return(create_svector(empty,"",1.0));
  }

  basefnum=weightid;
  feat=(WORD *)MALLOC(sizeof(WORD)*(MAXFEAT+1));
  feat[0].wnum=0;
  pos=0;
  
  /* base feature */
  pos=add_feature(feat,pos,basefnum+0,1.0);

  /* borders beginning or end */
  if(sm->sparm->feat_borders) {
    if(lpos == start) 
      pos=add_feature(feat,pos,basefnum+1,1.0);
    if(rpos == end) 
      pos=add_feature(feat,pos,basefnum+2,1.0);
  }

  /* length of span of whole rule */
  if(sm->sparm->feat_parent_span_length) 
     pos=encode_number(feat,pos,basefnum+3,rpos-lpos,1.0,2,3,4,5,7,9);

  /* length of span of each child */
  if(sm->sparm->feat_children_span_length) {
     pos=encode_number(feat,pos,basefnum+9,rpos-mpos,1.0,2,3,4,5,7,9);
     pos=encode_number(feat,pos,basefnum+15,mpos-lpos,1.0,2,3,4,5,7,9);
  }

  /* difference between left and right child */
  if(sm->sparm->feat_diff_children_length) 
     pos=encode_number(feat,pos,basefnum+21,(rpos-mpos)-(mpos-lpos),1.0,-7,-3,-1,1,3,7);

  assert(pos<MAXFEAT);

  vec=create_svector(feat,"",1.0);
  free(feat);

  return(vec);
}


int add_feature(WORD *feat, int pos, long fnum, long weight)
     /* add feature fnum with value weight to end of feature list */ 
{
  /* find end of feature list */
  for(pos=0;feat[pos].wnum>0;pos++);
  feat[pos].wnum=fnum;
  feat[pos].weight=weight;
  pos++;
  feat[pos].wnum=0;
  return(pos);
}


int encode_number(WORD *feat,int pos,long basefnum,long number,double weight,
		       long a, long b, long c, long d, long e, long f) 
     /* encodes a number as a vector of 6 featurs by thresholding it
	against a,b,c,d,e,f */
{
  /* find end of feature list */
  for(pos=0;feat[pos].wnum>0;pos++);

  if(number <= a) 
    pos=add_feature(feat,pos,basefnum+0,weight);
  if(number <= b) 
    pos=add_feature(feat,pos,basefnum+1,weight);
  if(number <= c) 
    pos=add_feature(feat,pos,basefnum+2,weight);
  if(number <= d) 
    pos=add_feature(feat,pos,basefnum+3,weight);
  if(number <= e) 
    pos=add_feature(feat,pos,basefnum+4,weight);
  if(number <= f) 
    pos=add_feature(feat,pos,basefnum+5,weight);

  return(pos);
}


double urule_value(urule rule, bintree child, STRUCTMODEL *sm, 
		   int lpos, int rpos, int start, int end)
{
  double val;
  SVECTOR *vec;
  DOC *ex;

  assert(rule->weightid >= -1);
  assert(rule->weightid != 0);
  assert(rule->weightid < sm->sizePsi);
  /* printf("urule: %ld\n",rule->weightid); */
  if(rule->weightid == -1) {
    return(0.0);
  }
  else {
    vec=phi_urule(rule->weightid,child,sm,lpos,rpos,start,end);
    ex=create_example(-1,-1,-1,1,vec);
    val=classify_example(sm->svm_model,ex);
    free_example(ex,1);
    return(val);
  }
}

double brule_value(brule rule, bintree left, bintree right, STRUCTMODEL *sm, 
		   int lpos, int mpos, int rpos, int start, int end)
{
  double val;
  SVECTOR *vec;
  DOC *ex;

  assert(rule->weightid >= -1);
  assert(rule->weightid != 0);
  assert(rule->weightid < sm->sizePsi);
  /* printf("brule: %ld\n",rule->weightid); */
  if(rule->weightid == -1) {
    return(0.0);
  }
  else {
    vec=phi_brule(rule->weightid,left,right,sm,lpos,mpos,rpos,start,end);
    ex=create_example(-1,-1,-1,1,vec);
    val=classify_example(sm->svm_model,ex);
    free_example(ex,1);
    return(val);
  }
}

void count_local_trees(const tree t, vihashl localtree_ht)
{
  si_index      e[MAXRHS];
  struct vindex vi = {0, e};
  tree	        p;

  if (!t || !t->subtrees) return;

  vi.n = 0;
  vi.e[vi.n++] = t->label;

  for(p = t->subtrees; p; p=p->sibling) {
    assert(vi.n < MAXRHS);
    vi.e[vi.n++] = p->label;
    count_local_trees(p, localtree_ht);
  }

  vihashl_inc(localtree_ht, &vi, 1);	/* save this list */
}

void write_local_trees(FILE *fp,const vihashl localtree_ht, si_t si)
{
  vihashlit hit;
  vindex  vi;
  long    count;
  size_t  i;
  char    *string;

  for (hit = vihashlit_init(localtree_ht); vihashlit_ok(hit); hit = vihashlit_next(hit)) {
    vi = (vindex) hit.key;
    assert(vi->n > 0);
    assert(vi->n <= MAXRHS);
    count = hit.value;
    string =  si_index_string(si, vi->e[0]);
    assert(string);
    fprintf(fp,"%ld\t%s " REWRITES, count, string);
    
    for (i=1; i<vi->n; i++) {
      string = si_index_string(si, vi->e[i]);
      assert(string);
      fprintf(fp," %s", string);
    }

    fprintf(fp,"\n");
  }}

int tree_eq(const tree t1, const tree t2)
{
  tree	        p1,p2;

  if ((!t1) && (!t2)) return(1);
  if ((!t1) || (!t2)) return(0);
  if(t1->label == t2->label) {
    for(p1 = t1->subtrees,p2 = t2->subtrees; p1 || p2; p1=p1->sibling,p2=p2->sibling) {
      if(!tree_eq(p1,p2))
	return(0);
    }
  }
  else 
    return(0);

  return(1);
}



/* ------- parser -------- */

chart
chart_make(size_t n)
{
  size_t  i, nn = CHART_SIZE(n);
  chart   c = MALLOC(nn*sizeof(sihashcc));
  
  for (i=0; i<nn; i++) 
    c[i] = NULL;	/* chart cell will be constructed in apply_unary() */ 
  
  return c;
}

void
chart_free(chart c, size_t n)
{
  size_t i;

  for (i=0; i<CHART_SIZE(n); i++)
    free_sihashcc(c[i]);

  FREE(c);
}

void
chart_entry_display(FILE *fp, sihashcc chart_entry, si_t si)
{
  sihashccit hit;
  tree t;

  for (hit=sihashccit_init(chart_entry); sihashccit_ok(hit); 
       hit = sihashccit_next(hit)) {
    fprintf(fp, "\n %s: %g \t", si_index_string(si, hit.key),
	    (double) hit.value->prob);
    t = bintree_tree(&hit.value->tree, si);
    write_tree(fp, t, si);
    free_tree(t);
    if(hit.value->present2) {
      fprintf(fp, "\t : %g \t", (double) hit.value->prob2);
      t = bintree_tree(&hit.value->tree2, si);
      write_tree(fp, t, si);
      free_tree(t);
    }
  }
  fprintf(fp, "\n---\n");
}
  
int is_urule_cycle(bintree tree, si_index label, si_t si)
{
  if(!tree)                      /* is empty tree */
    return(0);
  else if(remove_parent_from_label(tree->label, si) 
	  == remove_parent_from_label(label,si))  
                                 /* already member of unary chain */
    return(1);
  else if(tree->right)           /* not produced by a unary rule */
    return(0);
  else {
    return(is_urule_cycle(tree->left,label,si));
  }
}

int
add_edge_zeroone(sihashcc chart_entry, si_index label, 
		 bintree left, bintree right,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -