/* svm_struct_api.c: New training algorithm for linear classification
   SVMs that can be much faster than SVMlight for large ... */

	ybar.class[predset[i].id]=((numn-2.0*sumn)*0.5*100.0/nump)/numn;
	sump++;
      }
      else {
	ybar.class[predset[i].id]=-((nump-2.0*(nump-sump))*0.5*100.0/nump)/numn;
	sumn++;
      }
    }
  }
  else {
    printf("ERROR: Invalid loss function '%d'.\n",sparm->loss_function);
    exit(1);
  }

  if(struct_verbosity >= 1) {
    SVECTOR *fy=psi(x,y,sm,sparm);
    SVECTOR *fybar=psi(x,ybar,sm,sparm);
    DOC *exy=create_example(0,0,1,1,fy);
    DOC *exybar=create_example(0,0,1,1,fybar);
    printf("\n max_y (loss(y_i,y)+ w*Psi(x,y)=%f\n",
	   loss(y,ybar,sparm)+classify_example(sm->svm_model,exybar));
    printf("w*Psi(x,y)=%f, w*Psi(x,ybar)=%f\n",
	   classify_example(sm->svm_model,exy),
	   classify_example(sm->svm_model,exybar));
    free_example(exy,1);
    free_example(exybar,1);
  }
  free(score);
  free(scorep);
  free(scoren);
  free(predset);

  return(ybar);
}

double objective_avgprec(int *negabove,STRUCT_ID_SCORE *scorep,int nump,
			 STRUCT_ID_SCORE *scoren,int numn)
{
  double val;
  int i,j;
  val=0;
  for(i=0;i<nump;i++) { /* contribution from loss */
    val+=(double)(i+1)/(double)(i+1+negabove[i]);
    /*    printf("na[%d]=%d,",i,negabove[i]); */
  }
  val=100-100.0*val/(double)(nump);

  printf("argmaxloss=%f\n",val);

  for(i=0;i<nump;i++) { /* contribution from pos scores */
    val+=(numn-negabove[i])*scorep[i].score-negabove[i]*scorep[i].score;
    for(j=0;j<negabove[i];j++) { /* contribution from neg scores */
      val+=scoren[j].score;
    }
    for(j=negabove[i];j<numn;j++) { /* contribution from neg scores */
      val-=scoren[j].score;
    }
  }
  return(val);
}
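
/* Illustrative sketch (an addition, not part of the original package):
   negabove[i] is the number of negatives ranked above the i-th highest
   scoring positive, so (i+1)/(i+1+negabove[i]) is the precision at that
   positive. Average precision is the mean of these values, and the loss
   term in objective_avgprec() above is 100*(1-AvgPrec). */
static double avgprec_from_negabove(int *negabove, int nump)
{
  double sum=0;
  int i;
  for(i=0;i<nump;i++)
    sum+=(double)(i+1)/(double)(i+1+negabove[i]);
  return(sum/(double)(nump));
}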

LABEL       find_most_violated_constraint_avgprec(PATTERN x, LABEL y, 
						     STRUCTMODEL *sm, 
						     STRUCT_LEARN_PARM *sparm,
						     int loss_type)
{
  /* Finds the most violated constraint for metrics that are based on
     a threshold. 
     WARNING: Currently only implemented for margin-rescaling!!! */
  LABEL ybar;
  int i,ii,iii,j,nump,numn,*negabove,changed,totwords;
  double *score,v,vv,vmax,*ortho_weights;
  STRUCT_ID_SCORE *scorep,*scoren;
  MODEL svm_model;

  ybar.totdoc=x.totdoc;
  ybar.class=(double *)my_malloc(sizeof(double)*x.totdoc);
  score=(double *)my_malloc(sizeof(double)*(ybar.totdoc+1));
  scorep=(STRUCT_ID_SCORE *)my_malloc(sizeof(STRUCT_ID_SCORE)*(ybar.totdoc+1));
  scoren=(STRUCT_ID_SCORE *)my_malloc(sizeof(STRUCT_ID_SCORE)*(ybar.totdoc+1));

  totwords=sm->svm_model->totwords;
  svm_model=(*sm->svm_model);

  /* For sparse kernel, replace weight vector with beta=gamma^T*L^-1 */
  if(sm->sparse_kernel_type>0) {
    svm_model.lin_weights=(double *)my_malloc(sizeof(double)*(totwords+1));
    ortho_weights=prod_nvector_ltmatrix(sm->svm_model->lin_weights+1,sm->invL);
    for(i=0;i<sm->invL->m;i++)
      svm_model.lin_weights[i+1]=ortho_weights[i];
    svm_model.lin_weights[0]=0;
    free_nvector(ortho_weights);
  }

  nump=0;
  numn=0;
  for(i=0;i<x.totdoc;i++) {
    score[i]=classify_example(&svm_model,x.doc[i]);
    if(y.class[i] > 0) {
      scorep[nump].score=score[i];
      scorep[nump].tiebreak=0;
      scorep[nump].id=i;
      nump++;
    }
    else {
      scoren[numn].score=score[i];
      scoren[numn].tiebreak=0;
      scoren[numn].id=i;
      numn++;
    }
  }
  if(nump)
    qsort(scorep,nump,sizeof(STRUCT_ID_SCORE),comparedown);
  if(numn)
    qsort(scoren,numn,sizeof(STRUCT_ID_SCORE),comparedown);

  /* Restore weight vector that was modified above */
  if(sm->sparse_kernel_type>0) {
    free(svm_model.lin_weights);
  }

  /* find max of loss(ybar,y)+score(ybar) */
  if(sparm->loss_function == AVGPREC) { /* Average Precision */
    negabove=(int *)my_malloc(sizeof(int)*(nump+1));
    v=0;
    for(i=0;i<nump;i++) {
      negabove[i]=0;
      v+=numn*scorep[i].score;
    }
    for(j=0;j<numn;j++) {
      v-=nump*scoren[j].score;
    }
    negabove[nump]=numn;
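    /* v now equals the objective for the initial ranking that places all
       positives above all negatives: its AvgPrec loss term is zero, so
       only the pairwise score terms remain. negabove[nump]=numn is a
       sentinel marking the bottom of the ranking for the search below. */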
    vv=objective_avgprec(negabove,scorep,nump,scoren,numn);
    printf("v=%f == vv=%f\n",v,vv);
    vmax=v;
    changed=1;
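    /* Greedy local search: repeatedly try to pull negatives up past runs
       of consecutive positives and keep any move that strictly increases
       the objective; the loop ends when a full pass finds no improving
       move, so termination is guaranteed. */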
    while(changed) {
      changed=0;
      for(i=nump-1;i>=0;i--) {
	if(negabove[i+1] > negabove[i]) {/* there is a neg between the 2 pos */
	  for(ii=i;ii>=0;ii--) { /* try to move neg up */
	    for(iii=i;iii>=ii;iii--) {
	      negabove[iii]++;
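	      /* Incremental update: pushing one more negative above
		 positive iii changes the loss term by
		 100/nump*((iii+1)/(iii+1+negabove[iii]-1)
			   -(iii+1)/(iii+1+negabove[iii]))
		 and the score term by
		 -2*(scorep[iii].score-scoren[negabove[iii]-1].score). */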
	      v=v+(-100.0/(double)(nump)*(double)(iii+1)/(double)(iii+1+negabove[iii])
		   +100.0/(double)(nump)*(double)(iii+1)/(double)(iii+1+negabove[iii]-1)
		   -2*(scorep[iii].score-scoren[negabove[iii]-1].score));
	    }
	    /* vv=objective_avgprec(negabove,scorep,nump,scoren,numn); 
  	       printf("v=%f == vv=%f\n",v,vv); */
	    if(v<=vmax) {
	      for(iii=i;iii>=ii;iii--) { /* undo step */
		v=v-(-100.0/(double)(nump)*(double)(iii+1)/(double)(iii+1+negabove[iii])
		     +100.0/(double)(nump)*(double)(iii+1)/(double)(iii+1+negabove[iii]-1)
		     -2*(scorep[iii].score-scoren[negabove[iii]-1].score));
		negabove[iii]--;
	      }
	    }
	    else {
	      ii=-1; /* stop */
	      changed=1;
	      vmax=v;
	      /*
	      for(i=0;i<nump;i++) {
		printf("na[%d]=%d,",i,negabove[i]); 
	      }
	      printf("vmax=%f\n",vmax);
	      */
	    }
	    if((ii>0) && (negabove[ii-1]!=negabove[ii])) 
	      ii=-1; 
	  }
	}
      }
    }
    vv=objective_avgprec(negabove,scorep,nump,scoren,numn);
    printf("v=%f == vv=%f\n",v,vv);
    printf("vmax=%f\n",vmax);
    /*
    for(i=0;i<nump;i++) {
      printf("na[%d]=%d,",i,negabove[i]); 
    }
    */

    for(i=0;i<ybar.totdoc;i++) { /* create Psi with maximum objective */
      ybar.class[i]=0;
    }
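    /* Encode the argmax ranking in ybar as pairwise coefficients: each
       positive receives (#negatives ranked below it)-(#negatives ranked
       above it), each negative receives +1 per positive ranked below it
       and -1 per positive ranked above it. psi() then weights every
       document's feature vector by its coefficient. */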
    for(i=0;i<nump;i++) { 
      ybar.class[scorep[i].id]=numn-2*negabove[i];
      for(j=0;j<negabove[i];j++) {
	ybar.class[scoren[j].id]++;
      }
      for(j=negabove[i];j<numn;j++) {
	ybar.class[scoren[j].id]--;
      }
    }
  }
  else {
    printf("ERROR: Invalid loss function '%d'.\n",sparm->loss_function);
    exit(1);
  }

  if(struct_verbosity >= 1) {
    SVECTOR *fy=psi(x,y,sm,sparm);
    SVECTOR *fybar=psi(x,ybar,sm,sparm);
    DOC *exy=create_example(0,0,1,1,fy);
    DOC *exybar=create_example(0,0,1,1,fybar);
    printf("\n max_y (loss(y_i,y)+ w*Psi(x,y)=%f\n",
	   loss(y,ybar,sparm)+classify_example(sm->svm_model,exybar));
    printf("w*Psi(x,y)=%f, w*Psi(x,ybar)=%f\n",
	   classify_example(sm->svm_model,exy),
	   classify_example(sm->svm_model,exybar));
    free_example(exy,1);
    free_example(exybar,1);
  }
  free(score);
  free(scorep);
  free(scoren);

  return(ybar);
}


int         empty_label(LABEL y)
{
  /* Returns true if y is an empty label. An empty label might be
     returned by find_most_violated_constraint_???(x, y, sm) if there
     is no incorrect label that can be found for x, or if it is unable
     to label x at all */
  return(0);
}

SVECTOR     *psi(PATTERN x, LABEL y, STRUCTMODEL *sm,
		 STRUCT_LEARN_PARM *sparm)
{
  /* Returns a feature vector describing the match between pattern x
     and label y. The feature vector is returned as a list of
     SVECTOR's. Each SVECTOR is in a sparse representation of pairs
     <featurenumber:featurevalue>, where the last pair has
     featurenumber 0 as a terminator. Feature numbers start with 1 and
     end with sizePsi. Feature numbers that are not specified default
     to value 0. As mentioned before, psi() actually returns a list of
     SVECTOR's. Each SVECTOR has a field 'factor' and 'next'. 'next'
     specifies the next element in the list, terminated by a NULL
     pointer. The list can be thought of as a linear combination of
     vectors, where each vector is weighted by its 'factor'. This
     linear combination of feature vectors is multiplied with the
     learned (kernelized) weight vector to score label y for pattern
     x. Without kernels, there will be one weight in sm.w for each
     feature. Note that psi has to match
     find_most_violated_constraint_???(x, y, sm) and vice versa. In
     particular, find_most_violated_constraint_???(x, y, sm) finds
     the ybar!=y that maximizes psi(x,ybar,sm)*sm.w (where * is the
     inner vector product) combined with the loss according to the
     margin/slack rescaling method; see the corresponding paper. */
  SVECTOR *fvec=NULL,*fvec2;
  double *sum,*sum2;
  long i,totwords;

  /* The following special case speeds up computation for the linear
     kernel. The lines add the feature vectors for all examples into
     a single vector. This is more efficient for the linear kernel,
     but is invalid for all other kernels. */
  if(sm->svm_model->kernel_parm.kernel_type == LINEAR) {
    totwords=sparm->num_features;
    sum=(double *)my_malloc(sizeof(double)*(totwords+1));
    clear_nvector(sum,totwords);
    for(i=0;i<y.totdoc;i++) 
      add_vector_ns(sum,x.doc[i]->fvec,y.class[i]);

    /* For sparse kernel, replace feature vector Psi with k=L^-1*Psi */
    if(sm->sparse_kernel_type>0) { 
      sum2=prod_ltmatrix_nvector(sm->invL,sum+1);
      for(i=0;i<totwords;i++)
	sum[i+1]=sum2[i];
      free_nvector(sum2);
    }

    fvec=create_svector_n(sum,totwords,"",1);
    free(sum);
  }
  else { /* general kernel */
    for(i=0;i<y.totdoc;i++) {
      fvec2=copy_svector(x.doc[i]->fvec);
      fvec2->next=fvec;
      fvec2->factor=y.class[i];
      /* printf("class %d: %f\n",i,y.class[i]); */
      fvec=fvec2;
    }
  }

  return(fvec);
}
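
/* Illustrative sketch (an addition, not part of the original package):
   scoring the list of SVECTOR's returned by psi() against a dense weight
   vector, as in the linear-kernel case. Assumes sprod_ns() from
   svm_common, the dense-by-sparse inner product. */
static double score_psi_list(double *weights, SVECTOR *flist)
{
  double score=0;
  SVECTOR *f;
  for(f=flist;f;f=f->next)                 /* walk the 'next'-linked list */
    score+=f->factor*sprod_ns(weights,f);  /* each vector weighted by 'factor' */
  return(score);
}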

double      loss(LABEL y, LABEL ybar, STRUCT_LEARN_PARM *sparm)
{
  /* Loss for the correct label y and the predicted label ybar. The loss for
     y==ybar has to be zero. sparm->loss_function is set with the -l option. */
  int a=0,b=0,c=0,d=0,i;
  double loss=1;

  /* compute contingency table */
  for(i=0;i<y.totdoc;i++) {
    if((y.class[i] > 0) && (ybar.class[i] > 0)) {
      a++;
    }
    if((y.class[i] > 0) && (ybar.class[i] <= 0)) {
      c++;
    }
    if((y.class[i] < 0) && (ybar.class[i] > 0)) {
      b++;
    }
    if((y.class[i] < 0) && (ybar.class[i] <= 0)) {
      d++;
    }
    /* printf("%f %f\n",y.class[i],ybar.class[i]); */
  }
  /* Return the loss according to the selected loss function. */
  if(sparm->loss_function == ZEROONE) { /* type 0 loss: 0/1 loss */
                                  /* return 0, if y==ybar. return 1 else */
    loss=zeroone_loss(a,b,c,d);
  }
  else if(sparm->loss_function == FONE) {
    loss=fone_loss(a,b,c,d);
  }
  else if(sparm->loss_function == ERRORRATE) {
    loss=errorrate_loss(a,b,c,d);
  }
  else if(sparm->loss_function == PRBEP) {
    /* WARNING: only valid if called for a labeling that is at PRBEP */
    loss=prbep_loss(a,b,c,d);
  }
  else if(sparm->loss_function == PREC_K) {
    /* WARNING: only valid if called for a labeling that predicts k positives */
    loss=prec_k_loss(a,b,c,d);
  }
  else if(sparm->loss_function == REC_K) {
    /* WARNING: only valid if called for a labeling that predicts k positives */
    loss=rec_k_loss(a,b,c,d);
  }
  else if(sparm->loss_function == SWAPPEDPAIRS) {
    loss=swappedpairs_loss(y,ybar);
  }
  else if(sparm->loss_function == AVGPREC) {
    loss=avgprec_loss(y,ybar);
  }
  else {
    /* Put your code for different loss functions here. But then
       find_most_violated_constraint_???(x, y, sm) has to return the
       highest scoring label with the largest loss. */
    printf("Unkown loss function type: %d\n",sparm->loss_function);
    exit(1);
  }
  if(struct_verbosity >= 2)
    printf("    loss=%f; contingency_table=(%d, %d, %d, %d)\n",loss,a,b,c,d);
  return(loss);
}
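
/* Illustrative sketch (an addition; the package's real fone_loss() is
   defined elsewhere and may treat edge cases differently): an F1-based
   percentage loss from the contingency table, using the standard
   definition F1 = 2a/(2a+b+c). The d argument is unused and kept only
   for a signature parallel to the other loss helpers. */
static double fone_loss_sketch(int a, int b, int c, int d)
{
  (void)d;          /* silence unused-parameter warnings */
  if(2*a+b+c == 0)  /* no true and no predicted positives: perfect */
    return(0);
  return(100.0*(1.0-2.0*(double)a/(double)(2*a+b+c)));
}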

int         finalize_iteration(double ceps, int cached_constraint,
			       SAMPLE sample, STRUCTMODEL *sm,
			       CONSTSET cset, double *alpha, 
			       STRUCT_LEARN_PARM *sparm)
{
  /* This function is called just before the end of each cutting plane
     iteration. ceps is the amount by which the most violated constraint
     found in the current iteration was violated. cached_constraint is
     true if the added constraint was constructed from the cache. If the
     return value is FALSE, then the algorithm is allowed to terminate.
     If it is TRUE, the algorithm will keep iterating even if the desired
     precision sparm->epsilon is already reached. */
  return(0);
}

void        print_struct_learning_stats(SAMPLE sample, STRUCTMODEL *sm,
					CONSTSET cset, double *alpha, 
					STRUCT_LEARN_PARM *sparm)
{
  /* This function is called after training and allows final touches to
     the model sm. But primarily it allows computing and printing any
     kind of statistic (e.g. training error) you might want. */
  int i,j;


  /* Move weight from bias feature to threshold parameter in svm model */
  /* WARNING: The following is correct only for the linear kernel! */
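  /* Equivalence note (assuming each example carries the bias feature with
     value sparm->bias): that feature adds
     sparm->bias*lin_weights[bias_featurenum] to every score. SVM-light
     classifies with w*x - b, so setting
     b = -sparm->bias*lin_weights[bias_featurenum] and zeroing the feature
     leaves all predictions unchanged. */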
  if(sparm->bias_featurenum) { 
    sm->svm_model->b=-sparm->bias*sm->svm_model->lin_weights[sparm->bias_featurenum];
    /* Set the bias feature to zero in all examples */
    for(i=1;i<sm->svm_model->sv_num;i++) {
      for(j=0;sm->svm_model->supvec[i]->fvec->words[j].wnum;j++)
	if(sm->svm_model->supvec[i]->fvec->words[j].wnum==sparm->bias_featurenum) {
	  sm->svm_model->supvec[i]->fvec->words[j].wnum=0;
	  sm->svm_model->supvec[i]->fvec->words[j].weight=0;
	}
    }
  }
}

void        print_struct_testing_stats(SAMPLE sample, STRUCTMODEL *sm,
				       STRUCT_LEARN_PARM *sparm, 
				       STRUCT_TEST_STATS *teststats)
{
  /* This function is called after making all test predictions in
     svm_struct_classify and allows computing and printing any kind of
     evaluation (e.g. precision/recall) you might want. You can use
     the function eval_prediction to accumulate the necessary
     statistics for each prediction. */

  if(!teststats->test_data_unlabeled) {
    printf("NOTE: The loss reported above is the percentage of errors. The zero/one-error\n");
    printf("      is the multivariate zero/one-error regarding the whole prediction vector!\n");
    printf("Accuracy : %5.2f\n",100-teststats->errorrate);
    printf("Precision: %5.2f\n",teststats->precision);
    printf("Recall   : %5.2f\n",teststats->recall);
    printf("F1       : %5.2f\n",teststats->fone);
    printf("PRBEP    : %5.2f\n",teststats->prbep);
    printf("ROCArea  : %5.2f\n",teststats->rocarea);
    printf("AvgPrec  : %5.2f\n",teststats->avgprec);
  }
  else {
    printf("NOTE: %ld test examples are unlabeled, so performance cannot be computed. The\n",teststats->test_data_unlabeled);
    printf("      loss and the error reported above may be inaccurate!\n");
  }
}

void        eval_prediction(long exnum, EXAMPLE ex, LABEL ypred, 
			    STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, 
			    STRUCT_TEST_STATS *teststats)
{
  /* This function allows you to accumulate statistics for how well the
     prediction matches the labeled example. It is called from
     svm_struct_classify. See also the function
     print_struct_testing_stats. */
  long i;
  if(exnum == 0) { /* this is the first time the function is
		      called. So initialize the teststats */
  }
  teststats->test_data_unlabeled=0;
  for(i=0;i<ex.y.totdoc;i++) 
    if(ex.y.class[i]==0) 
      teststats->test_data_unlabeled++;
  if(!teststats->test_data_unlabeled) {
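    /* Each metric is computed by temporarily switching sparm->loss_function
       and converting the percentage loss back into a percentage score,
       e.g. precision = 100 - prec@k loss. */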
    sparm->loss_function=ERRORRATE;
    teststats->errorrate=loss(ex.y,ypred,sparm);
    sparm->loss_function=PREC_K;
    teststats->precision=100.0-loss(ex.y,ypred,sparm);
    sparm->loss_function=REC_K;
    teststats->recall=100.0-loss(ex.y,ypred,sparm);
    sparm->loss_function=FONE;
    teststats->fone=100.0-loss(ex.y,ypred,sparm);
    teststats->prbep=prbep(ex.y,ypred);
    teststats->rocarea=rocarea(ex.y,ypred);
    teststats->avgprec=avgprec(ex.y,ypred);
  }
}
