/* svm_learn.c */
                  long int *chosen, long int *active2dnum, MODEL *model,
                  long int totdoc, long int *working2dnum, long int varnum,
                  double *a, double *lin, double *c, LEARN_PARM *learn_parm,
                  CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
                  double *epsilon_crit_target)
     /* Do optimization on the working set. */
{
  long i;
  double *a_v;

  compute_matrices_for_optimization(docs,label,unlabeled,chosen,active2dnum,
                                    working2dnum,model,a,lin,c,varnum,
                                    totdoc,learn_parm,aicache,kernel_parm,
                                    qp);

  if(verbosity>=3) {
    printf("Running optimizer..."); fflush(stdout);
  }
  /* call the qp-subsolver */
  a_v=optimize_qp(qp,epsilon_crit_target,
                  learn_parm->svm_maxqpsize,
                  &(model->b),    /* in case the optimizer gives us */
                                  /* the threshold for free. otherwise */
                                  /* b is calculated in calculate_model. */
                  learn_parm);
  if(verbosity>=3) {
    printf("done\n");
  }

  for(i=0;i<varnum;i++) {
    a[working2dnum[i]]=a_v[i];
    /*
    if(a_v[i]<=(0+learn_parm->epsilon_a)) {
      a[working2dnum[i]]=0;
    }
    else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
      a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
    }
    */
  }
}

void compute_matrices_for_optimization(DOC *docs, long int *label,
          long int *unlabeled, long int *chosen, long int *active2dnum,
          long int *key, MODEL *model, double *a, double *lin, double *c,
          long int varnum, long int totdoc, LEARN_PARM *learn_parm,
          CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
{
  register long ki,kj,i,j;
  register double kernel_temp;

  if(verbosity>=3) {
    fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
    fflush(stdout);
  }

  qp->opt_n=varnum;
  qp->opt_ce0[0]=0; /* compute the constant for equality constraint */
  for(j=1;j<model->sv_num;j++) { /* start at 1 */
    if(!chosen[(model->supvec[j])->docnum]) {
      qp->opt_ce0[0]+=model->alpha[j];
    }
  }
  if(learn_parm->biased_hyperplane)
    qp->opt_m=1;
  else
    qp->opt_m=0;  /* eq-constraint will be ignored */

  /* init linear part of objective function */
  for(i=0;i<varnum;i++) {
    qp->opt_g0[i]=lin[key[i]];
  }

  for(i=0;i<varnum;i++) {
    ki=key[i];

    /* Compute the matrix for equality constraints */
    qp->opt_ce[i]=label[ki];
    qp->opt_low[i]=0;
    qp->opt_up[i]=learn_parm->svm_cost[ki];

    kernel_temp=(double)kernel(kernel_parm,&(docs[ki]),&(docs[ki]));
    /* compute linear part of objective function */
    qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
    /* compute quadratic part of objective function */
    qp->opt_g[varnum*i+i]=kernel_temp;
    for(j=i+1;j<varnum;j++) {
      kj=key[j];
      kernel_temp=(double)kernel(kernel_parm,&(docs[ki]),&(docs[kj]));
      /* compute linear part of objective function */
      qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
      qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
      /* compute quadratic part of objective function */
      qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
      qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
    }

    if(verbosity>=3) {
      if(i % 20 == 0) {
        fprintf(stdout,"%ld..",i); fflush(stdout);
      }
    }
  }

  for(i=0;i<varnum;i++) {
    /* assure starting at feasible point */
    qp->opt_xinit[i]=a[key[i]];
    /* set linear part of objective function */
    qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
  }

  if(verbosity>=3) {
    fprintf(stdout,"done\n");
  }
}
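
/*
 * A minimal sketch (not part of the original file) of what the loops above
 * compute: the subproblem Hessian has entries H[i][j] = y_i*y_j*K(x_i,x_j)
 * over the working set, and the linear part opt_g0 removes the working-set
 * contributions from lin[] so that only the fixed alphas remain. The helper
 * below fills such a Hessian for a plain dot-product kernel, using
 * hypothetical dense vectors instead of SVM-light's DOC/WORD structures.
 */
static void toy_qp_hessian(double **x, long *y, long n, long dim, double *h)
{
  long i,j,k;
  double dot;
  for(i=0;i<n;i++) {
    for(j=0;j<n;j++) {
      for(dot=0.0,k=0;k<dim;k++)
        dot+=x[i][k]*x[j][k];                    /* K(x_i,x_j) = <x_i,x_j> */
      h[n*i+j]=(double)y[i]*(double)y[j]*dot;    /* matches qp->opt_g above */
    }
  }
}
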
long calculate_svm_model(DOC *docs, long int *label, long int *unlabeled,
          double *lin, double *a, double *a_old, double *c,
          LEARN_PARM *learn_parm, long int *working2dnum,
          long int *active2dnum, MODEL *model)
     /* Compute decision function based on current values */
     /* of alpha. */
{
  long i,ii,pos,b_calculated=0,first_low,first_high;
  double ex_c,b_temp,b_low,b_high;

  if(verbosity>=3) {
    printf("Calculating model..."); fflush(stdout);
  }

  if(!learn_parm->biased_hyperplane) {
    model->b=0;
    b_calculated=1;
  }

  for(ii=0;(i=working2dnum[ii])>=0;ii++) {
    if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
      pos=model->index[i];
      model->index[i]=-1;
      (model->sv_num)--;
      model->supvec[pos]=model->supvec[model->sv_num];
      model->alpha[pos]=model->alpha[model->sv_num];
      model->index[(model->supvec[pos])->docnum]=pos;
    }
    else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
      model->supvec[model->sv_num]=&(docs[i]);
      model->alpha[model->sv_num]=a[i]*(double)label[i];
      model->index[i]=model->sv_num;
      (model->sv_num)++;
    }
    else if(a_old[i]==a[i]) { /* nothing to do */
    }
    else {  /* just update alpha */
      model->alpha[model->index[i]]=a[i]*(double)label[i];
    }

    ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
    if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
      (model->at_upper_bound)--;
    }
    else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
      (model->at_upper_bound)++;
    }

    if((!b_calculated)
       && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {   /* calculate b */
      model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]);
      /* model->b=(-(double)label[i]+lin[i]); */
      b_calculated=1;
    }
  }

  /* No alpha in the working set was strictly between the bounds, so b
     could not be calculated in the usual way. The following handles
     this special case. */
  if(learn_parm->biased_hyperplane
     && (!b_calculated)
     && (model->sv_num-1 == model->at_upper_bound)) {
    first_low=1;
    first_high=1;
    b_low=0;
    b_high=0;
    for(ii=0;(i=active2dnum[ii])>=0;ii++) {
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if(a_old[i]<ex_c) {
        if(label[i]>0)  {
          b_temp=-(learn_parm->eps-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(-learn_parm->eps-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
      else {
        if(label[i]<0)  {
          b_temp=-(-learn_parm->eps-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(learn_parm->eps-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
    }
    if(first_high) {
      model->b=-b_low;
    }
    else if(first_low) {
      model->b=-b_high;
    }
    else {
      model->b=-(b_high+b_low)/2.0;  /* select b as the middle of range */
      /* printf("\nb_low=%lf, b_high=%lf,b=%lf\n",b_low,b_high,model->b); */
    }
  }

  if(verbosity>=3) {
    printf("done\n"); fflush(stdout);
  }

  return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
}
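
/*
 * A minimal sketch (not part of the original file): assuming the plain
 * classification setting where learn_parm->eps == -1 and c[i] == 0, the
 * threshold rule above, model->b = label[i]*eps - c[i] + lin[i], reduces to
 * the textbook identity b = lin[i] - y_i, because the KT conditions force
 * y_i*(lin[i]-b) = 1 for any free support vector (0 < a_i < C).
 */
static double toy_threshold(double lin_i, long y_i)
{
  /* solve y_i*(lin_i - b) == 1 for b, with y_i in {-1,+1} */
  return lin_i-(double)y_i;
}
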
long check_optimality(MODEL *model, long int *label, long int *unlabeled,
          double *a, double *lin, double *c, long int totdoc,
          LEARN_PARM *learn_parm, double *maxdiff,
          double epsilon_crit_org, long int *misclassified,
          long int *inconsistent, long int *active2dnum,
          long int *last_suboptimal_at, long int iteration,
          KERNEL_PARM *kernel_parm)
     /* Check KT-conditions */
{
  long i,ii,retrain;
  double dist,ex_c,target;

  if(kernel_parm->kernel_type == LINEAR) {  /* be optimistic */
    learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
  }
  else {  /* be conservative */
    learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
  }
  retrain=0;
  (*maxdiff)=0;
  (*misclassified)=0;
  for(ii=0;(i=active2dnum[ii])>=0;ii++) {
    if((!inconsistent[i]) && label[i]) {
      dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
      target=-(learn_parm->eps-(double)label[i]*c[i]);
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if(dist <= 0) {
        (*misclassified)++;  /* does not work due to deactivation of var */
      }
      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
        if((dist-target)>(*maxdiff))  /* largest violation */
          (*maxdiff)=dist-target;
      }
      else if((a[i]<ex_c) && (dist < target)) {
        if((target-dist)>(*maxdiff))  /* largest violation */
          (*maxdiff)=target-dist;
      }
      /* Count how long a variable was at lower/upper bound (and optimal). */
      /* Variables which were at the bound and optimal for a long */
      /* time are unlikely to become support vectors. In case our */
      /* cache is filled up, those variables are excluded to save */
      /* kernel evaluations. (See chapter 'Shrinking'.) */
      if((a[i]>(learn_parm->epsilon_a)) && (a[i]<ex_c)) {
        last_suboptimal_at[i]=iteration;         /* not at bound */
      }
      else if((a[i]<=(learn_parm->epsilon_a))
              && (dist < (target+learn_parm->epsilon_shrink))) {
        last_suboptimal_at[i]=iteration;         /* not likely optimal */
      }
      else if((a[i]>=ex_c)
              && (dist > (target-learn_parm->epsilon_shrink)))  {
        last_suboptimal_at[i]=iteration;         /* not likely optimal */
      }
    }
  }
  /* termination criterion */
  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
    retrain=1;
  }
  return(retrain);
}

long identify_inconsistent(double *a, long int *label,
          long int *unlabeled, long int totdoc,
          LEARN_PARM *learn_parm,
          long int *inconsistentnum, long int *inconsistent)
{
  long i,retrain;

  /* Throw out examples with multipliers at upper bound. This */
  /* corresponds to the -i 1 option. */
  /* ATTENTION: this is just a heuristic for finding a close */
  /*            to minimum number of examples to exclude to */
  /*            make the problem separable with desired margin */
  retrain=0;
  for(i=0;i<totdoc;i++) {
    if((!inconsistent[i]) && (!unlabeled[i])
       && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
      (*inconsistentnum)++;
      inconsistent[i]=1;  /* never choose again */
      retrain=2;          /* start over */
      if(verbosity>=3) {
        printf("inconsistent(%ld)..",i); fflush(stdout);
      }
    }
  }
  return(retrain);
}

long identify_misclassified(double *lin, long int *label,
          long int *unlabeled, long int totdoc,
          MODEL *model, long int *inconsistentnum,
          long int *inconsistent)
{
  long i,retrain;
  double dist;

  /* Throw out misclassified examples. This */
  /* corresponds to the -i 2 option. */
  /* ATTENTION: this is just a heuristic for finding a close */
  /*            to minimum number of examples to exclude to */
  /*            make the problem separable with desired margin */
  retrain=0;
  for(i=0;i<totdoc;i++) {
    dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
    if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
      (*inconsistentnum)++;
      inconsistent[i]=1;  /* never choose again */
      retrain=2;          /* start over */
      if(verbosity>=3) {
        printf("inconsistent(%ld)..",i); fflush(stdout);
      }
    }
  }
  return(retrain);
}
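
/*
 * A minimal sketch (not part of the original file) of the KT test performed
 * in check_optimality above. Assuming the plain classification setting
 * (eps == -1, c[i] == 0), the target margin is 1 and the conditions reduce to:
 *   a_i == 0     =>  dist >= 1
 *   0 < a_i < C  =>  dist == 1
 *   a_i == C     =>  dist <= 1
 * where dist = y_i*(lin[i] - b). The helper returns the violation size for
 * one example, with tol playing the role of epsilon_a.
 */
static double toy_kt_violation(double a_i, double dist, double C, double tol)
{
  if((a_i>tol) && (dist>1.0))    return dist-1.0;  /* alpha should shrink */
  if((a_i<C-tol) && (dist<1.0))  return 1.0-dist;  /* alpha should grow */
  return 0.0;                                      /* optimal within tol */
}
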
long identify_one_misclassified(double *lin, long int *label,
          long int *unlabeled, long int totdoc,
          MODEL *model, long int *inconsistentnum,
          long int *inconsistent)
{
  long i,retrain,maxex=-1;
  double dist,maxdist=0;

  /* Throw out the 'most misclassified' example. This */
  /* corresponds to the -i 3 option. */
  /* ATTENTION: this is just a heuristic for finding a close */
  /*            to minimum number of examples to exclude to */
  /*            make the problem separable with desired margin */
  retrain=0;
  for(i=0;i<totdoc;i++) {
    if((!inconsistent[i]) && (!unlabeled[i])) {
      dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
      if(dist<maxdist) {
        maxdist=dist;
        maxex=i;
      }
    }
  }
  if(maxex>=0) {
    (*inconsistentnum)++;
    inconsistent[maxex]=1;  /* never choose again */
    retrain=2;              /* start over */
    if(verbosity>=3) {
      printf("inconsistent(%ld)..",maxex); fflush(stdout);
    }
  }
  return(retrain);
}

void update_linear_component(DOC *docs, long int *label,
          long int *active2dnum, double *a, double *a_old,
          long int *working2dnum, long int totdoc, long int totwords,
          KERNEL_PARM *kernel_parm, KERNEL_CACHE *kernel_cache,
          double *lin, CFLOAT *aicache, double *weights)
     /* keep track of the linear component */
     /* lin of the gradient etc. by updating */
     /* based on the change of the variables */
     /* in the current working set */
{
  register long i,ii,j,jj;
  register double tec;

  if(kernel_parm->kernel_type==0) { /* special linear case */
    clear_vector_n(weights,totwords);
    for(ii=0;(i=working2dnum[ii])>=0;ii++) {
      if(a[i] != a_old[i]) {
        add_vector_ns(weights,docs[i].words,
                      ((a[i]-a_old[i])*(double)label[i]));
      }
    }
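    /*
     * A minimal sketch (not part of the original file; the listing is
     * truncated below): for the linear kernel the loop above folds the
     * change of the working-set alphas into a single vector
     *     delta_w = sum_i (a[i]-a_old[i]) * y_i * x_i,
     * so each lin[j] can then be refreshed with one dot product
     * <delta_w, x_j> instead of one kernel evaluation per changed variable.
     * Disabled sketch of that refresh, using a hypothetical dense array
     * x_dense in place of SVM-light's sparse word lists:
     */
#if 0
    for(jj=0;(j=active2dnum[jj])>=0;jj++) {   /* every active example */
      for(tec=0.0,i=0;i<totwords;i++)
        tec+=weights[i]*x_dense[j][i];        /* <delta_w, x_j> */
      lin[j]+=tec;                            /* incremental gradient update */
    }
#endif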