svm_learn.c
            fhigh=copy_svector(docs[i]->fvec);
            fhigh->factor=1.0;
            fhigh->next=flow;
            docdiff[k]=create_example(k,0,0,cost,fhigh);
          }
          target[k]=1;
          greater[k]=i;
          lesser[k]=j;
          k++;
        }
        else if(rankvalue[i] < rankvalue[j]) {
          if(kernel_parm->kernel_type == LINEAR)
            docdiff[k]=create_example(k,0,0,cost,
                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
          else {
            flow=copy_svector(docs[j]->fvec);
            flow->factor=-1.0;
            flow->next=NULL;
            fhigh=copy_svector(docs[i]->fvec);
            fhigh->factor=1.0;
            fhigh->next=flow;
            docdiff[k]=create_example(k,0,0,cost,fhigh);
          }
          target[k]=-1;
          greater[k]=i;
          lesser[k]=j;
          k++;
        }
      }
    }
  }
  printf("done.\n"); fflush(stdout);

  /* need to get a bigger kernel cache */
  if(*kernel_cache) {
    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
    kernel_cache_cleanup(*kernel_cache);
    (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
  }

  /* must use unbiased hyperplane on difference vectors */
  learn_parm->biased_hyperplane=0;
  pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
  svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
                           kernel_parm,(*kernel_cache),pairmodel,NULL);

  /* Transfer the result into a more compact model. If you would like
     to output the original model on pairs of documents, see below. */
  alpha=(double *)my_malloc(sizeof(double)*totdoc);
  for(i=0;i<totdoc;i++) {
    alpha[i]=0;
  }
  for(i=1;i<pairmodel->sv_num;i++) {
    alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
    alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
  }
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->sv_num=1;
  for(i=0;i<totdoc;i++) {
    if(alpha[i]) {
      model->supvec[model->sv_num]=docs[i];
      model->alpha[model->sv_num]=alpha[i];
      model->index[i]=model->sv_num;
      model->sv_num++;
    }
    else {
      model->index[i]=-1;
    }
  }
  model->at_upper_bound=0;
  model->b=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  free(alpha);
  free(greater);
  free(lesser);
  free(target);

  /* If you would like to output the original model on pairs of
     documents, replace the following lines with '(*model)=(*pairmodel);' */
  for(i=0;i<totpair;i++)
    free_example(docdiff[i],1);
  free(docdiff);
  free_model(pairmodel,0);
}
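/* Note (illustrative aside, not part of the original SVM-light
   source): why the compact transfer above is valid.  The pairwise
   model represents the ranking function as

      w = sum_k a_k * (x_greater(k) - x_lesser(k))

   where a_k = pairmodel->alpha[k] already carries the sign of
   target[k].  Regrouping the sum by single documents gives

      w = sum_i alpha_i * x_i,   with
      alpha_i = sum_{k: greater(k)=i} a_k  -  sum_{k: lesser(k)=i} a_k,

   which is exactly what the '+=' and '-=' accumulations above
   compute.  For non-linear kernels the same regrouping holds for the
   expansion in feature space. */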
/* The following solves a freely defined and given set of
   inequalities. The optimization problem is of the following form:

   min 0.5 w*w + C sum_i C_i \xi_i
   s.t. x_i * w > rhs_i - \xi_i

   This corresponds to the -z o option. */

void svm_learn_optimization(DOC **docs, double *rhs, long int totdoc,
                            long int totwords,
                            LEARN_PARM *learn_parm,
                            KERNEL_PARM *kernel_parm,
                            KERNEL_CACHE *kernel_cache, MODEL *model,
                            double *alpha)
     /* docs:         Left-hand side of inequalities (x-part) */
     /* rhs:          Right-hand side of inequalities */
     /* totdoc:       Number of examples in docs/label */
     /* totwords:     Number of features (i.e. highest feature index) */
     /* learn_parm:   Learning parameters */
     /* kernel_parm:  Kernel parameters */
     /* kernel_cache: Initialized cache of size 1*totdoc, if using a
                      kernel. NULL if linear. */
     /* model:        Returns solution as SV expansion (assumed empty
                      before called) */
     /* alpha:        Start values for the alpha variables or NULL
                      pointer. The new alpha values are returned after
                      optimization if not NULL. Array must be of size
                      totdoc. */
{
  long i,*label;
  long misclassified,upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations,maxslackid,svsetnum;
  long *unlabeled,*inconsistent;
  double r_delta_sq=0,r_delta,r_delta_avg;
  long *index,*index2dnum;
  double *weights,*slack,*alphaslack;
  CFLOAT *aicache;  /* buffer to keep one row of hessian */

  TIMING timing_profile;
  SHRINK_STATE shrink_state;

  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  label = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  c = (double *)my_malloc(sizeof(double)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
             learn_parm->svm_c);
  }

  learn_parm->biased_hyperplane=0;  /* learn an unbiased hyperplane */

  learn_parm->eps=0.0;  /* No margin, unless explicitly handcoded
                           in the right-hand side in the training set. */
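  /* Illustrative aside (not part of the original source): an ordinary
     classification constraint  y_i * (w * x_i) >= 1 - \xi_i  with
     y_i in {-1,+1} fits this form if each feature vector is
     pre-multiplied by its label and rhs_i is set to 1 for every
     example.  Because eps is forced to 0 above, any desired margin
     must be encoded in the right-hand side this way. */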
  for(i=0;i<totdoc;i++) {    /* various inits */
    docs[i]->docnum=i;
    a[i]=0;
    lin[i]=0;
    c[i]=rhs[i];             /* set right-hand side */
    unlabeled[i]=0;
    inconsistent[i]=0;
    learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
      docs[i]->costfactor;
    label[i]=1;
  }
  if(learn_parm->sharedslack)   /* if shared slacks are used, they must */
    for(i=0;i<totdoc;i++)       /* be used on every constraint */
      if(!docs[i]->slackid) {
        perror("Error: Missing shared slacks definitions in some of the examples.");
        exit(0);
      }

  /* compute starting state for initial alpha values */
  if(alpha) {
    if(verbosity>=1) {
      printf("Computing starting state...");
      fflush(stdout);
    }
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
    if(kernel_parm->kernel_type != LINEAR) {
      for(i=0;i<totdoc;i++)    /* fill kernel cache with unbounded SV */
        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
      for(i=0;i<totdoc;i++)    /* fill rest of kernel cache with bounded SV */
        if((alpha[i]==learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
    }
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                            totwords,kernel_parm,kernel_cache,lin,aicache,
                            weights);
    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
                              learn_parm,index2dnum,index2dnum,model);
    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }
    free(index);
    free(index2dnum);
    free(weights);
    free(aicache);
    if(verbosity>=1) {
      printf("done.\n");
      fflush(stdout);
    }
  }

  /* removing inconsistent examples does not work for the general
     optimization problem */
  if(learn_parm->remove_inconsistent) {
    learn_parm->remove_inconsistent = 0;
    printf("'remove inconsistent' not available in this mode. Switching option off!");
    fflush(stdout);
  }

  /* caching makes no sense for linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  if(verbosity==1) {
    printf("Optimizing");
    fflush(stdout);
  }

  /* train the svm */
  if(learn_parm->sharedslack)
    iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
                                 totwords,learn_parm,kernel_parm,
                                 kernel_cache,&shrink_state,model,
                                 a,lin,c,&timing_profile,
                                 &maxdiff);
  else
    iterations=optimize_to_convergence(docs,label,totdoc,
                                 totwords,learn_parm,kernel_parm,
                                 kernel_cache,&shrink_state,model,
                                 inconsistent,unlabeled,
                                 a,lin,c,&timing_profile,
                                 &maxdiff,(long)-1,(long)1);
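  /* Usage sketch (illustrative, not part of the original source):
     for a linear kernel no cache is needed and alpha=NULL starts the
     optimizer from zero, e.g.

         MODEL *m=(MODEL *)my_malloc(sizeof(MODEL));
         svm_learn_optimization(docs,rhs,totdoc,totwords,
                                &learn_parm,&kernel_parm,NULL,m,NULL);

     Passing a non-NULL alpha array instead warm-starts from a
     previous solution; the values are clipped to [0,C_i] in the
     "starting state" block above before being used. */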
  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    misclassified=0;
    for(i=0;(i<totdoc);i++) {    /* get final statistic */
      if((lin[i]-model->b)*(double)label[i] <= 0.0)
        misclassified++;
    }

    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
        ((float)runtime_end-(float)runtime_start)/100.0,
        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
             (runtime_end-runtime_start)/100.0);
    }
  }

  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
    loss=0;
    model_length=0;
    for(i=0;i<totdoc;i++) {
      if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
        loss+=c[i]-(lin[i]-model->b)*(double)label[i];
      model_length+=a[i]*label[i]*lin[i];
    }
    model_length=sqrt(model_length);
    fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
  }

  if(learn_parm->sharedslack) {
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    maxslackid=0;
    for(i=0;i<totdoc;i++) {    /* create full index */
      index[i]=1;
      if(maxslackid<docs[i]->slackid)
        maxslackid=docs[i]->slackid;
    }
    (void)compute_index(index,totdoc,index2dnum);
    slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
    alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
    for(i=0;i<=maxslackid;i++) {    /* init shared slacks */
      slack[i]=0;
      alphaslack[i]=0;
    }
    compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
                          slack,alphaslack);
    loss=0;
    model->at_upper_bound=0;
    svsetnum=0;
    for(i=0;i<=maxslackid;i++) {    /* tally loss and active slack sets */
      loss+=slack[i];
      if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
        model->at_upper_bound++;
      if(alphaslack[i] > learn_parm->epsilon_a)
        svsetnum++;
    }
    free(index);
    free(index2dnum);
    free(slack);
    free(alphaslack);
  }

  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
    if(learn_parm->sharedslack) {
      printf("Number of SV: %ld\n",
             model->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",