
svm_learn.c
A nice support vector machine program
Language: C
Page 1 of 5

        if(verbosity==1) {
          printf("-"); fflush(stdout);
        }
      }
      else {
        loocomputed++;
        heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
        learn_parm->svm_cost[heldout]=0;
        /* make sure heldout example is not currently */
        /* shrunk away. Assumes that lin is up to date! */
        shrink_state.active[heldout]=1;
        if(verbosity>=2)
          printf("\nLeave-One-Out test on example %ld\n",heldout);
        if(verbosity>=1) {
          printf("(?[%ld]",heldout); fflush(stdout);
        }
        optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                                kernel_parm,kernel_cache,&shrink_state,
                                model,inconsistent,unlabeled,a,lin,c,
                                &timing_profile,&maxdiff,heldout,(long)2);
        /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */
        if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
          loo_count++;                           /* there was a loo-error */
          if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
          if(verbosity>=1) {
            printf("-)"); fflush(stdout);
          }
        }
        else {
          if(verbosity>=1) {
            printf("+)"); fflush(stdout);
          }
        }
        /* now we need to restore the original data set */
        learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
      }
    } /* end of leave-one-out loop */

    if(verbosity>=1) {
      printf("\nRetrain on full problem"); fflush(stdout);
    }
    optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                            kernel_parm,kernel_cache,&shrink_state,
                            model,inconsistent,unlabeled,a,lin,c,
                            &timing_profile,&maxdiff,(long)-1,(long)1);
    if(verbosity >= 1)
      printf("done.\n");

    /* after all leave-one-out computed */
    model->loo_error=100.0*loo_count/(double)totdoc;
    model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
    model->loo_precision=(trainpos-loo_count_pos)/
      (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
    if(verbosity >= 1) {
      fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
              model->loo_error);
      fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
              model->loo_recall);
      fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
              model->loo_precision);
      fprintf(stdout,"Actual leave-one-outs computed:  %ld (rho=%.2f)\n",
              loocomputed,learn_parm->rho);
      printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
             (double)(get_runtime()-runtime_start_loo)/100.0);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  shrink_state_cleanup(&shrink_state);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}
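
/* Editor's note: the helper below is NOT part of SVM-light. It is a
   minimal, self-contained sketch of how the leave-one-out statistics
   assigned to model->loo_error, model->loo_recall and
   model->loo_precision above combine the error counters. The function
   name loo_statistics_sketch and its parameter list are hypothetical. */
static void loo_statistics_sketch(long loo_count, long loo_count_pos,
                                  long loo_count_neg, long trainpos,
                                  long totdoc)
{
  /* fraction of examples misclassified when held out */
  double error=100.0*loo_count/(double)totdoc;
  /* fraction of positives still recognized when held out */
  double recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
  /* true positives among all examples predicted positive:
     (trainpos-loo_count_pos) true positives plus loo_count_neg
     negatives that were misclassified as positive */
  double precision=(trainpos-loo_count_pos)/
    (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
  printf("loo: error=%.2f%% recall=%.2f%% precision=%.2f%%\n",
         error,recall,precision);
}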

/* Learns an SVM regression model based on the training data in
   docs/value. The resulting model is returned in the structure
   model. */

void svm_learn_regression(DOC **docs, double *value, long int totdoc,
                          long int totwords, LEARN_PARM *learn_parm,
                          KERNEL_PARM *kernel_parm,
                          KERNEL_CACHE **kernel_cache, MODEL *model)
     /* docs:        Training vectors (x-part) */
     /* value:       Training values (y-part) */
     /* totdoc:      Number of examples in docs/value */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized cache, if using a kernel. NULL if
                     linear. Note that it will be freed and reassigned. */
     /* model:       Returns learning result (assumed empty before called) */
{
  long *inconsistent,i,j;
  long inconsistentnum;
  long upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations,kernel_cache_size;
  long *unlabeled;
  double r_delta_sq=0,r_delta,r_delta_avg;
  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;
  DOC **docs_org;
  long *label;

  /* set up regression problem in standard form */
  docs_org=docs;
  docs = (DOC **)my_malloc(sizeof(DOC *)*2*totdoc);
  label = (long *)my_malloc(sizeof(long)*2*totdoc);
  c = (double *)my_malloc(sizeof(double)*2*totdoc);
  for(i=0;i<totdoc;i++) {
    j=2*totdoc-1-i;
    docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
    label[i]=+1;
    c[i]=value[i];
    docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
    label[j]=-1;
    c[j]=value[i];
  }
  totdoc*=2;
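
  /* Editor's note (not part of the original source): the loop above is
     the standard reduction of eps-insensitive regression to a
     classification-style problem. Each training pair (x_i, y_i) is
     duplicated: copy i keeps label +1 with target c[i]=y_i, and its
     mirror j=2*totdoc-1-i gets label -1 with the same target. Together
     with learn_parm->eps in the loss computation below, the +1 copy
     encodes (w*x_i - b) >= y_i - eps and the -1 copy encodes
     (w*x_i - b) <= y_i + eps, each softened by a slack variable. For
     example, y_i=2.5 with eps=0.1 brackets the prediction in
     [2.4, 2.6] up to slack. */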

  /* need to get a bigger kernel cache */
  if(*kernel_cache) {
    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
    kernel_cache_cleanup(*kernel_cache);
    (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
  }

  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  inconsistentnum=0;
  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
             learn_parm->svm_c);
  }

  for(i=0;i<totdoc;i++) {  /* various inits */
    inconsistent[i]=0;
    a[i]=0;
    lin[i]=0;
    unlabeled[i]=0;
    if(label[i] > 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
        docs[i]->costfactor;
    }
    else if(label[i] < 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
    }
  }

  /* caching makes no sense for linear kernel */
  if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
    printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
  }

  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }

  /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                                     kernel_parm,*kernel_cache,&shrink_state,
                                     model,inconsistent,unlabeled,a,lin,c,
                                     &timing_profile,&maxdiff,(long)-1,
                                     (long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
        ((float)runtime_end-(float)runtime_start)/100.0,
        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
             (runtime_end-runtime_start)/100.0);
    }
    if(learn_parm->remove_inconsistent) {
      inconsistentnum=0;
      for(i=0;i<totdoc;i++)
        if(inconsistent[i])
          inconsistentnum++;
      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
             model->sv_num-1,inconsistentnum);
    }
    else {
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
        if(fabs(model->alpha[i]) >=
           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
            learn_parm->epsilon_a))
          upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
             model->sv_num-1,upsupvecnum);
    }

    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
      loss=0;
      model_length=0;
      for(i=0;i<totdoc;i++) {
        if((lin[i]-model->b)*(double)label[i]
           < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
          loss+=-learn_parm->eps+(double)label[i]*c[i]
                -(lin[i]-model->b)*(double)label[i];
        model_length+=a[i]*label[i]*lin[i];
      }
      model_length=sqrt(model_length);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
      example_length=estimate_sphere(model,kernel_parm);
      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
              length_of_longest_document_vector(docs,totdoc,kernel_parm));
    }
    if(verbosity>=1) {
      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  /* this makes sure the model we return does not contain pointers to the
     temporary documents */
  for(i=1;i<model->sv_num;i++) {
    j=model->supvec[i]->docnum;
    if(j >= (totdoc/2)) {
      j=totdoc-j-1;
    }
    model->supvec[i]=docs_org[j];
  }

  shrink_state_cleanup(&shrink_state);
  for(i=0;i<totdoc;i++)
    free_example(docs[i],0);
  free(docs);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}
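
/* Editor's note: the helper below is NOT part of SVM-light. It is a
   sketch of the index arithmetic in the pointer fix-up at the end of
   svm_learn_regression(). After the setup loop doubled the training
   set, document i and its mirror 2*N-1-i (with N original examples,
   so totdoc here is the doubled count 2*N) both stand for original
   document i. The name original_docnum_sketch is hypothetical. */
static long original_docnum_sketch(long docnum, long totdoc)
{
  if(docnum >= totdoc/2)      /* mirror copy carrying label -1 */
    return totdoc-docnum-1;   /* map back to original index i */
  return docnum;              /* +1 copy already has the original index */
}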

void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
                       long int totwords, LEARN_PARM *learn_parm,
                       KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
                       MODEL *model)
     /* docs:        Training vectors (x-part) */
     /* rankvalue:   Training target values that determine the ranking */
     /* totdoc:      Number of examples in docs/rankvalue */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized pointer to cache of size 1*totdoc, if
                     using a kernel. NULL if linear. NOTE: the cache is
                     getting reinitialized in this function */
     /* model:       Returns learning result (assumed empty before called) */
{
  DOC **docdiff;
  long i,j,k,totpair,kernel_cache_size;
  double *target,*alpha,cost;
  long *greater,*lesser;
  MODEL *pairmodel;
  SVECTOR *flow,*fhigh;

  totpair=0;
  for(i=0;i<totdoc;i++) {
    for(j=i+1;j<totdoc;j++) {
      if((docs[i]->queryid==docs[j]->queryid)
         && (rankvalue[i] != rankvalue[j])) {
        totpair++;
      }
    }
  }

  printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
  docdiff=(DOC **)my_malloc(sizeof(DOC *)*totpair);
  target=(double *)my_malloc(sizeof(double)*totpair);
  greater=(long *)my_malloc(sizeof(long)*totpair);
  lesser=(long *)my_malloc(sizeof(long)*totpair);

  k=0;
  for(i=0;i<totdoc;i++) {
    for(j=i+1;j<totdoc;j++) {
      if(docs[i]->queryid == docs[j]->queryid) {
        cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
        if(rankvalue[i] > rankvalue[j]) {
          if(kernel_parm->kernel_type == LINEAR)
            docdiff[k]=create_example(k,0,0,cost,
                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
          else {
            flow=copy_svector(docs[j]->fvec);
            flow->factor=-1.0;
            flow->next=NULL;
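
/* (listing truncated here; continues on page 2 of 5) */

/* Editor's note, not part of the original source: svm_learn_ranking()
   reduces ranking to pairwise classification. For every pair (i,j)
   within the same query (docs[i]->queryid == docs[j]->queryid) whose
   rank values differ, one constraint is built on the difference
   x_i - x_j of the two feature vectors: explicitly via sub_ss() for
   the linear kernel, or implicitly via factor-weighted copies of the
   SVECTORs otherwise (flow above carries factor -1.0 for the
   lower-ranked document). For example, a query with three documents
   whose rank values are 3, 2, 2 contributes totpair=2 constraints;
   the tied pair is skipped. */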
