📄 svm_learn.c

📁 This document contains a general overview in the first few sections as well as a more detailed refer
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/***********************************************************************//*                                                                     *//*   svm_learn.c                                                       *//*                                                                     *//*   Learning module of Support Vector Machine.                        *//*                                                                     *//*   Author: Thorsten Joachims                                         *//*   Date: 02.07.02                                                    *//*                                                                     *//*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       *//*                                                                     *//*   This software is available for non-commercial use only. It must   *//*   not be modified and distributed without prior permission of the   *//*   author. The author is not responsible for implications from the   *//*   use of this software.                                             *//*                                                                     *//***********************************************************************/# include "svm_common.h"# include "svm_learn.h"/* interface to QP-solver */double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);/*---------------------------------------------------------------------------*//* Learns an SVM classification model based on the training data in   docs/label. The resulting model is returned in the structure   model. 
*/void svm_learn_classification(DOC **docs, double *class, long int			      totdoc, long int totwords, 			      LEARN_PARM *learn_parm, 			      KERNEL_PARM *kernel_parm, 			      KERNEL_CACHE *kernel_cache, 			      MODEL *model,			      double *alpha)     /* docs:        Training vectors (x-part) */     /* class:       Training labels (y-part, zero if test example for                     transduction) */     /* totdoc:      Number of examples in docs/label */     /* totwords:    Number of features (i.e. highest feature index) */     /* learn_parm:  Learning parameters */     /* kernel_parm: Kernel parameters */     /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.                      NULL if linear.*/     /* model:       Returns learning result (assumed empty before called) */     /* alpha:       Start values for the alpha variables or NULL	             pointer. The new alpha values are returned after 		     optimization if not NULL. Array must be of size totdoc. 
*/{  long *inconsistent,i,*label;  long inconsistentnum;  long misclassified,upsupvecnum;  double loss,model_length,example_length;  double maxdiff,*lin,*a,*c;  long runtime_start,runtime_end;  long iterations;  long *unlabeled,transduction;  long heldout;  long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;  long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;  double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;  long *index,*index2dnum;  double *weights;  CFLOAT *aicache;  /* buffer to keep one row of hessian */  double *xi_fullset; /* buffer for storing xi on full sample in loo */  double *a_fullset;  /* buffer for storing alpha on full sample in loo */  TIMING timing_profile;  SHRINK_STATE shrink_state;  runtime_start=get_runtime();  timing_profile.time_kernel=0;  timing_profile.time_opti=0;  timing_profile.time_shrink=0;  timing_profile.time_update=0;  timing_profile.time_model=0;  timing_profile.time_check=0;  timing_profile.time_select=0;  kernel_cache_statistic=0;  learn_parm->totwords=totwords;  /* make sure -n value is reasonable */  if((learn_parm->svm_newvarsinqp < 2)      || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;  }  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);  label = (long *)my_malloc(sizeof(long)*totdoc);  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);  c = (double *)my_malloc(sizeof(double)*totdoc);  a = (double *)my_malloc(sizeof(double)*totdoc);  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);  lin = (double *)my_malloc(sizeof(double)*totdoc);  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));  
model->at_upper_bound=0;  model->b=0;	         model->supvec[0]=0;  /* element 0 reserved and empty for now */  model->alpha[0]=0;  model->lin_weights=NULL;  model->totwords=totwords;  model->totdoc=totdoc;  model->kernel_parm=(*kernel_parm);  model->sv_num=1;  model->loo_error=-1;  model->loo_recall=-1;  model->loo_precision=-1;  model->xa_error=-1;  model->xa_recall=-1;  model->xa_precision=-1;  inconsistentnum=0;  transduction=0;  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);  r_delta_sq=r_delta*r_delta;  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);  if(learn_parm->svm_c == 0.0) {  /* default value for C */    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);    if(verbosity>=1)       printf("Setting default regularization parameter C=%.4f\n",	     learn_parm->svm_c);  }  learn_parm->eps=-1.0;      /* equivalent regression epsilon for				classification */  for(i=0;i<totdoc;i++) {    /* various inits */    docs[i]->docnum=i;    inconsistent[i]=0;    a[i]=0;    lin[i]=0;    c[i]=0.0;    unlabeled[i]=0;    if(class[i] == 0) {      unlabeled[i]=1;      label[i]=0;      transduction=1;    }    if(class[i] > 0) {      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*	docs[i]->costfactor;      label[i]=1;      trainpos++;    }    else if(class[i] < 0) {      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;      label[i]=-1;      trainneg++;    }    else {      learn_parm->svm_cost[i]=0;    }  }  if(verbosity>=2) {    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);  }  /* caching makes no sense for linear kernel */  if(kernel_parm->kernel_type == LINEAR) {    kernel_cache = NULL;     }   /* compute starting state for initial alpha values */  if(alpha) {    if(verbosity>=1) {      printf("Computing starting state..."); fflush(stdout);    }    index = (long *)my_malloc(sizeof(long)*totdoc);    index2dnum = (long 
*)my_malloc(sizeof(long)*(totdoc+11));    weights=(double *)my_malloc(sizeof(double)*(totwords+1));    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */      index[i]=1;      alpha[i]=fabs(alpha[i]);      if(alpha[i]<0) alpha[i]=0;      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];    }    if(kernel_parm->kernel_type != LINEAR) {      for(i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */	if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i]) 	   && (kernel_cache_space_available(kernel_cache))) 	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);      for(i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */	if((alpha[i]==learn_parm->svm_cost[i]) 	   && (kernel_cache_space_available(kernel_cache))) 	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);    }    (void)compute_index(index,totdoc,index2dnum);    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,			    totwords,kernel_parm,kernel_cache,lin,aicache,			    weights);    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,			      learn_parm,index2dnum,index2dnum,model);    for(i=0;i<totdoc;i++) {    /* copy initial alphas */      a[i]=alpha[i];    }    free(index);    free(index2dnum);    free(weights);    free(aicache);    if(verbosity>=1) {      printf("done.\n");  fflush(stdout);    }     }   if(transduction) {    learn_parm->svm_iter_to_shrink=99999999;    if(verbosity >= 1)      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");  }  if(transduction && learn_parm->compute_loo) {    learn_parm->compute_loo=0;    if(verbosity >= 1)      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");  }      if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {    learn_parm->compute_loo=0;    printf("\nCannot compute leave-one-out estimates when 
removing inconsistent examples.\n\n");  }      if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {    learn_parm->compute_loo=0;    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");  }      if(verbosity==1) {    printf("Optimizing"); fflush(stdout);  }  /* train the svm */  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,				     kernel_parm,kernel_cache,&shrink_state,model,				     inconsistent,unlabeled,a,lin,				     c,&timing_profile,				     &maxdiff,(long)-1,				     (long)1);    if(verbosity>=1) {    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);    misclassified=0;    for(i=0;(i<totdoc);i++) { /* get final statistic */      if((lin[i]-model->b)*(double)label[i] <= 0.0) 	misclassified++;    }    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",	   misclassified,maxdiff);     runtime_end=get_runtime();    if(verbosity>=2) {      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",        ((float)runtime_end-(float)runtime_start)/100.0,        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),	(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),	(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));    }    else {      printf("Runtime in cpu-seconds: %.2f\n",	     (runtime_end-runtime_start)/100.0);    }    if(learn_parm->remove_inconsistent) {	        inconsistentnum=0;      for(i=0;i<totdoc;i++) 	if(inconsistent[i]) 	  inconsistentnum++;      printf("Number of SV: %ld (plus %ld 
inconsistent examples)\n",	     model->sv_num-1,inconsistentnum);    }    else {      upsupvecnum=0;      for(i=1;i<model->sv_num;i++) {	if(fabs(model->alpha[i]) >= 	   (learn_parm->svm_cost[(model->supvec[i])->docnum]-	    learn_parm->epsilon_a)) 	  upsupvecnum++;      }      printf("Number of SV: %ld (including %ld at upper bound)\n",	     model->sv_num-1,upsupvecnum);    }        if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {      loss=0;      model_length=0;       for(i=0;i<totdoc;i++) {	if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)	  loss+=1.0-(lin[i]-model->b)*(double)label[i];	model_length+=a[i]*label[i]*lin[i];      }      model_length=sqrt(model_length);      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);      example_length=estimate_sphere(model,kernel_parm);       fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",	      length_of_longest_document_vector(docs,totdoc,kernel_parm));      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",	      estimate_margin_vcdim(model,model_length,example_length,				    kernel_parm));      if((!learn_parm->remove_inconsistent) && (!transduction)) {	runtime_start_xa=get_runtime();	if(verbosity>=1) {	  printf("Computing XiAlpha-estimates..."); fflush(stdout);	}	compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,			     kernel_parm,learn_parm,&(model->xa_error),			     &(model->xa_recall),&(model->xa_precision));	if(verbosity>=1) {	  printf("done\n");	}	printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",	       (get_runtime()-runtime_start_xa)/100.0);		fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",		model->xa_error,learn_parm->rho,learn_parm->xa_depth);	fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",		model->xa_recall,learn_parm->rho,learn_parm->xa_depth);	fprintf(stdout,"XiAlpha-estimate of the 
precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",		model->xa_precision,learn_parm->rho,learn_parm->xa_depth);      }      else if(!learn_parm->remove_inconsistent) {	estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);      }    }    if(verbosity>=1) {      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);    }  }  /* leave-one-out testing starts now */  if(learn_parm->compute_loo) {    /* save results of training on full dataset for leave-one-out */    runtime_start_loo=get_runtime();    for(i=0;i<totdoc;i++) {      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);      if(xi_fullset[i]<0) xi_fullset[i]=0;      a_fullset[i]=a[i];    }    if(verbosity>=1) {      printf("Computing leave-one-out");    }        /* repeat this loop for every held-out example */    for(heldout=0;(heldout<totdoc);heldout++) {      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]	 < 1.0) { 	/* guaranteed to not produce a leave-one-out error */	if(verbosity==1) {	  printf("+"); fflush(stdout); 	}      }      else if(xi_fullset[heldout] > 1.0) {	/* guaranteed to produce a leave-one-out error */	loo_count++;	if(label[heldout] > 0)  loo_count_pos++; else loo_count_neg++;
12 3 4 5 下一页
💿 文件大小 288 K
👤 上传用户 a6810121
📂 所属分类其他
🏷️ 相关标签

#document #contains #detailed #overview
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -