📄 svm_learn.c
字号:
/***********************************************************************/
/* */
/* svm_learn.c */
/* */
/* Learning module of Support Vector Machine. */
/* */
/* Author: Thorsten Joachims */
/* Date: 02.07.02 */
/* */
/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
/* */
/* This software is available for non-commercial use only. It must */
/* not be modified and distributed without prior permission of the */
/* author. The author is not responsible for implications from the */
/* use of this software. */
/* */
/***********************************************************************/
# include "svm_common.h"
# include "svm_learn.h"
/* Interface to the QP-solver. Only the prototype appears here; the
   definition is not in this part of the file (presumably it is provided by
   the separate solver module -- confirm). */
double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
/*---------------------------------------------------------------------------*/
/* Learns an SVM classification model based on the training data in
docs/class. The resulting model is returned in the structure
model. */
void svm_learn_classification(DOC **docs, double *class, long int
totdoc, long int totwords,
LEARN_PARM *learn_parm,
KERNEL_PARM *kernel_parm,
KERNEL_CACHE *kernel_cache,
MODEL *model,
double *alpha)
/* docs: Training vectors (x-part) */
/* class: Training labels (y-part, zero if test example for
transduction) */
/* totdoc: Number of examples in docs/class */
/* totwords: Number of features (i.e. highest feature index) */
/* learn_parm: Learning parameters */
/* kernel_parm: Kernel parameters */
/* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
NULL if linear.*/
/* model: Returns learning result (assumed empty before called) */
/* alpha: Start values for the alpha variables or NULL
pointer. The new alpha values are returned after
optimization if not NULL. Array must be of size totdoc. */
{
long *inconsistent,i,*label;
long inconsistentnum;
long misclassified,upsupvecnum;
double loss,model_length,example_length;
double maxdiff,*lin,*a,*c;
long runtime_start,runtime_end;
long iterations;
long *unlabeled,transduction;
long heldout;
long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
long *index,*index2dnum;
double *weights;
CFLOAT *aicache; /* buffer to keep one row of hessian */
double *xi_fullset; /* buffer for storing xi on full sample in loo */
double *a_fullset; /* buffer for storing alpha on full sample in loo */
TIMING timing_profile;
SHRINK_STATE shrink_state;
runtime_start=get_runtime();
timing_profile.time_kernel=0;
timing_profile.time_opti=0;
timing_profile.time_shrink=0;
timing_profile.time_update=0;
timing_profile.time_model=0;
timing_profile.time_check=0;
timing_profile.time_select=0;
kernel_cache_statistic=0;
learn_parm->totwords=totwords;
/* make sure -n value is reasonable */
if((learn_parm->svm_newvarsinqp < 2)
|| (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
}
init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
label = (long *)my_malloc(sizeof(long)*totdoc);
inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
c = (double *)my_malloc(sizeof(double)*totdoc);
a = (double *)my_malloc(sizeof(double)*totdoc);
a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
lin = (double *)my_malloc(sizeof(double)*totdoc);
learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
model->at_upper_bound=0;
model->b=0;
model->supvec[0]=0; /* element 0 reserved and empty for now */
model->alpha[0]=0;
model->lin_weights=NULL;
model->totwords=totwords;
model->totdoc=totdoc;
model->kernel_parm=(*kernel_parm);
model->sv_num=1;
model->loo_error=-1;
model->loo_recall=-1;
model->loo_precision=-1;
model->xa_error=-1;
model->xa_recall=-1;
model->xa_precision=-1;
inconsistentnum=0;
transduction=0;
r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
r_delta_sq=r_delta*r_delta;
r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
if(learn_parm->svm_c == 0.0) { /* default value for C */
learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
if(verbosity>=1)
printf("Setting default regularization parameter C=%.4f\n",
learn_parm->svm_c);
}
learn_parm->eps=-1.0; /* equivalent regression epsilon for
classification */
for(i=0;i<totdoc;i++) { /* various inits */
docs[i]->docnum=i;
inconsistent[i]=0;
a[i]=0;
lin[i]=0;
c[i]=0.0;
unlabeled[i]=0;
if(class[i] == 0) {
unlabeled[i]=1;
label[i]=0;
transduction=1;
}
if(class[i] > 0) {
learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
docs[i]->costfactor;
label[i]=1;
trainpos++;
}
else if(class[i] < 0) {
learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
label[i]=-1;
trainneg++;
}
else {
learn_parm->svm_cost[i]=0;
}
}
if(verbosity>=2) {
printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
}
/* caching makes no sense for linear kernel */
if(kernel_parm->kernel_type == LINEAR) {
kernel_cache = NULL;
}
/* compute starting state for initial alpha values */
if(alpha) {
if(verbosity>=1) {
printf("Computing starting state..."); fflush(stdout);
}
index = (long *)my_malloc(sizeof(long)*totdoc);
index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
weights=(double *)my_malloc(sizeof(double)*(totwords+1));
aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
index[i]=1;
alpha[i]=fabs(alpha[i]);
if(alpha[i]<0) alpha[i]=0;
if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
}
if(kernel_parm->kernel_type != LINEAR) {
for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
&& (kernel_cache_space_available(kernel_cache)))
cache_kernel_row(kernel_cache,docs,i,kernel_parm);
for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
if((alpha[i]==learn_parm->svm_cost[i])
&& (kernel_cache_space_available(kernel_cache)))
cache_kernel_row(kernel_cache,docs,i,kernel_parm);
}
(void)compute_index(index,totdoc,index2dnum);
update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
totwords,kernel_parm,kernel_cache,lin,aicache,
weights);
(void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
learn_parm,index2dnum,index2dnum,model);
for(i=0;i<totdoc;i++) { /* copy initial alphas */
a[i]=alpha[i];
}
free(index);
free(index2dnum);
free(weights);
free(aicache);
if(verbosity>=1) {
printf("done.\n"); fflush(stdout);
}
}
if(transduction) {
learn_parm->svm_iter_to_shrink=99999999;
if(verbosity >= 1)
printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
}
if(transduction && learn_parm->compute_loo) {
learn_parm->compute_loo=0;
if(verbosity >= 1)
printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
}
if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
learn_parm->compute_loo=0;
printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
}
if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
learn_parm->compute_loo=0;
printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
}
if(verbosity==1) {
printf("Optimizing"); fflush(stdout);
}
/* train the svm */
iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
kernel_parm,kernel_cache,&shrink_state,model,
inconsistent,unlabeled,a,lin,
c,&timing_profile,
&maxdiff,(long)-1,
(long)1);
if(verbosity>=1) {
if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
misclassified=0;
for(i=0;(i<totdoc);i++) { /* get final statistic */
if((lin[i]-model->b)*(double)label[i] <= 0.0)
misclassified++;
}
printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
misclassified,maxdiff);
runtime_end=get_runtime();
if(verbosity>=2) {
printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
((float)runtime_end-(float)runtime_start)/100.0,
(100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
(100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
(100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
(100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
(100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
}
else {
printf("Runtime in cpu-seconds: %.2f\n",
(runtime_end-runtime_start)/100.0);
}
if(learn_parm->remove_inconsistent) {
inconsistentnum=0;
for(i=0;i<totdoc;i++)
if(inconsistent[i])
inconsistentnum++;
printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
model->sv_num-1,inconsistentnum);
}
else {
upsupvecnum=0;
for(i=1;i<model->sv_num;i++) {
if(fabs(model->alpha[i]) >=
(learn_parm->svm_cost[(model->supvec[i])->docnum]-
learn_parm->epsilon_a))
upsupvecnum++;
}
printf("Number of SV: %ld (including %ld at upper bound)\n",
model->sv_num-1,upsupvecnum);
}
if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
loss=0;
model_length=0;
for(i=0;i<totdoc;i++) {
if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
loss+=1.0-(lin[i]-model->b)*(double)label[i];
model_length+=a[i]*label[i]*lin[i];
}
model_length=sqrt(model_length);
fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
example_length=estimate_sphere(model,kernel_parm);
fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
length_of_longest_document_vector(docs,totdoc,kernel_parm));
fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
estimate_margin_vcdim(model,model_length,example_length,
kernel_parm));
if((!learn_parm->remove_inconsistent) && (!transduction)) {
runtime_start_xa=get_runtime();
if(verbosity>=1) {
printf("Computing XiAlpha-estimates..."); fflush(stdout);
}
compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
kernel_parm,learn_parm,&(model->xa_error),
&(model->xa_recall),&(model->xa_precision));
if(verbosity>=1) {
printf("done\n");
}
printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
(get_runtime()-runtime_start_xa)/100.0);
fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
model->xa_error,learn_parm->rho,learn_parm->xa_depth);
fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
}
else if(!learn_parm->remove_inconsistent) {
estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
}
}
if(verbosity>=1) {
printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
}
}
/* leave-one-out testing starts now */
if(learn_parm->compute_loo) {
/* save results of training on full dataset for leave-one-out */
runtime_start_loo=get_runtime();
for(i=0;i<totdoc;i++) {
xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
if(xi_fullset[i]<0) xi_fullset[i]=0;
a_fullset[i]=a[i];
}
if(verbosity>=1) {
printf("Computing leave-one-out");
}
/* repeat this loop for every held-out example */
for(heldout=0;(heldout<totdoc);heldout++) {
if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -