📄 svm_learn.c
	     model->at_upper_bound,svsetnum);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
    }
    else {
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
        if(fabs(model->alpha[i]) >=
           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
            learn_parm->epsilon_a))
          upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
             model->sv_num-1,upsupvecnum);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
    }
    example_length=estimate_sphere(model,kernel_parm);
    fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
            length_of_longest_document_vector(docs,totdoc,kernel_parm));
  }
  if(verbosity>=1) {
    printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
  }
  if(alpha) {
    for(i=0;i<totdoc;i++) {  /* copy final alphas */
      alpha[i]=a[i];
    }
  }
  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  shrink_state_cleanup(&shrink_state);
  free(label);
  free(unlabeled);
  free(inconsistent);
  free(c);
  free(a);
  free(lin);
  free(learn_parm->svm_cost);
}

long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
                             long int totwords, LEARN_PARM *learn_parm,
                             KERNEL_PARM *kernel_parm,
                             KERNEL_CACHE *kernel_cache,
                             SHRINK_STATE *shrink_state, MODEL *model,
                             long int *inconsistent, long int *unlabeled,
                             double *a, double *lin, double *c,
                             TIMING *timing_profile, double *maxdiff,
                             long int heldout, long int retrain)
     /* docs:          Training vectors (x-part) */
     /* label:         Training labels/values (y-part, zero if test example
                       for transduction) */
     /* totdoc:        Number of examples in docs/label */
     /* totwords:      Number of features (i.e. highest feature index) */
     /* learn_parm:    Learning parameters */
     /* kernel_parm:   Kernel parameters */
     /* kernel_cache:  Initialized/partly filled cache, if using a kernel.
                       NULL if linear. */
     /* shrink_state:  State of active variables */
     /* model:         Returns learning result */
     /* inconsistent:  examples thrown out as inconsistent */
     /* unlabeled:     test examples for transduction */
     /* a:             alphas */
     /* lin:           linear component of gradient */
     /* c:             right hand side of inequalities (margin) */
     /* maxdiff:       returns maximum violation of KT-conditions */
     /* heldout:       marks held-out example for leave-one-out (or -1) */
     /* retrain:       selects training mode (1=regular / 2=holdout) */
{
  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
  long inconsistentnum,choosenum,already_chosen=0,iteration;
  long misclassified,supvecnum=0,*active2dnum,inactivenum;
  long *working2dnum,*selexam;
  long activenum;
  double criterion,eq;
  double *a_old;
  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0;  /* timing */
  long transductcycle;
  long transduction;
  double epsilon_crit_org;
  double bestmaxdiff;
  long bestmaxdiffiter,terminate;

  double *selcrit;   /* buffer for sorting */
  CFLOAT *aicache;   /* buffer to keep one row of hessian */
  double *weights;   /* buffer for weight vector in linear case */
  QP qp;             /* buffer for one quadratic program */

  epsilon_crit_org=learn_parm->epsilon_crit;  /* save original value */
  if(kernel_parm->kernel_type == LINEAR) {
    learn_parm->epsilon_crit=2.0;
    kernel_cache=NULL;  /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  learn_parm->totwords=totwords;

  chosen = (long *)my_malloc(sizeof(long)*totdoc);
  last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
  selexam = (long *)my_malloc(sizeof(long)*totdoc);
  a_old = (double *)my_malloc(sizeof(double)*totdoc);
  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
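  /*
   * NOTE (editor's annotation, not part of the original SVMlight source):
   * optimize_to_convergence() solves the standard soft-margin dual by
   * decomposition, i.e. it repeatedly optimizes a small QP over a working
   * set of at most learn_parm->svm_maxqpsize variables while all other
   * alphas stay fixed:
   *
   *   maximize    W(a) = sum_i a_i - 1/2 sum_{i,j} a_i a_j y_i y_j K(x_i,x_j)
   *   subject to  0 <= a_i <= C_i,   sum_i a_i y_i = 0
   *
   * The qp.* buffers allocated below hold one such subproblem: opt_g is
   * the q x q Hessian block for the chosen working set (q <= svm_maxqpsize),
   * opt_g0 the linear term, opt_ce/opt_ce0 the equality constraint,
   * opt_low/opt_up the box constraints, and opt_xinit the warm-start
   * values of the chosen alphas.
   */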
  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
                                 *learn_parm->svm_maxqpsize);
  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_low = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_up = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  weights = (double *)my_malloc(sizeof(double)*(totwords+1));

  choosenum=0;
  inconsistentnum=0;
  transductcycle=0;
  transduction=0;
  if(!retrain) retrain=1;
  iteration=1;
  bestmaxdiffiter=1;
  bestmaxdiff=999999999;
  terminate=0;

  if(kernel_cache) {
    kernel_cache->time=iteration;  /* for lru cache */
    kernel_cache_reset_lru(kernel_cache);
  }

  for(i=0;i<totdoc;i++) {  /* various inits */
    chosen[i]=0;
    a_old[i]=a[i];
    last_suboptimal_at[i]=1;
    if(inconsistent[i])
      inconsistentnum++;
    if(unlabeled[i]) {
      transduction=1;
    }
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);

  /* repeat this loop until we have convergence */
  for(;retrain && (!terminate);iteration++) {

    if(kernel_cache)
      kernel_cache->time=iteration;  /* for lru cache */
    if(verbosity>=2) {
      printf("Iteration %ld: ",iteration); fflush(stdout);
    }
    else if(verbosity==1) {
      printf("."); fflush(stdout);
    }

    if(verbosity>=2) t0=get_runtime();
    if(verbosity>=3) {
      printf("\nSelecting working set... "); fflush(stdout);
    }

    if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
      learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

    i=0;
    for(jj=0;(j=working2dnum[jj])>=0;jj++) {  /* clear working set */
      if((chosen[j]>=(learn_parm->svm_maxqpsize/
                      minl(learn_parm->svm_maxqpsize,
                           learn_parm->svm_newvarsinqp)))
         || (inconsistent[j])
         || (j == heldout)) {
        chosen[j]=0;
        choosenum--;
      }
      else {
        chosen[j]++;
        working2dnum[i++]=j;
      }
    }
    working2dnum[i]=-1;

    if(retrain == 2) {
      choosenum=0;
      for(jj=0;(j=working2dnum[jj])>=0;jj++) {  /* fully clear working set */
        chosen[j]=0;
      }
      clear_index(working2dnum);
      for(i=0;i<totdoc;i++) {  /* set inconsistent examples to zero (-i 1) */
        if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
          chosen[i]=99999;
          choosenum++;
          a[i]=0;
        }
      }
      if(learn_parm->biased_hyperplane) {
        eq=0;
        for(i=0;i<totdoc;i++) {  /* make sure we fulfill equality constraint */
          eq+=a[i]*label[i];
        }
        for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
          if((eq*label[i] > 0) && (a[i] > 0)) {
            chosen[i]=88888;
            choosenum++;
            if((eq*label[i]) > a[i]) {
              eq-=(a[i]*label[i]);
              a[i]=0;
            }
            else {
              a[i]-=(eq*label[i]);
              eq=0;
            }
          }
        }
      }
      compute_index(chosen,totdoc,working2dnum);
    }
    else {  /* select working set according to steepest gradient */
      if(iteration % 101) {
        already_chosen=0;
        if((minl(learn_parm->svm_newvarsinqp,
                 learn_parm->svm_maxqpsize-choosenum)>=4)
           && (kernel_parm->kernel_type != LINEAR)) {
          /* select part of the working set from cache */
          already_chosen=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              (long)(minl(learn_parm->svm_maxqpsize-choosenum,
                                          learn_parm->svm_newvarsinqp)/2),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,1,
                              key,chosen);
          choosenum+=already_chosen;
        }
        choosenum+=select_next_qp_subproblem_grad(
                            label,unlabeled,a,lin,c,totdoc,
                            minl(learn_parm->svm_maxqpsize-choosenum,
                                 learn_parm->svm_newvarsinqp-already_chosen),
                            learn_parm,inconsistent,active2dnum,
                            working2dnum,selcrit,selexam,kernel_cache,0,key,
                            chosen);
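        /*
         * NOTE (editor's annotation, not part of the original source):
         * select_next_qp_subproblem_grad() implements the steepest-
         * feasible-descent working-set selection of Joachims (1999).
         * Roughly, with lin[i] caching the linear component of the
         * gradient, each active, consistent example gets a selection
         * criterion of the form
         *
         *   selcrit[i] ~ label[i] * (gradient of dual objective w.r.t. a[i])
         *
         * and the new variables are split between examples whose alpha can
         * still move up (a[i] below its cost bound) and examples whose
         * alpha can move down (a[i] > 0). Judging from the surrounding
         * comment, the flag after kernel_cache (1 vs. 0 above) restricts
         * the first call to rows already in the kernel cache, so part of
         * the working set is chosen cache-friendly. The exact criterion
         * lives in select_next_qp_subproblem_grad().
         */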
      }
      else {  /* once in a while, select a somewhat random working set to
                 break out of infinite loops caused by numerical
                 inaccuracies in the core QP solver */
        choosenum+=select_next_qp_subproblem_rand(
                            label,unlabeled,a,lin,c,totdoc,
                            minl(learn_parm->svm_maxqpsize-choosenum,
                                 learn_parm->svm_newvarsinqp),
                            learn_parm,inconsistent,active2dnum,
                            working2dnum,selcrit,selexam,kernel_cache,key,
                            chosen,iteration);
      }
    }

    if(verbosity>=2) {
      printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
    }

    if(verbosity>=2) t1=get_runtime();

    if(kernel_cache)
      cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
                                 choosenum,kernel_parm);

    if(verbosity>=2) t2=get_runtime();
    if(retrain != 2) {
      optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
                   model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
                   aicache,kernel_parm,&qp,&epsilon_crit_org);
    }

    if(verbosity>=2) t3=get_runtime();
    update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,
                            totdoc,totwords,kernel_parm,kernel_cache,lin,
                            aicache,weights);

    if(verbosity>=2) t4=get_runtime();
    supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
                                  learn_parm,working2dnum,active2dnum,model);

    if(verbosity>=2) t5=get_runtime();

    /* The following computation of the objective function works only */
    /* relative to the active variables */
    if(verbosity>=3) {
      criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
                                           active2dnum);
      printf("Objective function (over active variables): %.16f\n",criterion);
      fflush(stdout);
    }

    for(jj=0;(i=working2dnum[jj])>=0;jj++) {
      a_old[i]=a[i];
    }

    if(retrain == 2) {  /* reset inconsistent unlabeled examples */
      for(i=0;(i<totdoc);i++) {
        if(inconsistent[i] && unlabeled[i]) {
          inconsistent[i]=0;
          label[i]=0;
        }
      }
    }

    retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
                             maxdiff,epsilon_crit_org,&misclassified,
                             inconsistent,active2dnum,last_suboptimal_at,
                             iteration,kernel_parm);

    if(verbosity>=2) {
      t6=get_runtime();
      timing_profile->time_select+=t1-t0;
      timing_profile->time_kernel+=t2-t1;
      timing_profile->time_opti+=t3-t2;
      timing_profile->time_update+=t4-t3;
      timing_profile->time_model+=t5-t4;
      timing_profile->time_check+=t6-t5;
    }

    /* checking whether optimizer got stuck */
    if((*maxdiff) < bestmaxdiff) {
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
    }
    if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
      /* long time without progress? */
      terminate=1;
      retrain=0;
      if(verbosity>=1)
        printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
    }

    noshrink=0;
    if((!retrain) && (inactivenum>0)
       && ((!learn_parm->skip_final_opt_check)
           || (kernel_parm->kernel_type == LINEAR))) {
      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
         || (verbosity>=2)) {
        if(verbosity==1) {
          printf("\n");
        }
        printf(" Checking optimality of inactive variables...");
        fflush(stdout);
      }
      t1=get_runtime();
      reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,
                                   totdoc,totwords,iteration,learn_parm,
                                   inconsistent,docs,kernel_parm,
                                   kernel_cache,model,aicache,
                                   weights,maxdiff);
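      /*
       * NOTE (editor's annotation, not part of the original source):
       * shrinking removes variables that have been stuck at a bound for
       * many iterations (tracked in last_suboptimal_at[]) from the active
       * set, so later iterations only touch the remaining activenum
       * variables. Because shrinking is a heuristic, this block rechecks
       * all inactivenum shrunken variables before accepting convergence;
       * if any of them violate the KT conditions, reactivation raises
       * (*maxdiff) and the main loop resumes with the enlarged active set
       * recomputed below. compute_index() compacts the 0/1 activity flags
       * into a -1 terminated list of indices (active2dnum), the same
       * layout used for working2dnum above.
       */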
      /* Update to new active variables. */
      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
      inactivenum=totdoc-activenum;
      /* reset watchdog */
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
      /* termination criterion */
      noshrink=1;
      retrain=0;
      if((*maxdiff) > learn_parm->epsilon_crit)
        retrain=1;
      timing_profile->time_shrink+=get_runtime()-t1;
      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
         || (verbosity>=2)) {
        printf("done.\n"); fflush(stdout);
        printf(" Number of inactive variables = %ld\n",inactivenum);
      }
    }

    if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
      learn_parm->epsilon_crit=(*maxdiff);
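    /*
     * NOTE (editor's annotation, not part of the original source): the
     * loop terminates once no example violates the Karush-Kuhn-Tucker
     * conditions by more than epsilon_crit. In the usual soft-margin
     * formulation, with d_i = y_i*(w*x_i + b):
     *
     *   a_i = 0        =>  d_i >= 1 - epsilon_crit
     *   0 < a_i < C_i  =>  |d_i - 1| <= epsilon_crit
     *   a_i = C_i      =>  d_i <= 1 + epsilon_crit
     *
     * check_optimality() returns the largest such violation in (*maxdiff).
     * epsilon_crit starts loose (it was even set to 2.0 above for the
     * linear kernel) and the statement above tightens it toward the
     * precision actually achieved, so the tolerance anneals down to the
     * epsilon_crit_org requested by the user.
     */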