svm_learn.c
    if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
      learn_parm->epsilon_crit/=2.0;
      retrain=1;
      noshrink=1;
    }
    if(learn_parm->epsilon_crit<epsilon_crit_org)
      learn_parm->epsilon_crit=epsilon_crit_org;

    if(verbosity>=2) {
      printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
             supvecnum,model->at_upper_bound,(*maxdiff));
      fflush(stdout);
    }
    if(verbosity>=3) {
      printf("\n");
    }

    if((!retrain) && (transduction)) {
      for(i=0;(i<totdoc);i++) {
        shrink_state->active[i]=1;
      }
      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
      inactivenum=0;
      if(verbosity==1) printf("done\n");
      retrain=incorporate_unlabeled_examples(model,label,inconsistent,
                                             unlabeled,a,lin,totdoc,
                                             selcrit,selexam,key,
                                             transductcycle,kernel_parm,
                                             learn_parm);
      epsilon_crit_org=learn_parm->epsilon_crit;
      if(kernel_parm->kernel_type == LINEAR)
        learn_parm->epsilon_crit=1;
      transductcycle++;
      /* reset watchdog */
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
    }
    else if(((iteration % 10) == 0) && (!noshrink)) {
      activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
                               active2dnum,last_suboptimal_at,iteration,totdoc,
                               maxl((long)(activenum/10),
                                    maxl((long)(totdoc/500),100)),
                               a,inconsistent);
      inactivenum=totdoc-activenum;
      if((kernel_cache) && (supvecnum>kernel_cache->max_elems)
         && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
        kernel_cache_shrink(kernel_cache,totdoc,
                            minl((kernel_cache->activenum-activenum),
                                 (kernel_cache->activenum-supvecnum)),
                            shrink_state->active);
      }
    }

    if((!retrain) && learn_parm->remove_inconsistent) {
      if(verbosity>=1) {
        printf(" Moving training errors to inconsistent examples...");
        fflush(stdout);
      }
      if(learn_parm->remove_inconsistent == 1) {
        retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
                                      &inconsistentnum,inconsistent);
      }
      else if(learn_parm->remove_inconsistent == 2) {
        retrain=identify_misclassified(lin,label,unlabeled,totdoc,
                                       model,&inconsistentnum,inconsistent);
      }
      else if(learn_parm->remove_inconsistent == 3) {
        retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
                                           model,&inconsistentnum,inconsistent);
      }
      if(retrain) {
        if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
          learn_parm->epsilon_crit=2.0;
        }
      }
      if(verbosity>=1) {
        printf("done.\n");
        if(retrain) {
          printf(" Now %ld inconsistent examples.\n",inconsistentnum);
        }
      }
    }
  } /* end of loop */

  free(chosen);
  free(last_suboptimal_at);
  free(key);
  free(selcrit);
  free(selexam);
  free(a_old);
  free(aicache);
  free(working2dnum);
  free(active2dnum);
  free(qp.opt_ce);
  free(qp.opt_ce0);
  free(qp.opt_g);
  free(qp.opt_g0);
  free(qp.opt_xinit);
  free(qp.opt_low);
  free(qp.opt_up);
  free(weights);

  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
  model->maxdiff=(*maxdiff);

  return(iteration);
}

long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
                     long int totdoc, long int totwords,
                     LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm,
                     KERNEL_CACHE *kernel_cache,
                     SHRINK_STATE *shrink_state, MODEL *model,
                     double *a, double *lin, double *c,
                     TIMING *timing_profile, double *maxdiff)
     /* docs:         Training vectors (x-part) */
     /* label:        Training labels/value (y-part, zero if test example
                      for transduction) */
     /* totdoc:       Number of examples in docs/label */
     /* totwords:     Number of features (i.e. highest feature index) */
     /* learn_parm:   Learning parameters */
     /* kernel_parm:  Kernel parameters */
     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
                      NULL if linear. */
     /* shrink_state: State of active variables */
     /* model:        Returns learning result */
     /* a:            alphas */
     /* lin:          linear component of gradient */
     /* c:            right hand side of inequalities (margin) */
     /* maxdiff:      returns maximum violation of KT-conditions */
{
  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
  long *inconsistent,choosenum,already_chosen=0,iteration;
  long misclassified,supvecnum=0,*active2dnum,inactivenum;
  long *working2dnum,*selexam,*ignore;
  long activenum,retrain,maxslackid,slackset,jointstep;
  double criterion,eq_target;
  double *a_old,*alphaslack;
  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
  double epsilon_crit_org,maxsharedviol;
  double bestmaxdiff;
  long bestmaxdiffiter,terminate;

  double *selcrit;  /* buffer for sorting */
  CFLOAT *aicache;  /* buffer to keep one row of hessian */
  double *weights;  /* buffer for weight vector in linear case */
  QP qp;            /* buffer for one quadratic program */
  double *slack;    /* vector of slack variables for optimization with
                       shared slacks */

  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
  if(kernel_parm->kernel_type == LINEAR) {
    learn_parm->epsilon_crit=2.0;
    kernel_cache=NULL;   /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  learn_parm->totwords=totwords;

  chosen = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  ignore = (long *)my_malloc(sizeof(long)*totdoc);
  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
  selexam = (long *)my_malloc(sizeof(long)*totdoc);
  a_old = (double *)my_malloc(sizeof(double)*totdoc);
  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
                                 *learn_parm->svm_maxqpsize);
  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_low = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_up = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  weights = (double *)my_malloc(sizeof(double)*(totwords+1));
  maxslackid=0;
  for(i=0;i<totdoc;i++) {    /* determine size of slack array */
    if(maxslackid<docs[i]->slackid)
      maxslackid=docs[i]->slackid;
  }
  slack = (double *)my_malloc(sizeof(double)*(maxslackid+1));
  alphaslack = (double *)my_malloc(sizeof(double)*(maxslackid+1));
  last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
  for(i=0;i<=maxslackid;i++) {  /* init shared slacks */
    slack[i]=0;
    alphaslack[i]=0;
    last_suboptimal_at[i]=1;
  }

  choosenum=0;
  retrain=1;
  iteration=1;
  bestmaxdiffiter=1;
  bestmaxdiff=999999999;
  terminate=0;

  if(kernel_cache) {
    kernel_cache->time=iteration;  /* for lru cache */
    kernel_cache_reset_lru(kernel_cache);
  }

  for(i=0;i<totdoc;i++) {    /* various inits */
    chosen[i]=0;
    unlabeled[i]=0;
    inconsistent[i]=0;
    ignore[i]=0;
    a_old[i]=a[i];
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);

  /* call to init slack and alphaslack */
  compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
                        slack,alphaslack);

  /* repeat this loop until we have convergence */
  for(;retrain && (!terminate);iteration++) {
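    /* Each iteration: (1) select a working set, either mixing slack sets
       (i-step) or restricted to the single most violated slack set
       (j-step); (2) solve the QP subproblem on that working set;
       (3) re-project the alphas so that the alpha sum of every slack set
       stays within the upper bound svm_c; (4) update the linear component
       of the gradient. */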
    if(kernel_cache)
      kernel_cache->time=iteration;  /* for lru cache */
    if(verbosity>=2) {
      printf("Iteration %ld: ",iteration); fflush(stdout);
    }
    else if(verbosity==1) {
      printf("."); fflush(stdout);
    }
    if(verbosity>=2) t0=get_runtime();
    if(verbosity>=3) {
      printf("\nSelecting working set... "); fflush(stdout);
    }

    if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
      learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

    /* select working set according to steepest gradient */
    jointstep=0;
    eq_target=0;
    if(iteration % 101) {
      slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
                                       learn_parm,active2dnum,&maxsharedviol);
      if((iteration % 2)
         || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)) {
        /* do a step with examples from different slack sets */
        if(verbosity >= 2) {
          printf("(i-step)"); fflush(stdout);
        }
        i=0;
        for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
          if((chosen[j]>=(learn_parm->svm_maxqpsize/
                          minl(learn_parm->svm_maxqpsize,
                               learn_parm->svm_newvarsinqp)))) {
            chosen[j]=0;
            choosenum--;
          }
          else {
            chosen[j]++;
            working2dnum[i++]=j;
          }
        }
        working2dnum[i]=-1;

        already_chosen=0;
        if((minl(learn_parm->svm_newvarsinqp,
                 learn_parm->svm_maxqpsize-choosenum)>=4)
           && (kernel_parm->kernel_type != LINEAR)) {
          /* select part of the working set from cache */
          already_chosen=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              (long)(minl(learn_parm->svm_maxqpsize-choosenum,
                                          learn_parm->svm_newvarsinqp)/2),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,
                              (long)1,key,chosen);
          choosenum+=already_chosen;
        }
        choosenum+=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
                                   learn_parm->svm_newvarsinqp-already_chosen),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,
                              (long)0,key,chosen);
      }
      else { /* do a step with all examples from same slack set */
        if(verbosity >= 2) {
          printf("(j-step on %ld)",slackset); fflush(stdout);
        }
        jointstep=1;
        for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
          chosen[j]=0;
        }
        working2dnum[0]=-1;
        eq_target=alphaslack[slackset];
        for(j=0;j<totdoc;j++) {   /* mask all but slackset */
          /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
          if(docs[j]->slackid != slackset)
            ignore[j]=1;
          else {
            ignore[j]=0;
            learn_parm->svm_cost[j]=learn_parm->svm_c;
            /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
          }
        }
        learn_parm->biased_hyperplane=1;
        choosenum=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              learn_parm->svm_maxqpsize,
                              learn_parm,ignore,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,
                              (long)0,key,chosen);
        learn_parm->biased_hyperplane=0;
      }
    }
    else { /* once in a while, select a somewhat random working set to
              break out of infinite loops caused by numerical
              inaccuracies in the core qp-solver */
      choosenum+=select_next_qp_subproblem_rand(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
                                   learn_parm->svm_newvarsinqp),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,key,
                              chosen,iteration);
    }

    if(verbosity>=2) {
      printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
    }

    if(verbosity>=2) t1=get_runtime();

    if(kernel_cache)
      cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
                                 choosenum,kernel_parm);

    if(verbosity>=2) t2=get_runtime();
    if(jointstep) learn_parm->biased_hyperplane=1;
    optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
                 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
                 aicache,kernel_parm,&qp,&epsilon_crit_org);
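    /* The QP step above treats the chosen alphas independently; the loops
       below restore the shared-slack constraint by clipping alphas
       wherever the alpha sum of a slack set exceeds svm_c, and then fold
       the remaining per-set budget into svm_cost as each example's upper
       bound. */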
    learn_parm->biased_hyperplane=0;

    for(jj=0;(i=working2dnum[jj])>=0;jj++)  /* recompute sums of alphas */
      alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
    for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
                                                constraints */
      if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
        if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
          alphaslack[docs[i]->slackid]-=a[i];
          a[i]=0;
        }
        else {
          a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
          alphaslack[docs[i]->slackid]=learn_parm->svm_c;
        }
      }
    }
    for(jj=0;(i=active2dnum[jj])>=0;jj++)
      learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
                                    -alphaslack[docs[i]->slackid]);

    if(verbosity>=2) t3=get_runtime();
    update_linear
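The clipping step above is easiest to check in isolation. Below is a minimal standalone sketch, not part of svm_learn.c: the toy arrays, `NDOC`, `NSLACK`, and the value of `C` are invented for illustration. It mirrors the two loops that recompute `alphaslack` and reduce each `a[i]` until every slack set respects its summed upper bound.

#include <stdio.h>

#define NDOC   4   /* hypothetical number of documents */
#define NSLACK 2   /* hypothetical number of slack ids */

int main(void) {
  /* toy setup: 4 documents sharing 2 slack ids (invented values) */
  long   slackid[NDOC]     = {0, 0, 1, 1};
  double a[NDOC]           = {0.7, 0.6, 0.2, 0.1}; /* alphas after a QP step */
  double C                 = 1.0;                  /* shared upper bound */
  double alphaslack[NSLACK] = {0, 0};
  long   i;

  for(i=0;i<NDOC;i++)            /* recompute sums of alphas */
    alphaslack[slackid[i]] += a[i];

  for(i=0;i<NDOC;i++) {          /* reduce alpha to fulfill constraints */
    if(alphaslack[slackid[i]] > C) {
      if(a[i] < (alphaslack[slackid[i]]-C)) {
        alphaslack[slackid[i]] -= a[i];   /* zeroing a[i] is not enough; */
        a[i] = 0;                         /* take all of it and continue */
      }
      else {
        a[i] -= (alphaslack[slackid[i]]-C); /* a[i] can absorb the excess */
        alphaslack[slackid[i]] = C;
      }
    }
  }

  for(i=0;i<NDOC;i++)
    printf("a[%ld]=%.2f (slackid %ld, set sum now %.2f)\n",
           i, a[i], slackid[i], alphaslack[slackid[i]]);
  return 0;
}

On this toy data, slack set 0 starts with sum 1.3 > C, so a[0] is reduced from 0.7 to 0.4 and the set sum lands exactly on C; slack set 1 already satisfies its bound and is left untouched, just as in the working-set loops above.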