📄 svm_learn.c
字号:
learn_parm,inconsistent,active2dnum, working2dnum,selcrit,selexam,kernel_cache, key,chosen); choosenum+=already_chosen; } choosenum+=select_next_qp_subproblem_grad( label,unlabeled,a,lin,c,totdoc, minl(learn_parm->svm_maxqpsize-choosenum, learn_parm->svm_newvarsinqp-already_chosen), learn_parm,inconsistent,active2dnum, working2dnum,selcrit,selexam,kernel_cache,key, chosen); } else { /* once in a while, select a somewhat random working set to get unlocked of infinite loops due to numerical inaccuracies in the core qp-solver */ choosenum+=select_next_qp_subproblem_rand( label,unlabeled,a,lin,c,totdoc, minl(learn_parm->svm_maxqpsize-choosenum, learn_parm->svm_newvarsinqp), learn_parm,inconsistent,active2dnum, working2dnum,selcrit,selexam,kernel_cache,key, chosen,iteration); } } if(verbosity>=2) { printf(" %ld vectors chosen\n",choosenum); fflush(stdout); } if(verbosity>=2) t1=get_runtime(); if(kernel_cache) cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, choosenum,kernel_parm); if(verbosity>=2) t2=get_runtime(); if(retrain != 2) { optimize_svm(docs,label,unlabeled,chosen,active2dnum,model,totdoc, working2dnum,choosenum,a,lin,c,learn_parm,aicache, kernel_parm,&qp,&epsilon_crit_org); } if(verbosity>=2) t3=get_runtime(); update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, totwords,kernel_parm,kernel_cache,lin,aicache, weights); if(verbosity>=2) t4=get_runtime(); supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, learn_parm,working2dnum,active2dnum,model); if(verbosity>=2) t5=get_runtime(); /* The following computation of the objective function works only */ /* relative to the active variables */ if(verbosity>=3) { criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, active2dnum); printf("Objective function (over active variables): %.16f\n",criterion); fflush(stdout); } for(jj=0;(i=working2dnum[jj])>=0;jj++) { a_old[i]=a[i]; } if(retrain == 2) { /* reset inconsistent unlabeled examples */ for(i=0;(i<totdoc);i++) { if(inconsistent[i] && unlabeled[i]) { inconsistent[i]=0; label[i]=0; } } } retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm, maxdiff,epsilon_crit_org,&misclassified, inconsistent,active2dnum,last_suboptimal_at, iteration,kernel_parm); if(verbosity>=2) { t6=get_runtime(); timing_profile->time_select+=t1-t0; timing_profile->time_kernel+=t2-t1; timing_profile->time_opti+=t3-t2; timing_profile->time_update+=t4-t3; timing_profile->time_model+=t5-t4; timing_profile->time_check+=t6-t5; } noshrink=0; if((!retrain) && (inactivenum>0) && ((!learn_parm->skip_final_opt_check) || (kernel_parm->kernel_type == LINEAR))) { if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) || (verbosity>=2)) { if(verbosity==1) { printf("\n"); } printf(" Checking optimality of inactive variables..."); fflush(stdout); } t1=get_runtime(); reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, totwords,iteration,learn_parm,inconsistent, docs,kernel_parm,kernel_cache,model,aicache, weights,maxdiff); /* Update to new active variables. */ activenum=compute_index(shrink_state->active,totdoc,active2dnum); inactivenum=totdoc-activenum; /* termination criterion */ noshrink=1; retrain=0; if((*maxdiff) > learn_parm->epsilon_crit) retrain=1; timing_profile->time_shrink+=get_runtime()-t1; if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) || (verbosity>=2)) { printf("done.\n"); fflush(stdout); printf(" Number of inactive variables = %ld\n",inactivenum); } } if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) learn_parm->epsilon_crit=(*maxdiff); if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { learn_parm->epsilon_crit/=2.0; retrain=1; noshrink=1; } if(learn_parm->epsilon_crit<epsilon_crit_org) learn_parm->epsilon_crit=epsilon_crit_org; if(verbosity>=2) { printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", supvecnum,model->at_upper_bound,(*maxdiff)); fflush(stdout); } if(verbosity>=3) { printf("\n"); } if((!retrain) && (transduction)) { for(i=0;(i<totdoc);i++) { shrink_state->active[i]=1; } activenum=compute_index(shrink_state->active,totdoc,active2dnum); inactivenum=0; if(verbosity==1) printf("done\n"); retrain=incorporate_unlabeled_examples(model,label,inconsistent, unlabeled,a,lin,totdoc, selcrit,selexam,key, transductcycle,kernel_parm, learn_parm); epsilon_crit_org=learn_parm->epsilon_crit; if(kernel_parm->kernel_type == LINEAR) learn_parm->epsilon_crit=1; transductcycle++; } else if(((iteration % 10) == 0) && (!noshrink)) { activenum=shrink_problem(learn_parm,shrink_state,active2dnum,last_suboptimal_at, iteration,totdoc,maxl((long)(activenum/10),100), a,inconsistent); inactivenum=totdoc-activenum; if((kernel_cache) && (supvecnum>kernel_cache->max_elems) && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { kernel_cache_shrink(kernel_cache,totdoc,maxl((long)(activenum/10),500), shrink_state->active); } } if((!retrain) && learn_parm->remove_inconsistent) { if(verbosity>=1) { printf(" Moving training errors to inconsistent examples..."); fflush(stdout); } if(learn_parm->remove_inconsistent == 1) { retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, &inconsistentnum,inconsistent); } else if(learn_parm->remove_inconsistent == 2) { retrain=identify_misclassified(lin,label,unlabeled,totdoc, model,&inconsistentnum,inconsistent); } else if(learn_parm->remove_inconsistent == 3) { retrain=identify_one_misclassified(lin,label,unlabeled,totdoc, model,&inconsistentnum,inconsistent); } if(retrain) { if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */ learn_parm->epsilon_crit=2.0; } } if(verbosity>=1) { printf("done.\n"); if(retrain) { printf(" Now %ld inconsistent examples.\n",inconsistentnum); } } } } /* end of loop */ free(chosen); free(last_suboptimal_at); free(key); free(selcrit); free(selexam); free(a_old); free(aicache); free(working2dnum); free(active2dnum); free(qp.opt_ce); free(qp.opt_ce0); free(qp.opt_g); free(qp.opt_g0); free(qp.opt_xinit); free(qp.opt_low); free(qp.opt_up); free(weights); learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ return(iteration);}double compute_objective_function(double *a, double *lin, double *c, double eps, long int *label, long int *active2dnum) /* Return value of objective function. */ /* Works only relative to the active variables! */{ long i,ii; double criterion; /* calculate value of objective function */ criterion=0; for(ii=0;active2dnum[ii]>=0;ii++) { i=active2dnum[ii]; criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i]; } return(criterion);}void clear_index(long int *index) /* initializes and empties index */{ index[0]=-1;} void add_to_index(long int *index, long int elem) /* initializes and empties index */{ register long i; for(i=0;index[i] != -1;i++); index[i]=elem; index[i+1]=-1;}long compute_index(long int *binfeature, long int range, long int *index) /* create an inverted index of binfeature */{ register long i,ii; ii=0; for(i=0;i<range;i++) { if(binfeature[i]) { index[ii]=i; ii++; } } for(i=0;i<4;i++) { index[ii+i]=-1; } return(ii);}void optimize_svm(DOC *docs, long int *label, long int *unlabeled, long int *chosen, long int *active2dnum, MODEL *model, long int totdoc, long int *working2dnum, long int varnum, double *a, double *lin, double *c, LEARN_PARM *learn_parm, float *aicache, KERNEL_PARM *kernel_parm, QP *qp, double *epsilon_crit_target) /* Do optimization on the working set. */{ long i; double *a_v; compute_matrices_for_optimization(docs,label,unlabeled,chosen,active2dnum, working2dnum,model,a,lin,c,varnum, totdoc,learn_parm,aicache,kernel_parm, qp); if(verbosity>=3) { printf("Running optimizer..."); fflush(stdout); } /* call the qp-subsolver */ a_v=optimize_qp(qp,epsilon_crit_target, learn_parm->svm_maxqpsize, &(model->b), /* in case the optimizer gives us */ /* the threshold for free. otherwise */ /* b is calculated in calculate_model. */ learn_parm); if(verbosity>=3) { printf("done\n"); } for(i=0;i<varnum;i++) { a[working2dnum[i]]=a_v[i]; /* if(a_v[i]<=(0+learn_parm->epsilon_a)) { a[working2dnum[i]]=0; } else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; } */ }}void compute_matrices_for_optimization(DOC *docs, long int *label, long int *unlabeled, long int *chosen, long int *active2dnum, long int *key, MODEL *model, double *a, double *lin, double *c, long int varnum, long int totdoc, LEARN_PARM *learn_parm, float *aicache, KERNEL_PARM *kernel_parm, QP *qp){ register long ki,kj,i,j; register double kernel_temp; if(verbosity>=3) { fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const); fflush(stdout); } qp->opt_n=varnum; qp->opt_ce0[0]=0; /* compute the constant for equality constraint */ for(j=1;j<model->sv_num;j++) { /* start at 1 */ if(!chosen[(model->supvec[j])->docnum]) { qp->opt_ce0[0]+=model->alpha[j]; } } if(learn_parm->biased_hyperplane) qp->opt_m=1; else qp->opt_m=0; /* eq-constraint will be ignored */ /* init linear part of objective function */ for(i=0;i<varnum;i++) { qp->opt_g0[i]=lin[key[i]]; } for(i=0;i<varnum;i++) { ki=key[i]; /* Compute the matrix for equality constraints */ qp->opt_ce[i]=label[ki]; qp->opt_low[i]=0; qp->opt_up[i]=learn_parm->svm_cost[ki]; kernel_temp=(double)kernel(kernel_parm,&(docs[ki]),&(docs[ki])); /* compute linear part of objective function */ qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]); /* compute quadratic part of objective function */ qp->opt_g[varnum*i+i]=kernel_temp; for(j=i+1;j<varnum;j++) { kj=key[j]; kernel_temp=(double)kernel(kernel_parm,&(docs[ki]),&(docs[kj])); /* compute linear part of objective function */ qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]); qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]); /* compute quadratic part of objective function */ qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp; qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp; } if(verbosity>=3) { if(i % 20 == 0) { fprintf(stdout,"%ld..",i); fflush(stdout); } } } for(i=0;i<varnum;i++) { /* assure starting at feasible point */ qp->opt_xinit[i]=a[key[i]]; /* set linear part of objective function */ qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]]; } if(verbosity>=3) { fprintf(stdout,"done\n"); }}long calculate_svm_model(DOC *docs, long int *label, long int *unlabeled, double *lin, double *a, double *a_old, double *c, LEARN_PARM *learn_parm, long int *working2dnum, long int *active2dnum, MODEL *model) /* Compute decision function based on current values */ /* of alpha. */{ long i,ii,pos,b_calculated=0,first_low,first_high; double ex_c,b_temp,b_low,b_high; if(verbosity>=3) { printf("Calculating model..."); fflush(stdout); } if(!learn_parm->biased_hyperplane) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -