📄 svm_loqo.c
字号:
/* Copyright (C) 1999 Greg Schohn - gcs@jprc.com *//* ********************* svm_thorsten.c ********************** * Based on Thorsten Joachim's "Making large-Scale SVM * Learning Practical" * (http://www-ai.cs.uni-dortmund.de/DOKUMENTE/joachims_99a.ps.gz) * * This version does not do shrinking. This also is dependent * upon Alex Smola's pr_loqo solver (see README_SVM). */#include <bow/svm.h>#define INIT_SIGDIGIT 15 /* precision that pr_loqo will start with */#define LOOSE2LIVE 1000 /* # of iterations that pr_loqo should * spin with a loose precision *//* should use the selection algorithm described on pg 44 of joachim's ch. 11 *//* returns the # of items placed into the ws vector, the elements are returned * in sorted order *//* s is the s(t) from 11.36, the gradient of a(t) *//* n must be multiple of 4 */int get_ws(int *ws, int *y, double *a, double *s, float *cvect, int total, int old_n, int n, struct di *scratch) { int npicked; int nws; int *oldws; char *picked; double tmp; int i,j,k; npicked = 0; oldws = alloca(sizeof(int)*n); picked = alloca(sizeof(char)*total); bzero(picked, sizeof(char)*total); for (i=0; i<n; i++) { oldws[i] = ws[i]; } /* this fills in half - the half with the old ones... */ for (j=1; j>(-2); j-=2) { /* go thru twice, each time filling up n/4 elements */ for(i=0, nws=0; i<old_n; i++) { /* only add those elements which satisfy 11.21 & 22 */ tmp = j*y[oldws[i]]; /* follow DIRECTLY from the logic in 11.3 of adv. kernel methods */ /* the d_i = y_i case happens first (where the elements with the LARGEST * V(_) are chosen, then the d_i = -y_i are chosen next, where the SMALLEST * are chosen */ if (((a[oldws[i]]>svm_epsilon_a) && (a[oldws[i]]<cvect[oldws[i]]-svm_epsilon_a)) || ((a[oldws[i]]<=svm_epsilon_a) && (tmp>0)) || ((a[oldws[i]]>=cvect[oldws[i]]-svm_epsilon_a) && (tmp<0))) { /* use tmp instead of y[i] so that we can still pull things off of * the front of the list (like choosing a different sort fn) */ scratch[nws].d = tmp*(-1+y[oldws[i]]*s[oldws[i]]); /* look familiar? (g(a)) */ scratch[nws].i = oldws[i]; nws++; } } /* this used to be qsort, but nws can be extremely large */ get_top_n(scratch, nws, n); /* k counts the number of things added */ for (i=k=0; (k<n/4) && (i<nws); i++) { if (!picked[scratch[i].i]) { ws[npicked] = scratch[i].i; picked[scratch[i].i] = 1; npicked++; k++; } } } for (j=1; j>(-2); j-=2) { /* go thru twice, each time filling up n/4 elements */ for(i=0, nws=0; i<total; i++) { /* only add those elements which satisfy 11.21 & 22 */ tmp = j*y[i]; /* follow DIRECTLY from the logic in 11.3 of adv. kernel methods */ /* the d_i = y_i case happens first (where the elements with the LARGEST * V(_) are chosen, then the d_i = -y_i are chosen next, where the SMALLEST * are chosen */ if (((a[i]>svm_epsilon_a) && (a[i]<cvect[i]-svm_epsilon_a)) || ((a[i]<=svm_epsilon_a) && (tmp>0)) || ((a[i]>=cvect[i]-svm_epsilon_a) && (tmp<0))) { /* use tmp instead of y[i] so that we can still pull things off of * the front of the list (like choosing a different sort fn) */ scratch[nws].d = tmp*(-1+y[i]*s[i]); /* look familiar? (g(a)) */ scratch[nws].i = i; nws++; } } /* this used to be qsort, but nws can be extremely large */ get_top_n(scratch, nws, n); /* k counts the number of things added */ for (i=k=0; (k<n/4) && (i<nws); i++) { if (!picked[scratch[i].i]) { ws[npicked] = scratch[i].i; picked[scratch[i].i] = 1; npicked++; k++; } } } if (npicked < n) { n = npicked; } qsort(ws, n, sizeof(int), i_cmp); if (svm_verbosity > 1) { int ii; fprintf(stderr,"working set: "); for (ii=0; ii<n;ii++) { fprintf(stderr,"%d ",ws[ii]); } fprintf(stderr,"\n"); fprintf(stderr,"s[ws[*]]: "); for (ii=0; ii<n;ii++) { fprintf(stderr,"%f ",s[ws[ii]]); } fprintf(stderr,"\n"); } return n;}static double calculate_obj(struct svm_qp *q, double *a, int n) { double obj; int i, j; obj = 0.0; for (i=0; i<n; i++) { /* "linear part" "quadratic" part across the diagonal */ obj += (q->g0[i]*a[i]) + (.5*a[i]*a[i]*q->g[i*n+i]); /* since its sym. only go thru once for each ind. & mult by 2 (the .5 goes to 1) */ for (j=0; j<i; j++) { obj += a[i]*a[j]*q->g[j*n+i]; } } return obj;}static int npr_loqo_failures=0; /* counts the number of times the objective has increased *//* calls pr_loqo & does the best error checking that it can (ie. the check's * that svmlight does... */int solve_qp(struct svm_qp *q, int n) { double dist; double epsilon_loqo; int iter; double margin; int result; double obj0, obj1; int i, j; result = !OPTIMAL_SOLUTION; /* calculate the objective value before loqo has a go at it */ obj0 = calculate_obj(q, q->init_a, n); /* still don't understand the margin stuff - just copied from svmlight */ for (iter=q->init_iter, margin=q->margin; (margin<=.9999999) && (result != OPTIMAL_SOLUTION); ) { /* note how m always == 1 & restart is always false */ result = pr_loqo(n, 1, q->g0, q->g, q->ce, q->ce0, q->lbv, q->ubv, q->primal, q->dual, svm_verbosity-4, (double) q->digits, iter, q->margin, q->bound, 0); if (isnan(q->dual[0])) { if (q->margin < .8) { q->margin = (margin*4+1.0)/5.0; } margin = (margin+1)/2.0; q->digits--; //printf("invalid dual, Reducing precision of solver (digits = %d).\n", q->digits); } else if (result != OPTIMAL_SOLUTION) { /* if there is some other problem */ iter += 2000; /* yaslh */ q->init_iter += 10; q->digits--; //printf(" (digits = %d).\n", q->digits); } } /* svmlight does this & it doesn't seem like a bad idea */ epsilon_loqo=1E-10; for(i=0; i<n; i++) { dist=-q->dual[0]*q->ce[i]; dist+=(q->g0[i]+1.0); for(j=0; j<i; j++) { dist += (q->primal[j]*q->g[j*n+i]); } for(j=i; j<n; j++) { dist += (q->primal[j]*q->g[i*n+j]); } if((q->primal[i]<(q->ubv[i]-epsilon_loqo)) && (dist < (1.0-svm_epsilon_crit))) { fprintf(stderr, "relaxing epsilon_loqo (%f,%f)\n", q->primal[i],dist); epsilon_loqo=(q->ubv[i]-q->primal[i])*2.0; } else if((q->primal[i]>epsilon_loqo) && (dist > (1.0+svm_epsilon_crit))) { fprintf(stderr, "relaxing epsilon_loqo (%f,%f)\n", q->primal[i],dist); epsilon_loqo = q->primal[i]*2.0; } } for(i=0; i<n; i++) { /* clip alphas to bounds */ if(q->primal[i]<=epsilon_loqo) { //fprintf(stderr,"primal[i]=%f,eps=%f",q->primal[i],epsilon_loqo); q->primal[i] = 0; } else if(q->primal[i]>=q->ubv[i]-epsilon_loqo) { //fprintf(stderr,"primal[i]=%f,eps=%f",q->primal[i],epsilon_loqo); q->primal[i] = q->ubv[i]; } } obj1 = calculate_obj(q, q->primal, n); if (obj1 >= obj0) { q->digits += 2; fprintf(stderr,"objective function increased (from %f to %f)! Increasing precision (digits = %d)\n",obj0,obj1,q->digits); if (svm_verbosity > 0) { printV("Before: ", q->init_a, n, "\n"); printV("After: ", q->primal, n, "\n"); } npr_loqo_failures++; if (npr_loqo_failures > 200) { npr_loqo_failures=0; svm_epsilon_crit = svm_epsilon_crit * 1.5; /* give up at this prec., make cond. easier... */ fprintf(stderr,"Over 200 increases of the objective - increasing KKT slack to %f\n",svm_epsilon_crit); printf("Over 200 increases of the objective - increasing KKT slack to %f\n",svm_epsilon_crit); } } else if (svm_verbosity >2) { fprintf(stderr,"objective: %f --> %f\n", obj0, obj1); printV("After: ", q->primal, n, "\n"); } /* make sure to round results within epsilon of the bounds */ if (result == OPTIMAL_SOLUTION) { return SUCCESS; } else { fprintf(stderr,"optimal solution not found by pr_loqo"); return ERROR; }}void setup_solve_sub_qp(int *ws, int *y, double *a, bow_wv **docs, struct svm_qp *qd, int n, int *nsv) { int di; double qbn; int i,j,h,k; qd->ce0[0] = 0.0; /* compute the constant Sum{i of N}{A_i*y_i} in the constraint */ /* since this is an equality constraint that sums to 0, the sum of * the terms in the working set before optimization must be equal to * that after... therefore, simply summing over the working set is * just as good as explicitly summing over the bound set... */ for (i=0; i<n; i++) { if (a[ws[i]] > svm_epsilon_a) { qd->ce0[0] += y[ws[i]]*a[ws[i]]; } } /* compute things in B */ for (i=0; i<n; i++) { /* setup equality constraint (a_i*y_i) vector */ di = ws[i]; qd->ce[i] = y[di]; qbn = 0.0; for (h=j=k=0, qbn=0.0; h<(*nsv); j++) { /* if this is an sv */ if (a[j] > svm_epsilon_a) { /* remember we're ONLY adding those things in N, not b U n */ if (k < n) { if (ws[k] == j) { h++; k++; continue; } else { if (ws[k] < j) { /* same as above */ k++; j--; continue; } } } qbn += a[j]*y[j]*svm_kernel_cache(docs[j], docs[di]); h++; } } /* multiply that sum by the label of its cross-reference - this is Qbn * since the term -a_b also gets summed up - add them to qbn */ qd->g0[i] = -1 + y[di]*qbn; /* put together the "quadratic" terms - the BxB part */ for (j=i; j<n; j++) { qd->g[i*n + j] = y[di]*y[ws[j]]*svm_kernel_cache(docs[ws[j]], docs[di]); } } kcache_age(); /* init_a is kept in qd so that the B alphas that correspond to * the alphas in the primal are readily & easily available */ for(i=0; i<n; i++) { qd->init_a[i] = a[ws[i]]; } /* IMPORTANT - this is the only place that the number of support vectors * can change & they'll only change (arrive or leave) in the working set * (since those alpa in N cannot be modified) */ if (svm_verbosity > 3) { printf("calling solver with these variables...\nce0=%f\n",qd->ce0[0]); printV("init_a: ", qd->init_a, n, "\n"); printV("ce: ", qd->ce, n, "\n"); printV("g0: ", qd->g0, n, "\n"); printf("hessian:\n"); for (i=0; i<n; i++) { printV(" ", &(qd->g[i*n]), n, "\n"); } } /* this is a function so that other functions for other solvers may be written */ if (SUCCESS == solve_qp(qd, n)) { /* copy primal (the solution for the alphas to our alpha) */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -