📄 svm_loqo.c

📁 卡内基梅隆大学MaCallum开发的文本分类系统
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* Copyright (C) 1999 Greg Schohn - gcs@jprc.com *//* ********************* svm_thorsten.c ********************** * Based on Thorsten Joachim's "Making large-Scale SVM  * Learning Practical"  * (http://www-ai.cs.uni-dortmund.de/DOKUMENTE/joachims_99a.ps.gz) *  * This version does not do shrinking.  This also is dependent * upon Alex Smola's pr_loqo solver (see README_SVM). */#include <bow/svm.h>#define INIT_SIGDIGIT 15    /* precision that pr_loqo will start with */#define LOOSE2LIVE    1000  /* # of iterations that pr_loqo should 			     * spin with a loose precision *//* should use the selection algorithm described on pg 44 of joachim's ch. 11 *//* returns the # of items placed into the ws vector, the elements are returned * in sorted order *//* s is the s(t) from 11.36, the gradient of a(t) *//* n must be multiple of 4 */int get_ws(int *ws, int *y, double *a, double *s, float *cvect, 	   int total, int old_n, int n, struct di *scratch) {  int npicked;  int nws;  int  *oldws;  char *picked;  double tmp;  int i,j,k;  npicked = 0;  oldws = alloca(sizeof(int)*n);  picked = alloca(sizeof(char)*total);  bzero(picked, sizeof(char)*total);  for (i=0; i<n; i++) {    oldws[i] = ws[i];  }  /* this fills in half - the half with the old ones... */  for (j=1; j>(-2); j-=2) { /* go thru twice, each time filling up n/4 elements */    for(i=0, nws=0; i<old_n; i++) {      /* only add those elements which satisfy 11.21 & 22 */      tmp = j*y[oldws[i]];      /* follow DIRECTLY from the logic in 11.3 of adv. kernel methods */      /* the d_i = y_i case happens first (where the elements with the LARGEST       * V(_) are chosen, then the d_i = -y_i are chosen next, where the SMALLEST       * are chosen */      if (((a[oldws[i]]>svm_epsilon_a) && (a[oldws[i]]<cvect[oldws[i]]-svm_epsilon_a)) 	  || ((a[oldws[i]]<=svm_epsilon_a) && (tmp>0))	  || ((a[oldws[i]]>=cvect[oldws[i]]-svm_epsilon_a) && (tmp<0))) {	/* use tmp instead of y[i] so that we can still pull things off of	 * the front of the list (like choosing a different sort fn) */	scratch[nws].d = tmp*(-1+y[oldws[i]]*s[oldws[i]]); /* look familiar? (g(a)) */	scratch[nws].i = oldws[i];	nws++;      }    }    /* this used  to be qsort, but nws can be extremely large */    get_top_n(scratch, nws, n);    /* k counts the number of things added */    for (i=k=0; (k<n/4) && (i<nws); i++) {      if (!picked[scratch[i].i]) {	ws[npicked] = scratch[i].i;	picked[scratch[i].i] = 1;	npicked++;	k++;      }    }  }  for (j=1; j>(-2); j-=2) { /* go thru twice, each time filling up n/4 elements */    for(i=0, nws=0; i<total; i++) {      /* only add those elements which satisfy 11.21 & 22 */      tmp = j*y[i];      /* follow DIRECTLY from the logic in 11.3 of adv. kernel methods */      /* the d_i = y_i case happens first (where the elements with the LARGEST       * V(_) are chosen, then the d_i = -y_i are chosen next, where the SMALLEST       * are chosen */      if (((a[i]>svm_epsilon_a) && (a[i]<cvect[i]-svm_epsilon_a)) 	  || ((a[i]<=svm_epsilon_a) && (tmp>0))	  || ((a[i]>=cvect[i]-svm_epsilon_a) && (tmp<0))) {	/* use tmp instead of y[i] so that we can still pull things off of	 * the front of the list (like choosing a different sort fn) */	scratch[nws].d = tmp*(-1+y[i]*s[i]); /* look familiar? (g(a)) */	scratch[nws].i = i;	nws++;      }    }    /* this used  to be qsort, but nws can be extremely large */    get_top_n(scratch, nws, n);    /* k counts the number of things added */    for (i=k=0; (k<n/4) && (i<nws); i++) {      if (!picked[scratch[i].i]) {	ws[npicked] = scratch[i].i;	picked[scratch[i].i] = 1;	npicked++;	k++;      }    }  }  if (npicked < n) {    n = npicked;  }  qsort(ws, n, sizeof(int), i_cmp);  if (svm_verbosity > 1) {     int ii;     fprintf(stderr,"working set: ");     for (ii=0; ii<n;ii++) {       fprintf(stderr,"%d ",ws[ii]);    }     fprintf(stderr,"\n");     fprintf(stderr,"s[ws[*]]: ");     for (ii=0; ii<n;ii++) {       fprintf(stderr,"%f ",s[ws[ii]]);    }     fprintf(stderr,"\n");   }  return n;}static double calculate_obj(struct svm_qp *q, double *a, int n) {  double obj;  int i, j;  obj = 0.0;  for (i=0; i<n; i++) {    /*      "linear part"   "quadratic" part across the diagonal */    obj += (q->g0[i]*a[i]) + (.5*a[i]*a[i]*q->g[i*n+i]);    /* since its sym. only go thru once for each ind. & mult by 2 (the .5 goes to 1) */    for (j=0; j<i; j++) {       obj += a[i]*a[j]*q->g[j*n+i];    }  }  return obj;}static int npr_loqo_failures=0;  /* counts the number of times the objective has increased *//* calls pr_loqo & does the best error checking that it can (ie. the check's * that svmlight does... */int solve_qp(struct svm_qp *q, int n) {  double dist;  double epsilon_loqo;  int    iter;  double margin;  int    result;  double obj0, obj1;  int i, j;  result = !OPTIMAL_SOLUTION;  /* calculate the objective value before loqo has a go at it */  obj0 = calculate_obj(q, q->init_a, n);  /* still don't understand the margin stuff - just copied from svmlight */  for (iter=q->init_iter, margin=q->margin; (margin<=.9999999) && (result != OPTIMAL_SOLUTION); ) {    /* note how m always == 1 & restart is always false */    result = pr_loqo(n, 1, q->g0, q->g, q->ce, q->ce0, q->lbv, q->ubv, q->primal, q->dual,		     svm_verbosity-4, (double) q->digits, iter, q->margin, q->bound, 0);    if (isnan(q->dual[0])) {      if (q->margin < .8) {	q->margin = (margin*4+1.0)/5.0;      }      margin = (margin+1)/2.0;      q->digits--;      //printf("invalid dual, Reducing precision of solver (digits = %d).\n", q->digits);    } else if (result != OPTIMAL_SOLUTION) { /* if there is some other problem */      iter += 2000; /* yaslh */      q->init_iter += 10;      q->digits--;      //printf(" (digits = %d).\n", q->digits);    }  }  /* svmlight does this & it doesn't seem like a bad idea */  epsilon_loqo=1E-10;  for(i=0; i<n; i++) {    dist=-q->dual[0]*q->ce[i];    dist+=(q->g0[i]+1.0);    for(j=0; j<i; j++) {      dist += (q->primal[j]*q->g[j*n+i]);    }    for(j=i; j<n; j++) {      dist += (q->primal[j]*q->g[i*n+j]);    }    if((q->primal[i]<(q->ubv[i]-epsilon_loqo)) && (dist < (1.0-svm_epsilon_crit))) {      fprintf(stderr, "relaxing epsilon_loqo (%f,%f)\n", q->primal[i],dist);      epsilon_loqo=(q->ubv[i]-q->primal[i])*2.0;    } else if((q->primal[i]>epsilon_loqo) && (dist > (1.0+svm_epsilon_crit))) {      fprintf(stderr, "relaxing epsilon_loqo (%f,%f)\n", q->primal[i],dist);      epsilon_loqo = q->primal[i]*2.0;    }  }  for(i=0; i<n; i++) {  /* clip alphas to bounds */    if(q->primal[i]<=epsilon_loqo) {      //fprintf(stderr,"primal[i]=%f,eps=%f",q->primal[i],epsilon_loqo);      q->primal[i] = 0;    } else if(q->primal[i]>=q->ubv[i]-epsilon_loqo) {      //fprintf(stderr,"primal[i]=%f,eps=%f",q->primal[i],epsilon_loqo);      q->primal[i] = q->ubv[i];    }  }  obj1 = calculate_obj(q, q->primal, n);  if (obj1 >= obj0) {    q->digits += 2;    fprintf(stderr,"objective function increased (from %f to %f)! Increasing precision (digits = %d)\n",obj0,obj1,q->digits);    if (svm_verbosity > 0) {      printV("Before: ", q->init_a, n, "\n");      printV("After:  ", q->primal, n, "\n");    }    npr_loqo_failures++;    if (npr_loqo_failures > 200) {      npr_loqo_failures=0;      svm_epsilon_crit = svm_epsilon_crit * 1.5; /* give up at this prec., make cond. easier... */      fprintf(stderr,"Over 200 increases of the objective - increasing KKT slack to %f\n",svm_epsilon_crit);      printf("Over 200 increases of the objective - increasing KKT slack to %f\n",svm_epsilon_crit);    }  } else if (svm_verbosity >2) {    fprintf(stderr,"objective: %f --> %f\n", obj0, obj1);    printV("After:  ", q->primal, n, "\n");  }  /* make sure to round results within epsilon of the bounds */  if (result == OPTIMAL_SOLUTION) {    return SUCCESS;  } else {    fprintf(stderr,"optimal solution not found by pr_loqo");    return ERROR;  }}void setup_solve_sub_qp(int *ws, int *y, double *a, bow_wv **docs, struct svm_qp *qd, int n, int *nsv) {  int di;  double qbn;  int i,j,h,k;  qd->ce0[0] = 0.0;  /* compute the constant Sum{i of N}{A_i*y_i} in the constraint */  /* since this is an equality constraint that sums to 0, the sum of   * the terms in the working set before optimization must be equal to    * that after...  therefore, simply summing over the working set is   * just as good as explicitly summing over the bound set... */  for (i=0; i<n; i++) {    if (a[ws[i]] > svm_epsilon_a) {      qd->ce0[0] += y[ws[i]]*a[ws[i]];    }  }  /* compute things in B */  for (i=0; i<n; i++) {    /* setup equality constraint (a_i*y_i) vector */    di = ws[i];    qd->ce[i] = y[di];    qbn = 0.0;    for (h=j=k=0, qbn=0.0; h<(*nsv); j++) {      /* if this is an sv */      if (a[j] > svm_epsilon_a) {	/* remember we're ONLY adding those things in N, not b U n */	if (k < n) {	  if (ws[k] == j) {	    h++;	    k++;	    continue;	  } else {	    if (ws[k] < j) {  /* same as above */	      k++;	      j--;	      continue;	    }	  }	}	qbn += a[j]*y[j]*svm_kernel_cache(docs[j], docs[di]);	h++;      }    }    /* multiply that sum by the label of its cross-reference - this is Qbn     * since the term -a_b also gets summed up - add them to qbn */    qd->g0[i] = -1 + y[di]*qbn;    /* put together the "quadratic" terms - the BxB part */    for (j=i; j<n; j++) {      qd->g[i*n + j] = y[di]*y[ws[j]]*svm_kernel_cache(docs[ws[j]], docs[di]);    }  }  kcache_age();  /* init_a is kept in qd so that the B alphas that correspond to    * the alphas in the primal are readily & easily available */  for(i=0; i<n; i++) {    qd->init_a[i] = a[ws[i]];  }  /* IMPORTANT - this is the only place that the number of support vectors   * can change & they'll only change (arrive or leave) in the working set   * (since those alpa in N cannot be modified) */  if (svm_verbosity > 3) {    printf("calling solver with these variables...\nce0=%f\n",qd->ce0[0]);    printV("init_a: ", qd->init_a, n, "\n");    printV("ce:     ", qd->ce, n, "\n");    printV("g0:     ", qd->g0, n, "\n");      printf("hessian:\n");    for (i=0; i<n; i++) {      printV("     ", &(qd->g[i*n]), n, "\n");    }  }    /* this is a function so that other functions for other solvers may be written */  if (SUCCESS == solve_qp(qd, n)) {    /* copy primal (the solution for the alphas to our alpha) */
12 下一页
💿 文件大小 618 K
👤 上传用户 hqbbsw
📂 所属分类多国语言处理
🏷️ 相关标签

#MaCallum #大学 #文本分类
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -