📄 svm_smo.c
字号:
/* much like the build_svm algorithm's s(t) vector, error needs * to be updated every time we set some new alphas */ /* also finish update bup & blow */ { double *error = ms->error; int *items; int nitems; items = ms->I0.items; nitems = ms->I0.ilength; for (i=0; i<nitems; i++) { double a, b; a = svm_kernel_cache(docs[ex1],docs[items[i]]); b = svm_kernel_cache(docs[ex2],docs[items[i]]); error[items[i]] += diff1*a + diff2*b; } { int efrom; double e; /* compute the new bup & blow */ for (i=0, e=ms->bup; i<nitems; i++) { if (e > error[items[i]]) { e = error[items[i]]; efrom = items[i]; } } if (e != ms->bup) { ms->bup = e; ms->iup = efrom; } for (i=0, e=ms->blow; i<nitems; i++) { if (e < error[items[i]]) { e = error[items[i]]; efrom = items[i]; } } if (ms->blow != e) { ms->blow = e; ms->ilow = efrom; } } } kcache_age(); //printf("blow = %f(%d), bup = %f(%d)\n",ms->blow, ms->ilow, ms->bup, ms->iup); return 1;}/* this function is only called when all examples are being queried (ie. * the examine_all phase). */int opt_single(int ex2, struct svm_smo_model *ms) { double *error; int ndocs; double *weights; int *yvect; double a2; double e2; int y2; ms->n_single_tot ++; error = ms->error; ndocs = ms->ndocs; weights = ms->weights; yvect = ms->yvect; y2 = ms->yvect[ex2]; a2 = weights[ex2]; if (set_lookup(ex2, &(ms->I0))) { e2 = error[ex2]; } else { e2 = error[ex2] = smo_evaluate_error(ms, ex2); if (set_lookup(ex2, &(ms->I1)) || set_lookup(ex2, &(ms->I2))) { if (e2 < ms->bup) { ms->iup = ex2; ms->bup = e2; } } else if (!set_lookup(ex2, &(ms->I0))) { /* must be in I3 orI4 */ if (e2 > ms->blow) { ms->ilow = ex2; ms->blow = e2; } } } { int opt=1; int ex1; if (set_lookup(ex2, &(ms->I0)) || set_lookup(ex2, &(ms->I1)) || set_lookup(ex2, &(ms->I2))) { if (ms->blow-e2 > 2*svm_epsilon_crit) { opt = 0; ex1 = ms->ilow; } } if (set_lookup(ex2, &(ms->I0)) || set_lookup(ex2, &(ms->I3)) || set_lookup(ex2, &(ms->I4))) { if (e2-ms->bup > 2*svm_epsilon_crit) { opt = 0; ex1 = ms->iup; } } if (opt == 1) { kcache_age(); return 0; } /* if we get here, then opt was == 1 & ex1 was valid */ if (set_lookup(ex2, &(ms->I0))) { if (ms->blow > 2*e2 - ms->bup) { ex1 = ms->ilow; } else { ex1 = ms->iup; } } if (!set_lookup(ex1, &(ms->I0))) { /* not in the cache & it needs to be */ error[ex1] = smo_evaluate_error(ms, ex1); } kcache_age(); if (opt_pair(ex1, ex2, ms)) { ms->n_single_suc ++; return 1; } else { return 0; } }}int smo(bow_wv **docs, int *yvect, double *weights, double *a_b, double **W, int ndocs, double *error, float *cvect, int *nsv) { int changed; int inspect_all; struct svm_smo_model model; int nchanged; int num_words; double *original_weights; int i,j,k,n; num_words = bow_num_words(); m1 = m2 = m3 = m4 = 0; model.n_pair_suc = model.n_pair_tot = model.n_single_suc = model.n_single_tot = model.n_outer = 0; model.nsv = *nsv; model.docs = docs; model.error = error; model.ndocs = ndocs; model.cvect = cvect; original_weights = NULL; if (svm_kernel_type == 0 && !(*W)) { *W = model.W = (double *) malloc(sizeof(double)*num_words); } else { model.W = NULL; } model.weights = weights; model.yvect = yvect; /* figure out the # of positives */ for (i=j=k=n=0; i<ndocs; i++) { if (yvect[i] == 1) { k = i; j++; } else { n = i; } } /* k is set to the last positive example found, n is the last negative */ make_set(ndocs,ndocs,&(model.I0)); make_set(ndocs,j,&(model.I1)); make_set(ndocs,ndocs-j,&(model.I2)); make_set(ndocs,j,&(model.I3)); make_set(ndocs,ndocs-j,&(model.I4)); /* this is the code which initializes the sets according to the weights values */ for (i=0; i<ndocs; i++) { struct set *s; if (weights[i] > svm_epsilon_a && weights[i] < cvect[i] - svm_epsilon_a) { s = &(model.I0); } else if (yvect[i] == 1) { if (weights[i] < svm_epsilon_a) s = &(model.I1); else s = &(model.I3); } else { if (weights[i] < svm_epsilon_a) s = &(model.I4); else s = &(model.I2); } set_insert(i, s); } if (model.W) { for (i=0; i<num_words; i++) { model.W[i] = 0.0; } } if (model.I0.ilength == 0) { model.blow = 1; model.bup = -1; model.iup = k; model.ilow = n; error[k] = -1; error[n] = 1; } else { /* compute bup & blow */ int efrom, nitems; int *items; double e; nitems = model.I0.ilength; items = model.I0.items; for (i=0, e=-1*MAXDOUBLE; i<nitems; i++) { if (e < error[items[i]]) { e = error[items[i]]; efrom = items[i]; } } model.blow = e; model.ilow = efrom; for (i=0, e=MAXDOUBLE; i<nitems; i++) { if (e > error[items[i]]) { e = error[items[i]]; efrom = items[i]; } } model.bup = e; model.iup = efrom; if (model.W) { for (i=0; i<nitems; i++) { for (j=0; j<docs[items[i]]->num_entries; j++) { model.W[docs[items[i]]->entry[j].wi] += yvect[items[i]] * weights[items[i]] * docs[items[i]]->entry[j].weight; } } /* also need to include bound sv's (I2 & I3) */ for (k=0, nitems=model.I2.ilength, items=model.I2.items; k<2; k++, nitems=model.I3.ilength, items=model.I3.items) { for (i=0; i<nitems; i++) { for (j=0; j<docs[items[i]]->num_entries; j++) { model.W[docs[items[i]]->entry[j].wi] += yvect[items[i]] * weights[items[i]] * docs[items[i]]->entry[j].weight; } } } } } if (!model.W) { model.W = *W; } if (svm_weight_style == WEIGHTS_PER_MODEL) { kcache_init(ndocs); } inspect_all = 1; nchanged = 0; changed = 0; while (nchanged || inspect_all) { nchanged = 0; #ifdef DEBUG check_inv(&model,ndocs);#endif model.n_outer ++; PRINT_SMO_PROGRESS(stderr, &model); fflush(stderr); if (1 && inspect_all) { int ub = ndocs; i=j=random() % ndocs; for (k=0; k<2; k++,ub=j,i=0) { for (; i<ub; i++) { nchanged += opt_single(i, &model);#ifdef DEBUG check_inv(&model,ndocs);#endif } } inspect_all = 0; } else { /* greg's modification to keerthi, et al's modification 2 */ /* loop of optimizing all pairwise in a row with all elements * in I0 (just like above, but only those in I0) */ do { nchanged = 0; /* here's the continuous iup/ilow loop */ while (1) { if (!set_lookup(model.iup, &(model.I0))) { error[model.iup] = smo_evaluate_error(&model,model.iup); } if (!set_lookup(model.ilow, &(model.I0))) { error[model.ilow] = smo_evaluate_error(&model,model.ilow); } if (opt_pair(model.iup, model.ilow, &model)) {#ifdef DEBUG check_inv(&model,ndocs);#endif nchanged ++; } else { break; } if (model.bup > model.blow - 2*svm_epsilon_crit) break; } if (nchanged) { changed = 1; } nchanged = 0; /* now inspect all of the elements in I0 */ { int ub = ndocs; i=j=random() % ndocs; for (k=0; k<2; k++,ub=j,i=0) { for (; i<ub; i++) { if (set_lookup(i, &(model.I0))) { nchanged += opt_single(i, &model);#ifdef DEBUG check_inv(&model,ndocs);#endif } } } } } while (nchanged); /* of of the loop */ if (nchanged) { changed = 1; } inspect_all = 1; } /* note: both of the above blocks no when they are done so they flip inspect_all */ if (nchanged) { changed = 1; } } free_set(&model.I0); free_set(&model.I1); free_set(&model.I2); free_set(&model.I3); free_set(&model.I4); if (svm_weight_style == WEIGHTS_PER_MODEL) { kcache_clear(); } if (svm_verbosity > 3) fprintf(stderr,"\n"); //printf("bup=%f, blow=%f\n",model.bup,model.blow); *a_b = (model.bup + model.blow) / 2; if (svm_kernel_type == 0) { for (i=j=0; i<num_words; i++) { if (model.W[i] != 0.0) j++; } } //printf("m1: %d, m2: %d, m3: %d, m4: %d", m1,m2,m3,m4); *nsv = model.nsv; return (changed);}/* defunct fn. */inline double svm_smo_tval_to_err(double si, double b, int y) { return 0.0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -