📄 boost.c
字号:
/****************************************************************************** boost.c - boost ripper's hypotheses ******************************************************************************/#include <stdio.h>#include <math.h>#include "ripper.h"#include "boost.h"#include "mdb.h"/*****************************************************************************/static concept_kind_t weak_learner_hypkind(char *);static symbol_t *ith_weak_learner_classify(boosted_concept_t*,vec_t *,int);static symbol_t *last_weak_learner_classify(boosted_concept_t*,vec_t *);static BOOL add_weak_hyp(DATA *,boosted_concept_t *,double *);/* basic adaboost procedure -- test data is used only for traces */boosted_concept_t *boost_model(DATA *train_data,DATA *test_data){ ex_count_t tot_weight,tot_test_weight,new_tot,z; example_t *exi; int i,j,t; double epsilon, beta; double train_err,test_err; boosted_concept_t *hyp; hyp = newmem(1,boosted_concept_t); hyp->hyp_kind = weak_learner_hypkind(Weak_learner); hyp->weak_hyp = NULL; hyp->wt = new_vec(double); /* let tot_weight be total weight of examples */ tot_weight = 0; for (i=0; i<vmax(train_data); i++) { exi = vref(example_t,train_data,i); tot_weight += exi->wt; } /* now boost up to N_boost times */ for (t=0; t<N_boost; t++) { /* call weak learner, add a new hypothesis to hyp * return also train error epsilon and test error * and abort if epsilon>0.5 */ if (!add_weak_hyp(train_data,hyp,&epsilon)) { trace(SUMM) { printf("// aborting boost with epsilon = %.3f\n",epsilon); } break; } /* compute update factor for new hypothesis */ beta = epsilon / (1.0 - epsilon); /* print some info */ trace(SUMM) { printf("// weak hyp %d: train err = %.2f%%, beta=%.3f\n", t+1,epsilon,beta); } /* update the 'distribution' -- ie example weights */ new_tot = 0; for (i=0; i<vmax(train_data); i++) { exi = vref(example_t,train_data,i); if (last_weak_learner_classify(hyp,exi->inst) == exi->lab.nom) { exi->wt *= beta; } new_tot += exi->wt; } z = tot_weight/new_tot; for (i=0; i<vmax(train_data); i++) { exi = vref(example_t,train_data,i); exi->wt *= z; } trace(SUMM) { train_err = boost_error_rate(hyp,train_data); if (test_data && vmax(test_data) > 0) { test_err = boost_error_rate(hyp,test_data); } else { test_err = 0.0; } /* print a message */ printf("Boost %d: train+test errors = %.2f%% + %.2f%%\n\n", t+1,train_err*100,test_err*100); } } trace(SUMM) { if (t<N_boost && test_data && vmax(test_data) > 0) { test_err = boost_error_rate(hyp,test_data); printf("\nFinal boosted test error: %.3f%%\n",test_err*100); } } return hyp;}/* classify with boosted hypothesis */symbol_t *boost_classify(boosted_concept_t *hyp,vec_t *inst){ int k,clx,bestk; double w,bestw; static double *pred; if (pred==NULL) pred = newmem(vmax(Classes),double); /* compute prediction for instance */ for (k=0; k<vmax(Classes); k++) pred[k]=0.0; for (k=0; k<vmax(hyp->weak_hyp); k++) { clx = ith_weak_learner_classify(hyp,inst,k)->index; w = *vref(double,hyp->wt,k); pred[clx] += w; } bestk = -1; bestw = -1.0; for (k=0; k<vmax(Classes); k++) { if (pred[k] > bestw) { bestw = pred[k]; bestk = k; } } return vref(atom_t,Classes,bestk)->nom;}/* error rate of boosted hypothesis */double boost_error_rate(boosted_concept_t *c,DATA *data){ example_t *exp,*exmax; double err; double tot; err = tot = 0; exmax = vbase(example_t,data)+vmax(data); for (exp=vbase(example_t,data); exp<exmax; exp++) { tot += exp->wt; if (boost_classify(c,exp->inst)!=exp->lab.nom) err += exp->wt; } if (tot==0) return 0.0; else return err/tot;}/*************************************************************************** interface to weak learners and underlying boosted representation ***************************************************************************//* map string naming weak learner to its hypothesis */static concept_kind_t weak_learner_hypkind(char *weak){ if (streq(Weak_learner,"ripper")) { return RULESET; } else if (streq(Weak_learner,"findrule")) { return RULE; } else { fatal("unimplemented weak learner '%s'\n",Weak_learner); return 0; }}/* add a new weak hypothesis to a set of boosted hypotheses */static BOOL add_weak_hyp(DATA *data,boosted_concept_t *hyp,double *epsilon){ double w; int sz; if (streq(Weak_learner,"ripper")) { weak_ruleset_t *c; /* reduce level of tracing for the duration */ Trace_level--; c = model(data); Trace_level++; *epsilon = error_rate(c,data); if (*epsilon > 0.5) { return FALSE; } else { if (hyp->weak_hyp==NULL) { hyp->weak_hyp = new_vec(weak_ruleset_t); } ext_vec(weak_ruleset_t,hyp->weak_hyp,c); /* subtle point: after this first round, I assume that the class ordering is fixed; so prevent the learner from re-ordering the classes ever again */ Class_ordering = GIVEN; } } else if (streq(Weak_learner,"findrule")) { weak_rule_t *wr; wr = wrule_model(data); *epsilon = wrule_error_rate(wr,data); if (*epsilon > 0.5) { return FALSE; } else { if (hyp->weak_hyp==NULL) { hyp->weak_hyp = new_vec(weak_rule_t); } ext_vec(weak_rule_t,hyp->weak_hyp,wr); } } else { fatal("unimplemented weak learner '%s'\n",Weak_learner); } /* if we get here a new hyp has been added so add the assoc. weight */ w = Log2( (1.0 - *epsilon) / *epsilon ); ext_vec(double,hyp->wt,&w); return TRUE;}/* classify an example with the LAST learned weak hypothesis */static symbol_t *last_weak_learner_classify(boosted_concept_t *hyp,vec_t *inst){ int k; assert(hyp && hyp->weak_hyp && vmax(hyp->weak_hyp)>0); k = vmax(hyp->weak_hyp)-1; return ith_weak_learner_classify(hyp,inst,k);}/* classify an example with the ith weak hypothesis in a set */static symbol_t *ith_weak_learner_classify(boosted_concept_t *hyp,vec_t *inst,int k){ weak_ruleset_t *c; weak_rule_t *wr; assert(hyp && hyp->weak_hyp && vmax(hyp->weak_hyp)>k); switch (hyp->hyp_kind) { case RULESET: c = vref(weak_ruleset_t,hyp->weak_hyp,k); return classify(c,inst); break; case RULE: wr = vref(weak_rule_t,hyp->weak_hyp,k); return wrule_classify(wr,inst); break; default: fatal("ith_...: hypothesis kind %d unsupported",hyp->hyp_kind); return NULL; }}/* print a set of boosted concepts */void fprint_boost_concept(FILE *fp,boosted_concept_t *hyp){ int i; double w; weak_ruleset_t *c; weak_rule_t *wr; switch (hyp->hyp_kind) { case RULESET: fprintf(fp,"== Boosted rulesets:\n"); for (i=0; i<vmax(hyp->weak_hyp); i++) { c = vref(weak_ruleset_t,hyp->weak_hyp,i); w = *vref(double,hyp->wt,i); fprintf(fp,"\nWeight %.3f:\n",w); fprint_concept(fp,c); } fprintf(fp,"== End boosted rulesets:\n"); break; case RULE: fprintf(fp,"== Boosted rules:\n"); for (i=0; i<vmax(hyp->weak_hyp); i++) { wr = vref(weak_rule_t,hyp->weak_hyp,i); w = *vref(double,hyp->wt,i); fprintf(fp,"Weight %.3f: ",w); fprint_wrule(fp,wr); fprintf(fp,"\n"); } fprintf(fp,"== End boosted rules:\n"); break; default: fatal("print: hypothesis kind %d unsupported",hyp->hyp_kind); }}void fshow_boost_concept(FILE *fp,boosted_concept_t *hyp){ int i; double w; weak_ruleset_t *c; weak_rule_t *wr; switch (hyp->hyp_kind) { case RULESET: for (i=0; i<vmax(hyp->weak_hyp); i++) { c = vref(weak_ruleset_t,hyp->weak_hyp,i); w = *vref(double,hyp->wt,i); fprintf(fp,"%.g\n",w); fshow_concept(fp,c); } break; case RULE: for (i=0; i<vmax(hyp->weak_hyp); i++) { wr = vref(weak_rule_t,hyp->weak_hyp,i); w = *vref(double,hyp->wt,i); fprintf(fp,"%.g\n",w); fshow_wrule(fp,wr); } break; default: fatal("fshow: hypothesis kind %d unsupported",hyp->hyp_kind); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -