📄 ebm.cpp
字号:
idx_copy(m, out->x); } else { // spatially replicated // loop over output units { idx_bloop4(m, in->x, double, outx, out->x, double, ed, expdist, double, sx, sumexp, double) { // first compute smallest element of m double mini = m.get(0, 0); { idx_bloop1(m1, m, double) { { idx_bloop1(m0, m1, double) { if (m0->get() < mini) mini = m0->get(); } } } } // now do log-add, and save exponentials double r = 0.0; double w = 1 / (si * sj); { idx_bloop2(m1, m, double, ed1, ed, double) { { idx_bloop2(m0, m1, double, ed0, ed1, double) { ed0.set(w * exp(mini - m0.get())); r += ed0.get(); } } } } sx.set(r); // put result in output outx->set(mini - log(r)); } } }}void logadd_layer::bprop(state_idx *in, state_idx *out) { intg si = in->dx.dim(1); intg sj = in->dx.dim(2); if ((si * sj) == 1) { // save time and precision if no replication Idx<double> indx(in->dx.select(2, 0)); Idx<double> m(indx.select(1, 0)); idx_copy(out->dx, m); } else { // spatially replicated // loop over output units { idx_bloop4(m, in->dx, double, o, out->dx, double, ed, expdist, double, sx, sumexp, double) { { idx_bloop2(m1, m, double, ed1, ed, double) { { idx_bloop2(m0, m1, double, ed0, ed1, double) { *m0 = ed0.get() * o->get() / sx->get(); } } } } } } }}void logadd_layer::bbprop(state_idx *in, state_idx *out) { { idx_bloop2(o, out->ddx, double, i, in->ddx, double) { idx_fill(*i, o->get()); } }}////////////////////////////////////////////////////////////////////////edist_cost::edist_cost(Idx<ubyte> *classes, intg ini, intg inj, Idx<double> *p) { intg imax = idx_max(*classes); intg imin = idx_min(*classes); if (imin < 0) ylerror("labels must be positive"); if (imax > 100000) printf("warning: [edist-cost] largest label is huuuge\n"); label2classindex = Idx<ubyte>(1 + imax); { idx_bloop1(v, label2classindex, ubyte) { v->set(0); } } for (intg i = 0; i < classes->dim(0); ++i) label2classindex.set(i, classes->get(i)); dist = new state_idx(1, ini, inj); logadder = new logadd_layer(1, ini, inj); logadded_dist = new state_idx(1); proto = p;}void edist_cost::fprop(state_idx *in, Idx<ubyte> *desired, state_idx *energy) { Idx<double> p(proto->select(0, label2classindex.get(desired->get()))); intg ini = in->x.dim(1); intg inj = in->x.dim(2); dist->resize(1, ini, inj); int tr[] = {1, 2, 0}; Idx<double> inx(in->x.transpose(tr)); Idx<double> distx(dist->x.select(0, 0)); // loop over spatial dimensions { idx_bloop2(inx1, inx, double, dx1, distx, double) { { idx_bloop2(inx0, inx1, double, dx0, dx1, double) { // distance between desired prototype and output // at current location idx_sqrdist(p, *inx0, *dx0); } } } } idx_dotc(distx, 0.5, distx); logadder->fprop(dist, logadded_dist); energy->x.set(logadded_dist->x.get(0));}void edist_cost::bprop(state_idx *in, Idx<ubyte> *desired, state_idx *energy) { Idx<double> p(proto->select(0, label2classindex.get(desired->get()))); // backprop through logadder logadded_dist->dx.set(energy->dx.get(), 0); logadder->bprop(dist, logadded_dist); // backprop through Euclidean distance int tr1[] = {1, 2, 0}; int tr2[] = {1, 2, 0}; Idx<double> tinx(in->x.transpose(tr1)); Idx<double> tindx(in->dx.transpose(tr2)); Idx<double> distdx(dist->dx.select(0, 0)); // loop over last two dimensions { idx_bloop3(linx, tinx, double, lindx, tindx, double, ldistdx, distdx, double) { { idx_bloop3(llinx, linx, double, llindx, lindx, double, lldistdx, ldistdx, double) { idx_sub(llinx, p, llindx); idx_dotc(llindx, lldistdx.get(), llindx); } } } }}// mse has this funny property that the bbprop method mixes up the// the first derivative after with the second derivative before, and// vice versa. Only the first combination is used here.void edist_cost::bbprop(state_idx *in, Idx<ubyte> *desired, state_idx *energy) { // don't bother bbproping through the logadder // we would ignore its output anyway idx_fill(in->ddx, energy->dx.get());}////////////////////////////////////////////////////////////////////////classifier_meter::classifier_meter() { this->clear();}int classifier_meter::correctp(ubyte co, ubyte cd) { // TODO-0: can co be negative?// if (co == -1)// return 0; if (co == cd) return 1; return -1;}void classifier_meter::clear() { total_correct = 0; total_error = 0; total_punt = 0; total_energy = 0; age = 0; size = 0;}void classifier_meter::resize (intg sz) { ylerror("not implemented");}char classifier_meter::update(intg a, class_state *co, ubyte cd, state_idx *en) { intg crrct = this->correctp(co->output_class, cd); age = a; energy = en->x.get(); confidence = co->confidence; total_energy += energy; if (crrct == 1) total_correct++; else if (crrct == 0) total_punt++; else if (crrct == -1) total_error++; size++; return crrct;}void classifier_meter::test(class_state *co, ubyte cd, state_idx *en) { intg crrct = this->correctp(co->output_class, cd); age = 0; energy = en->x.get(); confidence = co->confidence; total_energy = energy; total_correct = 0; total_punt = 0; total_error = 0; if (crrct == 1) total_correct = 1; else if (crrct == 0) total_punt = 1; else if (crrct == -1) total_error = 1; size = 1;}void classifier_meter::info() { /* (list age size (/ total-energy size) (/ (* 100 total-correct) size) (/ (* 100 total-error) size) (/ (* 100 total-punt) size))) */ err_not_implemented();}void classifier_meter::info_sprint() { err_not_implemented();}void classifier_meter::info_print() { err_not_implemented();}void classifier_meter::display() { printf("[%5d] size=%3d energy=%g correct=%3.2f%% errors=%3.2f%% rejects=%3.2f%%\n", (int) age, (int) size, total_energy / (double) size, (total_correct * 100) / (double) size, (total_error * 100) / (double) size, (total_punt * 100) / (double) size);}bool classifier_meter::save() { err_not_implemented(); return false;}bool classifier_meter::load() { err_not_implemented(); return false;}////////////////////////////////////////////////////////////////////////class_state::class_state(ubyte n) { sorted_classes = new Idx<ubyte>(n); sorted_scores = new Idx<float>(n);}class_state::~class_state() { delete sorted_classes; delete sorted_scores;}void class_state::resize(ubyte n) { sorted_classes->resize(n); sorted_scores->resize(n);}////////////////////////////////////////////////////////////////////////max_classer::max_classer(Idx<ubyte> *classes) { classindex2label = classes;}void max_classer::fprop(state_idx *in, class_state *out) { intg n = in->x.dim(0); out->resize(n); { idx_bloop2(sc, *(out->sorted_scores), float, insc, in->x, double) { sc.set(idx_max(insc)); } } idx_copy(*classindex2label, *(out->sorted_classes)); idx_sortdown(*(out->sorted_scores), *(out->sorted_classes)); out->output_class = out->sorted_classes->get(0); out->confidence = out->sorted_scores->get(0);}////////////////////////////////////////////////////////////////////////softmax::softmax(double b){ beta = b;}void softmax::resize_nsame(state_idx *in, state_idx *out, int n){ int nmax = in->x.order(); if(n==0||n>nmax) {ylerror("illegal type")} else{ switch(n){ case 1: out->resize(in->x.dim(0)); break; case 2: out->resize(in->x.dim(0), in->x.dim(1)); break; case 3: out->resize(in->x.dim(0), in->x.dim(1), in->x.dim(2)); break; case 4: out->resize(in->x.dim(0), in->x.dim(1), in->x.dim(2), in->x.dim(3)); break; case 5: out->resize(in->x.dim(0), in->x.dim(1), in->x.dim(2), in->x.dim(3), in->x.dim(4)); break; case 6: out->resize(in->x.dim(0), in->x.dim(1), in->x.dim(2), in->x.dim(3), in->x.dim(4), in->x.dim(5)); break; } }}void softmax::fprop( state_idx *in, state_idx *out){ int n=in->x.order(); if(n==0){ Idx<double> ib; ib.set(1); idx_copy(ib, out->x); } else { resize_nsame(in, out, n); if( n > 6) {ylerror("illegal type")} else{ Idx<double> pp(new Srg<double>(), in->x.spec); Idx<double> dot(new Srg<double>(), in->x.spec); double mm = idx_max(in->x); idx_addc(in->x, -mm, pp); idx_dotc(pp, beta, dot); double out_sum = 0.0; double d = idx_sum(dot, &out_sum); idx_dotc(dot, (double)(1/d), out->x); } }}void softmax::bprop( state_idx *in, state_idx *out){ int n = in->x.order(); if( n == 0) return; if( n > 6 ) { ylerror("illegal type")} else{ Idx<double> pp(new Srg<double>(), out->dx.spec); Idx<double> mul(new Srg<double>(), out->dx.spec); double dot = idx_dot(out->dx, out->x); idx_addc(out->dx, -dot, pp); idx_mul(out->x, pp, mul); idx_dotcacc(mul, beta, in->x); }}void softmax::bbprop( state_idx *in, state_idx *out){ int n = in->x.order(); if( n == 0) return; if( n > 6 ) { ylerror("illegal type")} else{ Idx<double> mul(new Srg<double>(), out->x.spec); Idx<double> dot(new Srg<double>(), out->x.spec); Idx<double> pp(new Srg<double>(), out->x.spec); Idx<double> mul2(new Srg<double>(), out->x.spec); Idx<double> pp2(new Srg<double>(), out->x.spec); Idx<double> mul3(new Srg<double>(), out->x.spec); idx_mul(out->x, out->x, mul); idx_dotc(out->x, (double)-2, dot); idx_addc(dot, (double)1, pp); idx_mul(pp, out->ddx, mul2); idx_addc(mul2, idx_dot(out->ddx, mul), pp2); idx_mul(mul, pp2, mul3); idx_dotcacc(mul3, beta*beta, in->ddx); }}////////////////////////////////////////////////////////////////////////void Jacobian_tester::test(module_1_1<state_idx, state_idx> *module){ int insize = 16; state_idx *in = new state_idx(insize, 1, 1); state_idx *out = new state_idx(insize, 1, 1); //init dseed(2); // 2 is chosen randomly... feel free to change it module->fprop(in, out); // used to resize the outputs { idx_bloop1( i, in->x, double) { idx_bloop1 (ii, i, double) { idx_bloop1( iii, ii, double) { iii.set(drand(2)); } } } } { idx_bloop1( o, out->x, double) { idx_bloop1 (oo, o, double) { idx_bloop1( ooo, oo, double) { ooo.set(drand(2)); } } } } // check the Jacobian int ndim_in = in->x.nelements(); int ndim_out = in->x.nelements(); Idx<double> jac_fprop(ndim_in, ndim_out); // used to store the jacobian calculated via bprop Idx<double> jac_bprop(ndim_in, ndim_out); // used to store the jacobian calculated via prturbations // creation of jac_fprop module->fprop(in, out); int cnt = 0; { idx_bloop1(o, out->x, double) { idx_bloop1(oo, o, double) { idx_bloop1(ooo, oo, double) { out->clear_dx(); in->clear_dx(); ooo.set(1); module->bprop(in, out); Idx<double> bla = jac_bprop.select(1, cnt); idx_copy(in->dx, bla); cnt++; } } } } // creation of jac_bprop cnt = 0; double small = pow(10.0, -6); state_idx *in1 = new state_idx(in->x.dim(0), in->x.dim(1), in->x.dim(2)); state_idx *in2 = new state_idx(in->x.dim(0), in->x.dim(1), in->x.dim(2)); state_idx *out1 = new state_idx( 1, 1, 1); state_idx *out2 = new state_idx( 1, 1, 1); for(int d1 = 0; d1 < in->x.dim(0); d1++){ for(int d2 = 0; d2 < in->x.dim(1); d2++){ for(int d3 = 0; d3 < in->x.dim(2); d3++){ idx_copy(in->x, in1->x); idx_copy(in->x, in2->x); in1->x.set(in1->x.get( d1, d2, d3) + small, d1, d2, d3); in2->x.set(in2->x.get( d1, d2, d3) - small, d1, d2, d3); module->fprop(in1, out1); module->fprop(in2, out2); Idx<double> sub(new Srg<double>(), out1->x.spec); Idx<double> dot(new Srg<double>(), out1->x.spec); idx_sub(out1->x, out2->x, sub); idx_dotc(sub, 0.5/small, dot); Idx<double> bla2 = jac_fprop.select(0, cnt); idx_copy(dot, bla2); cnt++; } } } // comparison printf("Jacobian error: %8.7e \n", idx_sqrdist(jac_fprop, jac_bprop));}////////////////////////////////////////////////////////////////////////void Bbprop_tester::test(module_1_1<state_idx, state_idx> *module){ int insize = 16; state_idx *in = new state_idx(insize, 1, 1); state_idx *out = new state_idx(insize, 1, 1); //init dseed(2); // 2 is chosen randomly... feel free to change it module->fprop(in, out); // used to resize the outputs { idx_bloop1( i, in->x, double) { idx_bloop1 (ii, i, double) { idx_bloop1( iii, ii, double) { iii.set(drand(2)); } } } } { idx_bloop1( o, out->x, double) { idx_bloop1 (oo, o, double) { idx_bloop1( ooo, oo, double) { ooo.set(drand(2)); } } } } module->fprop(in, out); module->bprop(in, out); module->bbprop(in, out); Idx<double> bbprop_p(in->x.dim(0), in->x.dim(1), in->x.dim(2)); // used to store the bbprop calculated via perturbation // creation of bbprop_p int cnt = 0; double small = pow(10.0, -6); state_idx *in1 = new state_idx(in->x.dim(0), in->x.dim(1), in->x.dim(2)); state_idx *in2 = new state_idx(in->x.dim(0), in->x.dim(1), in->x.dim(2)); state_idx *out1 = new state_idx( 1, 1, 1); state_idx *out2 = new state_idx( 1, 1, 1); for(int d1 = 0; d1 < in->x.dim(0); d1++){ for(int d2 = 0; d2 < in->x.dim(1); d2++){ for(int d3 = 0; d3 < in->x.dim(2); d3++){ idx_copy(in->x, in1->x); idx_copy(in->x, in2->x); in1->x.set(in1->x.get( d1, d2, d3) + small, d1, d2, d3); in2->x.set(in2->x.get( d1, d2, d3) - small, d1, d2, d3); module->fprop(in1, out1); module->fprop(in2, out2); // here we calculate a in aX²+bX+c as a model for the 3 points calculated via // fprop(...), fprop(...+small) and fprop(...-small). the second derivative is // then 2*a Idx<double> ad(new Srg<double>(), out1->x.spec); Idx<double> sub(new Srg<double>(), out1->x.spec); Idx<double> dot(new Srg<double>(), out1->x.spec); Idx<double> dot2(new Srg<double>(), out1->x.spec); idx_add(out1->x, out2->x, ad); idx_dotc(out->x, (double)2, dot); idx_sub(ad, dot, sub); idx_dotc(sub, 1/small, dot2); bbprop_p.set(dot2.get( d1, d2, d3), d1, d2, d3); cnt++; } } } // comparison printf("bbprop error: %8.7e \n", idx_sqrdist(in->ddx, bbprop_p));}////////////////////////////////////////////////////////////////////////void Bprop_tester::test(module_1_1<state_idx, state_idx> *module){ int insize = 16; state_idx *in = new state_idx(insize, 1, 1); state_idx *out = new state_idx(insize, 1, 1); //init dseed(2); // 2 is chosen randomly... feel free to change it module->fprop(in, out); // used to resize the outputs { idx_bloop1( i, in->x, double) { idx_bloop1 (ii, i, double) { idx_bloop1( iii, ii, double) { iii.set(drand(2)); } } } } { idx_bloop1( o, out->x, double) { idx_bloop1 (oo, o, double) { idx_bloop1( ooo, oo, double) { ooo.set(drand(2)); } } } } Idx<double> bprop_p(in->x.dim(0), in->x.dim(1), in->x.dim(2)); // used to store the bbprop calculated via perturbation // creation of bprop_p int cnt = 0; double small = pow(10.0, -6); state_idx *in1 = new state_idx(in->x.dim(0), in->x.dim(1), in->x.dim(2)); state_idx *in2 = new state_idx(in->x.dim(0), in->x.dim(1), in->x.dim(2)); state_idx *out1 = new state_idx( 1, 1, 1); state_idx *out2 = new state_idx( 1, 1, 1); for(int d1 = 0; d1 < in->x.dim(0); d1++){ for(int d2 = 0; d2 < in->x.dim(1); d2++){ for(int d3 = 0; d3 < in->x.dim(2); d3++){ idx_copy(in->x, in1->x); idx_copy(in->x, in2->x); in1->x.set(in1->x.get( d1, d2, d3) + small, d1, d2, d3); in2->x.set(in2->x.get( d1, d2, d3) - small, d1, d2, d3); module->fprop(in1, out1); module->fprop(in2, out2); Idx<double> sub(new Srg<double>(), out1->x.spec); Idx<double> dot(new Srg<double>(), out1->x.spec); idx_sub(out1->x, out2->x, sub); idx_dotc(sub, 0.5/small, dot); bprop_p.set(dot.get( d1, d2, d3), d1, d2, d3); cnt++; } } } printf("Bprop error : %8.7e \n", idx_sqrdist(in->dx, bprop_p));}} // end namespace ebl
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -