📄 perceptron.cpp
#endif
    if (with_fld) wgt = fld();
#if PERCEPTRON_FIXED_BIAS
    wgt.back() = bias_save;
#endif

    RMAT X; RVEC Y;
    dset_extract(ptd, X, Y);

#define RAND_IDX()  (resample? randcdf(randu(),cdf) : UINT(randu()*n_samples))
#define SAMPWGT(i)  (resample? 1 : (*ptw)[i]*n_samples)
#define GET_XYO(i)                      \
    const Input& x = X[i];              \
    const REAL y = Y[i];                \
    const REAL o = DOTPROD(wgt,x)

    log_error(0);
    switch (train_method) {
    case PERCEPTRON:
    case ADALINE:
        // Stochastic updates on samples with y*o <= 0; ADALINE steps along
        // the residual (y - o) instead of the label y.
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = RAND_IDX();
                GET_XYO(idx);
                if (y * o > 0) continue;
                REAL deriv = (train_method == PERCEPTRON? y : (y - o));
                REAL adj = learn_rate * SAMPWGT(idx) * deriv;
                for (UINT k = 0; k < udim; ++k)
                    wgt[k] += adj * x[k];
            }
            log_error(i+1);
        }
        break;

    case POCKET_RATCHET:
    case POCKET: {
        // Pocket: remember the weights with the longest run of correct
        // classifications; the ratchet also requires a lower training error.
        bool ratchet = (train_method == POCKET_RATCHET);
        RVEC best_w(wgt);
        REAL run = 0, err = train_c_error();
        bool err_valid = true;
        REAL best_run = run, best_err = err;
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = RAND_IDX();
                GET_XYO(idx);
                if (y * o > 0) {
                    run += SAMPWGT(idx);
                    if (run > best_run) {
                        if (!err_valid) err = train_c_error();
                        err_valid = true;
                        if (!ratchet || err < best_err) {
                            best_run = run; best_err = err;
                            best_w = wgt;
                        }
                        if (err <= 0) break;
                    }
                } else {
                    run = 0; err_valid = false;
                    const REAL adj = SAMPWGT(idx) * y;
                    for (UINT k = 0; k < udim; ++k)
                        wgt[k] += adj * x[k];
                }
            }
            wgt.swap(best_w);   // temporarily expose the pocketed weights while logging
            log_error(i+1, best_err);
            wgt.swap(best_w);
        }
        wgt.swap(best_w);
    }   break;

    case AVE_PERCEPTRON_RAND:
    case AVE_PERCEPTRON: {
        // Averaged perceptron: the result is the run-length weighted
        // average of all intermediate weight vectors.
        assert(train_method != AVE_PERCEPTRON || !resample);
        RVEC ave_wgt(dim, 0);
        REAL run = 0;
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = (train_method == AVE_PERCEPTRON)? j : RAND_IDX();
                GET_XYO(idx);
                if (y * o > 0)
                    run += SAMPWGT(idx);
                else {
                    for (UINT k = 0; k < dim; ++k)
                        ave_wgt[k] += run * wgt[k];
                    const REAL adj = SAMPWGT(idx) * y;
                    for (UINT k = 0; k < udim; ++k)
                        wgt[k] += adj * x[k];
                    run = SAMPWGT(idx);
                }
            }
            RVEC tmp_wgt(ave_wgt);
            for (UINT k = 0; k < dim; ++k)
                tmp_wgt[k] += run * wgt[k];
            wgt.swap(tmp_wgt);  // temporarily expose the averaged weights while logging
            log_error(i+1);
            wgt.swap(tmp_wgt);
        }
        for (UINT k = 0; k < dim; ++k)
            wgt[k] = ave_wgt[k] + run * wgt[k];
    }   break;

    case ROMMA_AGG_RAND:
    case ROMMA_AGG:
    case ROMMA_RAND:
    case ROMMA: {
        // ROMMA (relaxed online maximum margin algorithm); the aggressive
        // variants also update when the margin is below 1-EPSILON.
        bool fixed = (train_method == ROMMA || train_method == ROMMA_AGG);
        assert(!fixed || !resample);
        REAL bnd = (train_method == ROMMA || train_method == ROMMA_RAND)?
            0 : (1-EPSILON);
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = fixed? j : RAND_IDX();
                GET_XYO(idx);
                const REAL& w_x = o;
                if (y * w_x > bnd) continue;
                REAL w_w = DOTPROD(wgt, wgt);
                REAL x_x = 1 + DOTPROD(x, x);
                REAL x2w2 = x_x * w_w;
                REAL deno = x2w2 - w_x*w_x;
                REAL c = (x2w2 - y*w_x) / deno;
                REAL d = w_w * (y - w_x) / deno;
                wgt[0] = c*wgt[0] + d;
                for (UINT k = 0; k < _n_in; ++k)
                    wgt[k+1] = c*wgt[k+1] + d*x[k];
            }
            log_error(i+1);
        }
    }   break;

    case SGD_HINGE:
    case SGD_MLSE: {
        // SGD: update whenever the margin y*o < 1; SGD_HINGE steps along
        // the label y, SGD_MLSE along the residual (y - o).
        const REAL C = 0;  // C is lambda
        for (UINT i = 0; i < max_run; ++i) {
            for (UINT j = 0; j < n_samples; ++j) {
                const UINT idx = RAND_IDX();
                GET_XYO(idx);
                if (y*o < 1) {
                    REAL shrink = 1 - C * learn_rate;
                    REAL deriv = (train_method == SGD_HINGE? y : (y - o));
                    REAL adj = learn_rate * SAMPWGT(idx) * deriv;
                    for (UINT k = 0; k < udim; ++k)
                        wgt[k] = shrink * wgt[k] + adj * x[k];
                }
            }
            log_error(i+1);
        }
    }   break;

#undef RAND_IDX
#undef SAMPWGT

#define CYCLE(r)       (((r)+dim-1) % udim)
#define UPDATE_WGT(d)  update_wgt(wgt, d, X, Y)

    case COORDINATE_DESCENT: {
        // Deterministic coordinate descent, cycling through the coordinates.
        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            UPDATE_WGT(coorvec(dim, CYCLE(r)));
            log_error(r+1);
        }
    }   break;

    case FIXED_RCD:
    case FIXED_RCD_CONJ:
    case FIXED_RCD_BIAS:
    case FIXED_RCD_CONJ_BIAS: {
        // Random coordinate descent along the rows of one fixed random rotation.
        bool bias_row = (train_method == FIXED_RCD_BIAS ||
                         train_method == FIXED_RCD_CONJ_BIAS);
        bool conjugate = (train_method == FIXED_RCD_CONJ ||
                          train_method == FIXED_RCD_CONJ_BIAS);
        RMAT A = randrot(dim, bias_row, conjugate);
        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            UPDATE_WGT(A[CYCLE(r)]);
            log_error(r+1);
        }
    }   break;

    case RCD:
    case RCD_CONJ:
    case RCD_BIAS:
    case RCD_CONJ_BIAS: {
        // Random coordinate descent; a fresh rotation is drawn every full cycle.
        bool bias_row = (train_method == RCD_BIAS ||
                         train_method == RCD_CONJ_BIAS);
        bool conjugate = (train_method == RCD_CONJ ||
                          train_method == RCD_CONJ_BIAS);
        RMAT A;
        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            const UINT c = CYCLE(r);
            if (c == CYCLE(0)) A = randrot(dim, bias_row, conjugate);
            UPDATE_WGT(A[c]);
            log_error(r+1);
        }
    }   break;

    case RCD_GRAD_BATCH_RAND:
    case RCD_GRAD_BATCH:
    case RCD_GRAD_RAND:
    case RCD_GRAD: {
        bool online = (train_method == RCD_GRAD || train_method == RCD_GRAD_RAND);
        bool wrand = (train_method == RCD_GRAD_RAND ||
                      train_method == RCD_GRAD_BATCH_RAND);
        // gradient of sum weight*y*<w,x> over all unsatisfied examples
        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            RVEC dir(dim, 0);
            if (r % 5 == 0 && wrand) {
                dir = randvec(dim);
            } else if (online) {
                UINT idx, cnt = 0;
                REAL o;
                do {
                    ++cnt;
                    idx = UINT(randu() * n_samples);
                    o = DOTPROD(wgt, X[idx]);
                } while (Y[idx] * o > 0 && cnt < 2*n_samples);
                // if we've tried too many times, just use any X
                dir = X[idx];
            } else {
                bool no_err = true;
                for (UINT j = 0; j < n_samples; ++j) {
                    GET_XYO(j);
                    if (y * o > 0) continue;
                    no_err = false;
                    for (UINT k = 0; k < udim; ++k)
                        dir[k] += y * x[k];
                }
                if (no_err) break;
            }
#if PERCEPTRON_FIXED_BIAS
            dir.back() = 0;
#endif
            UPDATE_WGT(dir);
            log_error(r+1);
        }
    }   break;

    case RCD_GRAD_MIXED_BATCH_INITRAND:
    case RCD_GRAD_MIXED_BATCH:
    case RCD_GRAD_MIXED_INITRAND:
    case RCD_GRAD_MIXED: {
        // Like RCD_GRAD, but every violating sample contributes to the
        // descent direction with a random weight.
        bool online = (train_method == RCD_GRAD_MIXED ||
                       train_method == RCD_GRAD_MIXED_INITRAND);
        bool init_rand = (train_method == RCD_GRAD_MIXED_INITRAND ||
                          train_method == RCD_GRAD_MIXED_BATCH_INITRAND);
        dset_mult_wgt(ptw, Y);
        for (UINT r = 0; r < max_run; ++r) {
            RVEC dir(dim, 0);
            if (init_rand) dir = randvec(dim);
            UINT cnt = 0;
            for (UINT j = 0; j < n_samples; ++j) {
                UINT idx = (online? UINT(randu() * n_samples) : j);
                GET_XYO(idx);
                if (y * o > 0) continue;
                ++cnt;
                REAL adj = y*n_samples * randu();
                for (UINT k = 0; k < udim; ++k)
                    dir[k] += adj * x[k];
            }
            //if (cnt == 0 && !online) break;
            if (cnt == 0 && !init_rand) dir = randvec(dim);
#if PERCEPTRON_FIXED_BIAS
            dir.back() = 0;
#endif
            UPDATE_WGT(dir);
            log_error(r+1);
        }
    }   break;

    case RCD_MIXED: {
        // Alternate between the plain coordinate directions and the rows
        // of a random rotation.
        dset_mult_wgt(ptw, Y);
        RMAT A;
        for (UINT r = 0; r < max_run; ++r) {
            UINT c = r % (2*udim);
            RVEC dir;
            if (c < udim)
                dir = coorvec(dim, CYCLE(c));
            else {
                if (c == udim) A = randrot(dim, false, false);
                dir = A[c-udim];  // CYCLE doesn't change anything
            }
            UPDATE_WGT(dir);
            log_error(r+1);
        }
    }   break;

    default:
        assert(false);
    }

#if PERCEPTRON_FIXED_BIAS
    assert(wgt.back() == bias_save || train_method == AVE_PERCEPTRON ||
           train_method == AVE_PERCEPTRON_RAND);
#endif
    return 0;
}

#define INPUT_SUM(w,x) \
    std::inner_product(x.begin(), x.end(), w.begin(), w.back())

Output Perceptron::operator() (const Input& x) const {
    assert(x.size() == n_input());
    REAL sum = INPUT_SUM(wgt, x);
    return Output(1, (sum >= 0)? 1 : -1);
}

REAL Perceptron::margin_of (const Input& x, const Output& y) const {
    assert(std::fabs(std::fabs(y[0]) - 1) < INFINITESIMAL);
    return INPUT_SUM(wgt, x) * y[0];
}

REAL Perceptron::w_norm () const {
    REAL s = DOTPROD_NB(wgt, wgt);
    return std::sqrt(s);
}

void Perceptron::log_error (UINT ite, REAL err) const {
    if (logf != NULL) {
        if (err < 0) err = train_c_error();
        fprintf(logf, "%g ", err);
    }
}

} // namespace lemga
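For reference, below is a minimal standalone sketch of the update rule the PERCEPTRON case above performs, stripped of LEMGA's scaffolding (RVEC/RMAT, dset_extract, the RAND_IDX/SAMPWGT macros). It assumes uniform sampling, unit sample weights, and a bias folded into the input as a trailing constant 1; the helper names (dot, perceptron_epoch) and the toy data are illustrative, not part of LEMGA.

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <vector>

typedef std::vector<double> Vec;

// Plain dot product; stands in for LEMGA's DOTPROD macro.
static double dot (const Vec& w, const Vec& x) {
    assert(w.size() == x.size());
    double s = 0;
    for (size_t k = 0; k < w.size(); ++k) s += w[k] * x[k];
    return s;
}

// One epoch of perceptron updates: draw n samples uniformly at random
// (the simplified analogue of RAND_IDX) and, whenever y*<w,x> <= 0,
// move w by learn_rate * y * x.
static void perceptron_epoch (Vec& w, const std::vector<Vec>& X,
                              const std::vector<double>& Y, double learn_rate) {
    const size_t n = X.size();
    for (size_t j = 0; j < n; ++j) {
        const size_t idx = std::rand() % n;
        const double o = dot(w, X[idx]);
        if (Y[idx] * o > 0) continue;   // already strictly correct: skip
        for (size_t k = 0; k < w.size(); ++k)
            w[k] += learn_rate * Y[idx] * X[idx][k];
    }
}

int main () {
    // Toy separable data: the label is the sign of the first coordinate;
    // the trailing 1 makes the last weight act as the bias.
    std::vector<Vec> X = { {2.0, 1.0}, {1.5, 1.0}, {-1.0, 1.0}, {-2.5, 1.0} };
    std::vector<double> Y = { 1, 1, -1, -1 };
    Vec w(2, 0.0);
    for (int epoch = 0; epoch < 100; ++epoch)
        perceptron_epoch(w, X, Y, 0.1);
    for (size_t i = 0; i < X.size(); ++i)
        std::printf("y=%+g  <w,x>=%+g\n", Y[i], dot(w, X[i]));  // margins should all be positive
    return 0;
}

The skip test mirrors the `if (y * o > 0) continue;` in the real code: an update happens whenever the sample is not strictly on the correct side, which also covers the o == 0 case.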