📄 perceptron.cpp
/** @file
 *  $Id: perceptron.cpp 2542 2006-01-10 07:24:14Z ling $
 */

// "Fixed bias" means we still use the bias term, but do not change it.
// This is to simulate a perceptron without the bias term. An alternative
// is to modify dset_extract (for RCD) and the update rules (for other
// algorithms) so that "1" is not appended to input vectors. Although that
// approach is simpler (to implement), it causes extra trouble with
// saving/loading or importing from SVM.
#define PERCEPTRON_FIXED_BIAS 0

#define STUMP_DOUBLE_DIRECTIONS 0

#include <assert.h>
#include <cmath>
#include <numeric>
#include "random.h"
#include "stump.h"
#include "perceptron.h"

REGISTER_CREATOR(lemga::Perceptron);

typedef std::vector<REAL> RVEC;
typedef std::vector<RVEC> RMAT;

#define DOTPROD(x,y) std::inner_product(x.begin(), x.end(), y.begin(), .0)
// the version without the bias term
#define DOTPROD_NB(x,y) std::inner_product(x.begin(),x.end()-1,y.begin(),.0)

// a vector of n random numbers uniformly distributed in [-1, 1]
inline RVEC randvec (UINT n) {
    RVEC x(n);
    for (UINT i = 0; i < n; ++i)
        x[i] = 2*randu() - 1;
    return x;
}

// the c-th coordinate (unit) vector of length n
inline RVEC coorvec (UINT n, UINT c) {
    RVEC x(n, 0);
    x[c] = 1;
    return x;
}

// scale v to unit length; skip the scaling if the squared norm is below thr
inline void normalize (RVEC& v, REAL thr = 0) {
    REAL s = DOTPROD(v, v);
    assert(s > 0);
    if (s <= thr) return;
    s = 1 / std::sqrt(s);
    for (RVEC::iterator p = v.begin(); p != v.end(); ++p)
        *p *= s;
}

// a set of n random directions; they are made orthonormal if conjugate is
// true, and the bias coordinate is kept fixed if bias_row is true
RMAT randrot (UINT n, bool bias_row = false, bool conjugate = true) {
#if PERCEPTRON_FIXED_BIAS
    bias_row = true;
#endif
    RMAT m(n);
    if (bias_row) m.back() = coorvec(n, n-1);

    for (UINT i = 0; i+bias_row < n; ++i) {
        m[i] = randvec(n);
        if (bias_row) m[i][n-1] = 0;
        if (!conjugate) continue;

        // make m[i] independent
        for (UINT k = 0; k < i; ++k) {
            REAL t = DOTPROD(m[i], m[k]);
            for (UINT j = 0; j < n; ++j)
                m[i][j] -= t * m[k][j];
        }
        // normalize m[i]
        normalize(m[i]);
    }
    return m;
}

// from Numerical Recipes in C: Cholesky decomposition A = L*L^T;
// the diagonal of L goes to p, the rest to the lower triangle of A;
// returns false if A is not positive definite
bool Cholesky_decomp (RMAT& A, RVEC& p) {
    const UINT n = A.size();
    assert(p.size() == n);
    for (UINT i = 0; i < n; ++i) {
        for (UINT j = i; j < n; ++j) {
            REAL sum = A[i][j];
            for (UINT k = 0; k < i; ++k)
                sum -= A[i][k] * A[j][k];
            if (i == j) {
                if (sum <= 0) return false;
                p[i] = std::sqrt(sum);
            } else
                A[j][i] = sum / p[i];
        }
    }
    return true;
}

// from Numerical Recipes in C: forward and back substitution using the
// decomposition computed above, solving A*x = b
void Cholesky_linsol (const RMAT& A, const RVEC& p, const RVEC& b, RVEC& x) {
    const UINT n = A.size();
    assert(p.size() == n && b.size() == n);
    x.resize(n);

    for (UINT i = 0; i < n; ++i) {
        REAL sum = b[i];
        for (UINT k = 0; k < i; ++k)
            sum -= A[i][k] * x[k];
        x[i] = sum / p[i];
    }
    for (UINT i = n; i--;) {
        REAL sum = x[i];
        for (UINT k = i+1; k < n; ++k)
            sum -= A[k][i] * x[k];
        x[i] = sum / p[i];
    }
}

namespace lemga {

// solve A*x = b for symmetric positive definite A (A is overwritten);
// returns false if the Cholesky decomposition fails
bool ldivide (RMAT& A, const RVEC& b, RVEC& x) {
    const UINT n = A.size();
    assert(b.size() == n);
    RVEC p(n);
    if (!Cholesky_decomp(A, p)) return false;
    Cholesky_linsol(A, p, b, x);
    return true;
}

/// Update the weight @a wgt along the direction @a dir.
/// If necessary, the whole @a wgt will be negated.
void update_wgt (RVEC& wgt, const RVEC& dir, const RMAT& X, const RVEC& y) {
    const UINT dim = wgt.size();
    assert(dim == dir.size());
    const UINT n_samples = X.size();
    assert(n_samples == y.size());

    RVEC xn(n_samples), yn(n_samples);
#if STUMP_DOUBLE_DIRECTIONS
    REAL bias = 0;
#endif
    for (UINT i = 0; i < n_samples; ++i) {
        assert(X[i].size() == dim);
        const REAL x_d = DOTPROD(X[i], dir);
        REAL x_new = 0, y_new = 0;
        if (x_d != 0) {
            y_new = (x_d > 0)? y[i] : -y[i];
            REAL x_w = DOTPROD(X[i], wgt);
            x_new = x_w / x_d;
        }
#if STUMP_DOUBLE_DIRECTIONS
        else
            bias += (DOTPROD(X[i], wgt) > 0)? y[i] : -y[i];
#endif
        xn[i] = x_new;
        yn[i] = y_new;
    }

#if STUMP_DOUBLE_DIRECTIONS
    REAL th1, th2;
    bool ir, pos;
    Stump::train_1d(xn, yn, bias, ir, pos, th1, th2);
    const REAL w_d_new = -(th1 + th2) / 2;
#else
    const REAL w_d_new = - Stump::train_1d(xn, yn);
#endif

    for (UINT j = 0; j < dim; ++j)
        wgt[j] += w_d_new * dir[j];
#if STUMP_DOUBLE_DIRECTIONS
    if (!pos)
        for (UINT j = 0; j < dim; ++j)
            wgt[j] = -wgt[j];
#endif

    // use a big threshold to reduce the number of normalizations
    normalize(wgt, 1e10);
}

// copy the data set into a matrix X (a constant 1 is appended to each
// input, so the bias becomes the last weight) and a label vector y
void dset_extract (const pDataSet& ptd, RMAT& X, RVEC& y) {
    UINT n = ptd->size();
    X.resize(n);
    y.resize(n);
    for (UINT i = 0; i < n; ++i) {
        X[i] = ptd->x(i);
        X[i].push_back(1);
        y[i] = ptd->y(i)[0];
    }
}

// scale the labels by the sample weights
inline void dset_mult_wgt (const pDataWgt& ptw, RVEC& y) {
    UINT n = y.size();
    assert(ptw->size() == n);
    for (UINT i = 0; i < n; ++i)
        y[i] *= (*ptw)[i];
}

Perceptron::Perceptron (UINT n_in)
    : LearnModel(n_in, 1), wgt(n_in+1,0), resample(0),
      train_method(RCD_BIAS), learn_rate(0.002), min_cst(0),
      max_run(1000), with_fld(false)
{
}

Perceptron::Perceptron (const SVM& s)
    : LearnModel(s), wgt(_n_in+1,0), resample(0),
      train_method(RCD_BIAS), learn_rate(0.002), min_cst(0),
      max_run(1000), with_fld(false)
{
    assert(wgt.size() == _n_in+1);
    // unable to check that the SVM uses the linear kernel
    const UINT nsv = s.n_support_vectors();
    for (UINT i = 0; i < nsv; ++i) {
        const Input sv = s.support_vector(i);
        const REAL coef = s.support_vector_coef(i);
        assert(sv.size() == _n_in);
        for (UINT k = 0; k < _n_in; ++k)
            wgt[k] += coef * sv[k];
    }
    wgt.back() = s.bias();
}

bool Perceptron::serialize (std::ostream& os, ver_list& vl) const {
    SERIALIZE_PARENT(LearnModel, os, vl, 1);
    for (UINT i = 0; i <= _n_in; ++i)
        if (!(os << wgt[i] << ' ')) return false;
    return (os << '\n');
}

bool Perceptron::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
    if (d != id() && d != empty_id) return false;
    UNSERIALIZE_PARENT(LearnModel, is, vl, 1, v);
    wgt.resize(_n_in+1);
    for (UINT i = 0; i <= _n_in; ++i)
        if (!(is >> wgt[i])) return false;
    return true;
}

void Perceptron::set_weight (const WEIGHT& w) {
    wgt = w;
    if (_n_in == 0) _n_in = w.size() - 1;
    assert(_n_in > 0 && _n_in+1 == wgt.size());
}

void Perceptron::initialize () {
    assert(_n_in > 0);
    wgt.resize(_n_in + 1);
    for (UINT i = 0; i+PERCEPTRON_FIXED_BIAS < wgt.size(); ++i)
        wgt[i] = randu() * 0.1 - 0.05;   // small random numbers
}

/// Fisher's linear discriminant computed from the weighted class means
/// and the pooled within-class covariance.
Perceptron::WEIGHT Perceptron::fld () const {
    assert(ptd != 0 && ptw != 0);

    // get the class means
    Input m1(_n_in, 0), m2(_n_in, 0);
    REAL w1 = 0, w2 = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        const Input& x = ptd->x(i);
        REAL y = ptd->y(i)[0];
        REAL w = (*ptw)[i];
        if (y > 0) {
            w1 += w;
            for (UINT k = 0; k < _n_in; ++k)
                m1[k] += w * x[k];
        } else {
            w2 += w;
            for (UINT k = 0; k < _n_in; ++k)
                m2[k] += w * x[k];
        }
    }
    assert(w1 > 0 && w2 > 0 && std::fabs(w1+w2-1) < EPSILON);
    for (UINT k = 0; k < _n_in; ++k) {
        m1[k] /= w1;
        m2[k] /= w2;
    }

    // get the covariance
    RMAT A(_n_in, RVEC(_n_in, 0));
    RVEC diff(_n_in);
    for (UINT i = 0; i < n_samples; ++i) {
        const Input& x = ptd->x(i);
        REAL y = ptd->y(i)[0];
        REAL w = (*ptw)[i];
        if (y > 0)
            for (UINT k = 0; k < _n_in; ++k)
                diff[k] = x[k] - m1[k];
        else
            for (UINT k = 0; k < _n_in; ++k)
                diff[k] = x[k] - m2[k];
        // we only need the upper triangular part
        for (UINT j = 0; j < _n_in; ++j)
            for (UINT k = j; k < _n_in; ++k)
                A[j][k] += w * diff[j] * diff[k];
    }

    for (UINT k = 0; k < _n_in; ++k)
        diff[k] = m1[k] - m2[k];

    RVEC w;
    bool not_reg = true;
    while (!ldivide(A, diff, w)) {
        assert(not_reg);
        not_reg = false;   // should only happen once
        REAL tr = 0;
        for (UINT j = 0; j < _n_in; ++j)
            tr += A[j][j];
        const REAL gamma = 1e-10;   // see [Friedman 1989]
        const REAL adj = gamma/(1-gamma) * tr/_n_in;
        for (UINT j = 0; j < _n_in; ++j)
            A[j][j] += adj;
#if VERBOSE_OUTPUT
        std::cerr << "LDA: The class covariance matrix estimate is "
            "regularized by\n\tan eigenvalue shrinkage parameter "
                  << gamma << '\n';
#endif
    }

    w.push_back(- (DOTPROD(m1,w) * w2 + DOTPROD(m2,w) * w1));
    return w;
}

// binary search: the index i such that cdf[i] <= r < cdf[i+1]
inline UINT randcdf (REAL r, const RVEC& cdf) {
    UINT b = 0, e = cdf.size()-1;
    while (b+1 < e) {
        UINT m = (b + e) / 2;
        if (r < cdf[m]) e = m;
        else b = m;
    }
    return b;
}

REAL Perceptron::train () {
    const UINT dim = _n_in+1;
    // but what is updated might be of a smaller size
    const UINT udim = dim - PERCEPTRON_FIXED_BIAS;
    assert(wgt.size() == dim);
    assert(ptd != NULL && ptw != NULL);
    assert(ptd->size() == n_samples);

    RVEC cdf;
    if (resample) {
        cdf.resize(n_samples+1);
        cdf[0] = 0;
        for (UINT i = 0; i < n_samples; ++i)
            cdf[i+1] = cdf[i] + (*ptw)[i];
        assert(cdf[n_samples]-1 > -EPSILON && cdf[n_samples]-1 < EPSILON);
    }

#if PERCEPTRON_FIXED_BIAS
    REAL bias_save = wgt.back();
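For reference, the weight vector that fld() returns is the weighted Fisher linear discriminant; the equations below simply restate what the code computes. Writing m_+ and m_- for the weighted class means, w_+ and w_- for the class weight totals (w_+ + w_- = 1), and S_W for the pooled within-class scatter accumulated in A, the call ldivide(A, diff, w) and the appended bias amount to

\[
    w = S_W^{-1}(m_+ - m_-), \qquad
    b = -\bigl(w_-\, w^{\top} m_+ + w_+\, w^{\top} m_-\bigr),
\]

so the threshold is placed at a convex combination of the two projected means. If S_W is not positive definite, the loop adds \(\frac{\gamma}{1-\gamma}\cdot\frac{\operatorname{tr} S_W}{n_{\mathrm{in}}}\) to every diagonal entry (with \(\gamma = 10^{-10}\), following [Friedman 1989]) and solves again.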
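As a rough orientation only, here is a minimal, self-contained sketch (not part of perceptron.cpp) of how a weight vector in the format used above is applied to an input, plus one pass of the textbook fixed-increment perceptron rule that the learn_rate member suggests. It uses plain double/std::vector instead of the library's REAL/RVEC typedefs, and the names decision and perceptron_pass are illustrative; the actual Perceptron::train() defaults to RCD_BIAS and supports several other update methods.

// Standalone sketch, assuming the convention used by dset_extract(): every
// input gets a constant 1 appended, so the bias is the last weight.
#include <numeric>
#include <vector>

typedef std::vector<double> Vec;

// signed decision value w . [x; 1]
inline double decision (const Vec& wgt, Vec x) {
    x.push_back(1);                     // homogeneous coordinate for the bias
    return std::inner_product(x.begin(), x.end(), wgt.begin(), 0.0);
}

// one pass of the classical perceptron update: nudge the weights toward
// every example that is currently misclassified (labels y[i] are +/-1)
inline void perceptron_pass (Vec& wgt, const std::vector<Vec>& X,
                             const Vec& y, double learn_rate) {
    for (size_t i = 0; i < X.size(); ++i) {
        if (y[i] * decision(wgt, X[i]) <= 0) {
            for (size_t k = 0; k + 1 < wgt.size(); ++k)
                wgt[k] += learn_rate * y[i] * X[i][k];
            wgt.back() += learn_rate * y[i];   // bias term
        }
    }
}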