📄 boosting.cpp
/** @file
 *  $Id: boosting.cpp 2538 2006-01-08 10:01:17Z ling $
 */

#include <assert.h>
#include "vectorop.h"
#include "optimize.h"
#include "boosting.h"

REGISTER_CREATOR(lemga::Boosting);

#define _cost(F,y)       cost_functor.cost(F[0],y[0])
#define _cost_deriv(F,y) cost_functor.deriv1(F[0],y[0])

namespace lemga {

/** @copydoc LearnModel(UINT,UINT)
 *  @param cvx \c true if convex combination is used; \c false if
 *  linear combination is used */
Boosting::Boosting (bool cvx, const cost::Cost& c)
    : Aggregating(), convex(cvx), grad_desc_view(false),
      min_cst(0), min_err(-1), cost_functor(c)
{ /* empty */ }

bool Boosting::serialize (std::ostream& os, ver_list& vl) const {
    SERIALIZE_PARENT(Aggregating, os, vl, 1);
    assert(lm_wgt.size() == lm.size());
    for (UINT i = 0; i < lm_wgt.size(); ++i)
        os << lm_wgt[i] << ' ';
    if (!lm_wgt.empty()) os << '\n';
    return (os << convex << '\n');
}

bool Boosting::unserialize (std::istream& is, ver_list& vl, const id_t& d) {
    if (d != id() && d != empty_id) return false;
    UNSERIALIZE_PARENT(Aggregating, is, vl, 1, v);

    const UINT n = lm.size();
    lm_wgt.resize(n);
    for (UINT i = 0; i < n; ++i)
        if (!(is >> lm_wgt[i])) return false;

    UINT c;
    if (!(is >> c)) {
        if (v != 0) return false;
        convex = false;          // some old version: no convex
    }
    else if (c > 1) return false;
    else convex = c;
    return true;
}

void Boosting::initialize () {
    Aggregating::initialize();
    lm_wgt.clear();
#if BOOSTING_OUTPUT_CACHE
    clear_cache();
#endif
}

REAL Boosting::margin_norm () const {
    return convex? 1 : model_weight_sum();
}

REAL Boosting::margin_of (const Input& x, const Output& y) const {
    if (n_in_agg == 0) return 0;
    assert(std::fabs(std::fabs(y[0])-1) < INFINITESIMAL);
    return (*this)(x)[0] * y[0];
}

REAL Boosting::margin (UINT i) const {
    if (n_in_agg == 0) return 0;
    REAL y = ptd->y(i)[0];
    assert(std::fabs(std::fabs(y)-1) < INFINITESIMAL);
    return get_output(i)[0] * y;
}

Output Boosting::operator() (const Input& x) const {
    assert(n_in_agg <= lm.size() && n_in_agg <= lm_wgt.size());
#ifndef NDEBUG
    for (UINT i = 0; i < n_in_agg; ++i)
        assert(lm_wgt[i] >= 0);
#endif
    // each hypothesis votes with the sign of its output, weighted by lm_wgt[i]
    Output y(_n_out, 0);
    for (UINT i = 0; i < n_in_agg; ++i) {
        assert(lm[i] != 0);
        Output out = (*lm[i])(x);
        for (UINT j = 0; j < _n_out; ++j)
            y[j] += (out[j] > 0)? lm_wgt[i] : -lm_wgt[i];
    }

    if (convex && n_in_agg > 0) {
        using namespace op;
        y *= 1 / model_weight_sum();
    }
    return y;
}

Output Boosting::get_output (UINT idx) const {
    assert(ptw != NULL); // no data sampling
#if BOOSTING_OUTPUT_CACHE
    if (cache_n[idx] > n_in_agg) clear_cache(idx);
    Output& y = cache_y[idx];
    // resume from the cached sum over the first `start' hypotheses
    UINT start = cache_n[idx];
    cache_n[idx] = n_in_agg;
#else
    Output y(_n_out, 0);
    UINT start = 0;
#endif
    for (UINT i = start; i < n_in_agg; ++i) {
        assert(lm[i] != 0);
        Output out = lm[i]->get_output(idx);
        for (UINT j = 0; j < _n_out; ++j)
            y[j] += (out[j] > 0)? lm_wgt[i] : -lm_wgt[i];
    }

    if (convex && n_in_agg > 0) {
        Output y2 = y;
        using namespace op;
        y2 *= 1 / model_weight_sum();
        return y2;
    }
    return y;
}

#if BOOSTING_OUTPUT_CACHE
void Boosting::set_train_data (const pDataSet& pd, const pDataWgt& pw) {
    Aggregating::set_train_data(pd, pw);
    clear_cache();
}
#endif

REAL Boosting::train () {
    assert(n_in_agg == 0 && empty());
    assert(ptd != 0 && ptw != 0);
    assert(lm_base != 0); // we need lm_base to create new hypotheses

    if (grad_desc_view) return train_gd();

    pDataWgt sample_wgt = ptw;
    for (UINT i = 0; i < max_n_model; ++i) {
        const pLearnModel p = train_with_smpwgt(sample_wgt);

        // update sample_wgt, set up hypothesis wgt (lm_wgt)
        const REAL w = assign_weight(*sample_wgt, *p);
        if (w <= 0) break;

        lm.push_back(p); lm_wgt.push_back(w);
        n_in_agg++;
        if (min_cst > 0 && cost() < min_cst) break;
        if (min_err >= 0) {
            REAL err = 0;
            for (UINT j = 0; j < n_samples; ++j)
                err += (*ptw)[j] * (get_output(j)[0]*ptd->y(j)[0] <= 0);
            if (err <= min_err) break;
        }
        sample_wgt = update_smpwgt(*sample_wgt, *p);
    }
    return 0;
}

REAL Boosting::train_gd () {
    _boost_gd bgd(this);
    iterative_optimize(_line_search<_boost_gd,BoostWgt,REAL,REAL>
                       (&bgd, convex? 1.0 : 0.5));
    return cost();
}

pLearnModel Boosting::train_with_smpwgt (const pDataWgt& sw) const {
#if VERBOSE_OUTPUT
    std::cout << "=== " << id() << " ["
              << (convex? "convex" : "linear") << "] #"
              << n_in_agg+1 << " / " << max_n_model << " ===\n";
#endif
    LearnModel *plm = lm_base->clone();
    assert(plm != 0);
    plm->set_train_data(ptd, sw);
    plm->train();
    return plm;
}

REAL Boosting::convex_weight (const DataWgt&, const LearnModel&) {
    OBJ_FUNC_UNDEFINED("convex_weight");
}
REAL Boosting::linear_weight (const DataWgt&, const LearnModel&) {
    OBJ_FUNC_UNDEFINED("linear_weight");
}
void Boosting::convex_smpwgt (DataWgt&) {
    OBJ_FUNC_UNDEFINED("convex_smpwgt");
}
void Boosting::linear_smpwgt (DataWgt&) {
    OBJ_FUNC_UNDEFINED("linear_smpwgt");
}

REAL Boosting::cost () const {
    if (n_in_agg == 0 && convex) return INFINITY;
    /* GCC 2.96 doesn't have header file <limits>; otherwise
     *   return std::numeric_limits<REAL>::max();
     * should be used */
    assert(ptd != 0 && ptw != 0);
    REAL cst = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        REAL c = _cost(get_output(i), ptd->y(i));
        cst += c * (*ptw)[i];
    }
    return cst;
}

/** Compute weight (probability) vector according to
 *  @f[ D_i \propto -\frac{w_i}{y_i} c'_F (F(x_i), y_i) @f]
 *  @sa #cost_deriv_functor */
pDataWgt Boosting::sample_weight () const {
    assert(ptd != 0 && ptw != 0);
    if (n_in_agg == 0) return ptw;

    DataWgt* pdw = new DataWgt(n_samples);
    REAL sum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        REAL yi = ptd->y(i)[0];
        REAL p = - (*ptw)[i] / yi * _cost_deriv(get_output(i), ptd->y(i));
        assert(p >= 0);
        (*pdw)[i] = p; sum += p;
    }
    assert(sum > 0);

    const REAL k = 1 / sum;
    for (UINT i = 0; i < n_samples; ++i)
        (*pdw)[i] *= k;
    return pdw;
}

Boosting::BoostWgt& Boosting::BoostWgt::operator+= (const BoostWgt& bw) {
    const UINT ts = size();
    assert(ts+1 == bw.size());

    for (UINT i = 0; i < ts; ++i) {
        assert(lm[i] == bw.lm[i]);
        lm_wgt[i] += bw.lm_wgt[i];
    }
    lm.push_back(bw.lm[ts]);
    lm_wgt.push_back(bw.lm_wgt[ts]);
    return *this;
}

Boosting::BoostWgt Boosting::BoostWgt::operator- () const {
    using namespace op;
    return BoostWgt(lm, -lm_wgt);
}

Boosting::BoostWgt& Boosting::BoostWgt::operator*= (REAL r) {
    using namespace op;
    lm_wgt *= r;
    return *this;
}

} // namespace lemga
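Note on the hooks left undefined above (convex_weight, linear_weight, convex_smpwgt, linear_smpwgt): they are meant to be supplied by a concrete booster, and train() drives them through assign_weight() and update_smpwgt(). For intuition, if the cost functor happened to be the exponential cost c(F,y) = exp(-F·y), then c'_F = -y·exp(-F·y), and the sample_weight() rule D_i ∝ -(w_i/y_i)·c'_F(F(x_i), y_i) reduces to the familiar AdaBoost distribution D_i ∝ w_i·exp(-F(x_i)·y_i). The sketch below is illustrative only and is not part of LEMGA: the class name MyAdaBoost is invented, and it assumes that boosting.h declares linear_weight/linear_smpwgt as virtual and that the protected members used throughout boosting.cpp (ptd, n_samples, lm, lm_wgt, n_in_agg) are visible to subclasses.

// adaboost_sketch.cpp -- hypothetical example, NOT part of LEMGA.
#include <cmath>
#include "boosting.h"

namespace lemga {

class MyAdaBoost : public Boosting {                 // invented name
public:
    explicit MyAdaBoost (const cost::Cost& c) : Boosting(false, c) {}

    /* Hypothesis weight: weighted 0/1 error of the new hypothesis l under
     * the current sample weights dw, turned into the classic AdaBoost
     * weight 0.5*ln((1-err)/err).  A non-positive return tells train()
     * to stop (see "if (w <= 0) break;" above). */
    virtual REAL linear_weight (const DataWgt& dw, const LearnModel& l) {
        REAL err = 0;
        for (UINT i = 0; i < n_samples; ++i)
            if (l.get_output(i)[0] * ptd->y(i)[0] <= 0)
                err += dw[i];
        if (err >= 0.5) return -1;        // no better than random guessing
        if (err < 1e-12) err = 1e-12;     // avoid log(0) for a perfect hypothesis
        return 0.5 * std::log((1 - err) / err);
    }

    /* Sample re-weighting: multiply each weight by exp(-alpha*y*h(x)) for
     * the newest hypothesis h with weight alpha, then renormalize so the
     * weights again form a distribution. */
    virtual void linear_smpwgt (DataWgt& dw) {
        const REAL alpha = lm_wgt[n_in_agg-1];
        REAL sum = 0;
        for (UINT i = 0; i < n_samples; ++i) {
            const REAL y = ptd->y(i)[0];
            const REAL h = (lm[n_in_agg-1]->get_output(i)[0] > 0)? 1 : -1;
            dw[i] *= std::exp(-alpha * y * h);
            sum += dw[i];
        }
        for (UINT i = 0; i < n_samples; ++i)
            dw[i] /= sum;
    }
};

} // namespace lemga

This only illustrates the kind of arithmetic the weight hooks typically perform; the actual AdaBoost shipped with LEMGA may organize these computations differently.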