⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stump.cpp

📁 C++编写的机器学习算法 Lemga is a C++ package which consists of classes for several learning models and gener
💻 CPP
字号:
/** @file *  $Id: stump.cpp 2542 2006-01-10 07:24:14Z ling $ */#include <assert.h>#include <cmath>#include <algorithm>#include <vector>#include <map>#include "stump.h"REGISTER_CREATOR(lemga::Stump);namespace lemga {bool Stump::serialize (std::ostream& os, ver_list& vl) const {    SERIALIZE_PARENT(LearnModel, os, vl, 2);    return (os << idx << ' ' << bd1 << ' ' << bd2 << ' '               << (dir? 'P':'N') << '\n');}bool Stump::unserialize (std::istream& is, ver_list& vl, const id_t& d) {    if (d != id() && d != empty_id) return false;    UNSERIALIZE_PARENT(LearnModel, is, vl, 2, v);    if (v == 0) { /* Take care of _n_in and _n_out */        if (!(is >> _n_in)) return false;        assert(_n_out == 1);    }    /* common part for ver 0, 1, and 2 */    if (!(is >> idx >> bd1)) return false;    bd2 = bd1;    if (v >= 2)        if (!(is >> bd2) || bd1 > bd2) return false;    char c;    if (!(is >> c) || (c != 'P' && c != 'N')) return false;    dir = (c == 'P');    return true;}typedef std::map<REAL,REAL>::iterator MI;REAL Stump::train_1d (const std::vector<REAL>& x, const std::vector<REAL>& yw){    const UINT N = x.size();    assert(yw.size() == N);    // combine examples with same input, and sort them    std::map<REAL,REAL> xy;    for (UINT i = 0; i < N; ++i)        xy[x[i]] += yw[i];    UINT n = xy.size();    xy[xy.begin()->first-2] = 1;    // insert a "very small" x    assert(xy.size() == n+1);    for (MI p, pn = xy.begin(); p = pn++, p != xy.end();)        if (p->second > -INFINITESIMAL && p->second < INFINITESIMAL)            xy.erase(p);    n = xy.size();    xy[xy.rbegin()->first+2] = 1;   // a "very large" x    assert(n > 0 && xy.size() == n+1);    const MI xyb = xy.begin();    REAL minthr = 0, mine = INFINITY, e = - xyb->second;    REAL cur_x = xyb->first;    for (MI p = xyb; n; --n) {        e += p->second;        assert(p != xyb || e == 0);        REAL nxt_x = (++p)->first;        REAL cur_thr = (cur_x + nxt_x) / 2;        cur_x = nxt_x;        if (e < mine) {            mine = e; minthr = cur_thr;        }    }    return minthr;}/* yw_inf is yw[infinity] */REAL Stump::train_1d (const std::vector<REAL>& x, const std::vector<REAL>& yw,                      REAL yw_inf, bool& ir, bool& mind, REAL& th1, REAL& th2){    const UINT N = x.size();    assert(yw.size() == N);    // combine examples with same input, and sort them    std::map<REAL,REAL> xy;    for (UINT i = 0; i < N; ++i)        xy[x[i]] += yw[i];#ifndef NDEBUG    UINT n = xy.size();#endif    xy[xy.begin()->first-2] = 1;    // insert a "very small" x    assert(xy.size() == n+1);    for (MI p, pn = xy.begin(); p = pn++, p != xy.end();)        if (p->second > -INFINITESIMAL && p->second < INFINITESIMAL)            xy.erase(p);#ifndef NDEBUG    // check whether a constant function is enough    bool all_the_same = true;    MI pb = xy.begin(); ++pb;    for (MI p = pb; p != xy.end(); ++p)        if (p->second < -INFINITESIMAL) { all_the_same = false; break; }    if (!all_the_same) {        all_the_same = true;        for (MI p = pb; p != xy.end(); ++p)            if (p->second > INFINITESIMAL) { all_the_same = false; break; }    }    if (all_the_same)        std::cerr << "Stump: Warning: all y's are the same.\n";#endif    const MI xyb = xy.begin(), xye = xy.end();    REAL mine = 0, maxe = 0, e = -1;    MI mint, maxt, p;    for (mint = maxt = p = xyb; p != xye; ++p) {        e += p->second;        assert(p != xyb || e == 0);        if (e < mine) {            mine = e; mint = p;        } else if (e > maxe) {            maxe = e; maxt = p;        }        // we prefer middle indices        if (mint == xyb && e == 0) mint = p;        if (maxt == xyb && e == 0) maxt = p;    }    e = (1-e-yw_inf) / 2;// error of y = sgn(x > -Inf)    mine += e;           // starting with y = -1    maxe = 1 - (maxe+e); // starting with y = 1    // unify the solution to mind, mine, and mint    if (std::fabs(mine - maxe) < EPSILON) {        MI nxtt = mint; ++nxtt;        mind = (mint != xyb && nxtt != xye);    }    else mind = (mine < maxe);    if (!mind) {        mine = maxe; mint = maxt;    }    MI nxtt = mint; ++nxtt;    ir = (mint != xyb && nxtt != xye);    assert(!ir || !all_the_same);    th1 = mint->first;    if (ir)        th2 = nxtt->first;    else        th2 = th1 + 2;    return mine;}/* Find the optimal dimension and threshold */REAL Stump::train () {    const UINT N = n_samples;    assert(ptd != 0 && ptw != 0 && ptd->size() == N);    assert(n_input() > 0 && n_output() == 1);    // weight the examples    std::vector<REAL> yw(N);    for (UINT i = 0; i < N; ++i)        yw[i] = ptd->y(i)[0] * (*ptw)[i];    REAL minerr = 2;    bool minir = true;    std::vector<REAL> x(N);    for (UINT d = 0; d < _n_in; ++d) {        for (UINT i = 0; i < N; ++i)            x[i] = ptd->x(i)[d];        bool mind, ir;        REAL th1, th2;        REAL mine = train_1d(x, yw, 0, ir, mind, th1, th2);        if (mine < minerr) {            minerr = mine;            minir = ir;            dir = mind; idx = d;            bd1 = th1; bd2 = th2;        }    }    if (!minir)        std::cerr << "Stump: Warning: threshold out of range.\n";    return minerr;}Output Stump::operator() (const Input& x) const {    assert(idx < n_input() && x.size() == n_input());    assert(bd2 >= bd1);    REAL y = x[idx]*2 - (bd1 + bd2);    if (hard || bd1 == bd2)        y = (y < 0)? -1 : 1;    else {        y /= bd2 - bd1;        y = (y<-1)? -1 : (y>1)? 1 : y;    }    return Output(1, dir? y : -y);}} // namespace lemga

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -