⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 adaboost_ecoc.cpp

📁 C++编写的机器学习算法 Lemga is a C++ package which consists of classes for several learning models and gener
💻 CPP
字号:
/** @file
 *  $Id: adaboost_ecoc.cpp 2541 2006-01-10 06:02:27Z ling $
 *
 *  AdaBoost.ECOC: multiclass boosting via error-correcting output codes.
 *  Each boosting round picks a binary partition (a "coloring" of the
 *  classes into +1/-1), trains a base learner on that two-class problem,
 *  and reweights a per-(class, sample) weight matrix `joint_wgt`.
 */
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <map>
#include "random.h"
#include "utility.h"
#include "adaboost_ecoc.h"

// NOTE(review): std::cerr is used below but <iostream> is not included
// directly here -- presumably pulled in via a project header; confirm.

REGISTER_CREATOR(lemga::AdaBoost_ECOC);

namespace lemga {

/** Reset the boosting state before training.
 *  `joint_wgt[c][i]` is the weight of sample i w.r.t. class c,
 *  initialized from the per-sample weights *ptw for every class. */
void AdaBoost_ECOC::reset_training () {
    joint_wgt = JointWgt(n_class(), *ptw);
    // to facilitate the normalization in update_training(),
    // we explicitly set joint_wgt[y_i][i] to zero
    // (several functions below silently rely on this invariant:
    // the true-class entry never contributes to any weight sum)
    for (UINT i = 0; i < n_samples; ++i)
        joint_wgt[ex_class[i]][i] = 0;
    cur_err.resize(n_samples);
    assert(n_in_agg == 0);
}

// Symmetric class-by-class edge-weight matrix type.
typedef std::vector<std::vector<REAL> > WMAT;

/** Cut value of partition p on edge-weight matrix w: the sum of
 *  w[i][j] over all pairs (i, j) placed on opposite sides of p.
 *  Debug-only helper (used in an assert in max_cut below). */
static REAL binary_cut (const WMAT& w, const ECOC_VECTOR& p) {
    const UINT n = p.size();
    assert(w.size() == n);
    REAL c = 0;
    for (UINT i = 0; i < n; ++i) {
        assert(w[i].size() == n);
        for (UINT j = 0; j < i; ++j) {
            assert(w[i][j] == w[j][i]);
            if (p[i] != p[j])
                c += w[i][j];
        }
    }
    return c;
}

// compute the edge weight
/** Build the symmetric "confusion" matrix: w[y][c] accumulates
 *  joint_wgt[c][j] over samples j of true class y, then the matrix is
 *  symmetrized (w[y][c] = w[c][y] = sum of both directions).
 *  Diagonal stays 0 because joint_wgt[true class][i] == 0. */
WMAT AdaBoost_ECOC::confusion_matrix () const {
    WMAT w(nclass, std::vector<REAL>(nclass, 0));
    for (UINT j = 0; j < n_samples; ++j) {
        int y = ex_class[j];
        for (UINT c = 0; c < nclass; ++c)
            w[y][c] += joint_wgt[c][j];
    }
    for (UINT c = 0; c < nclass; ++c)
        for (UINT y = 0; y < c; ++y)
            w[y][c] = w[c][y] = w[y][c] + w[c][y];
    return w;
}

/// test all combinations to find out the max-cut
/** Exhaustive max-cut over all 2^(nclass-1) partitions, enumerated by
 *  Gray code so each step flips a single class (index `flip`) and the
 *  cut value is updated incrementally in O(nclass).
 *  Exponential in nclass -- hence the warning for nclass > 13.
 *  NOTE(review): gray_next() is a project helper (utility.h); assumed to
 *  toggle p[flip] and return flip == 0 when enumeration wraps -- confirm. */
ECOC_VECTOR AdaBoost_ECOC::max_cut (UINT nr) const {
    assert(nr == nclass);
    if (nclass > 13)
        std::cerr << "Warning: Max-cut is very slow for too many classes\n";
    WMAT ewgt = confusion_matrix();
    for (UINT c = 0; c < nclass; ++c)
        assert(ewgt[c][c] == 0); // needed for the max-cut below
    ECOC_VECTOR maxp, p(nclass, 0);
    REAL maxc = 0, cut = 0;
    size_t flip = 0;
    while (gray_next(p, flip), flip != 0) { // p[0] will be fixed at 0
        // Incremental cut update: only edges incident to the flipped
        // class change sides. (c == flip contributes 0 via the diagonal.)
        for (UINT c = 0; c < nclass; ++c)
            if (p[c] == p[flip])
                cut -= ewgt[c][flip];
            else
                cut += ewgt[c][flip];
        if (maxc < cut) {
            maxp = p; maxc = cut;
        }
    }
    // Convert the 0/1 enumeration encoding to the -1/+1 ECOC encoding.
    for (UINT c = 0; c < nclass; ++c)
        if (maxp[c] == 0) maxp[c] = -1;
    assert(std::fabs(maxc - binary_cut(ewgt, maxp)) < EPSILON);
    return maxp;
}

/// nr (or nr+1) is the maximal # of classes that can be "colored"
/** Greedy max-cut: visit class pairs in decreasing edge weight (the
 *  multimap is keyed on -weight so iteration order is heaviest first)
 *  and put each pair's endpoints on opposite sides when possible.
 *  Classes never reached keep color 0 (left out of this round). */
ECOC_VECTOR AdaBoost_ECOC::max_cut_greedy (UINT nr) const {
    assert(nr <= nclass);
    WMAT ewgt = confusion_matrix();
    // we could use O(n^2 log(K)) to find the first K edges with largest
    // weights. Here we just use O(n^2 log(n^2)).
    typedef std::multimap<REAL,UINT> MMAP;
    MMAP edge;
    for (UINT j = 1; j < nclass; ++j)
        for (UINT c = 0; c+j < nclass; ++c) {
            UINT y = c + j;
            // Pack the pair (c, y) into a single key: c*nclass + y.
            edge.insert(std::pair<REAL,UINT>(-ewgt[y][c], c*nclass+y));
        }
    ECOC_VECTOR p(nclass, 0);
    UINT pn = 0;        // number of classes colored so far
    for (MMAP::iterator pe = edge.begin(); pn < nr && pe != edge.end(); ++pe) {
        UINT k1 = pe->second / nclass, k2 = pe->second % nclass;
        if (randu() < 0.5) std::swap(k1, k2); // not really useful
        if (p[k1] == 0 && p[k2] == 0) {
            // both uncolored: put them on opposite sides
            p[k1] = 1; p[k2] = -1;
            pn += 2;
        } else if (p[k1] == 0) {
            p[k1] = -p[k2];
            ++pn;
        } else if (p[k2] == 0) {
            p[k2] = -p[k1];
            ++pn;
        }
        // both already colored: edge is left as-is
    }
    return p;
}

/** Random partition: pick nr classes at random and alternate their
 *  colors (+1/-1); the remaining nclass - nr classes keep color 0. */
ECOC_VECTOR AdaBoost_ECOC::random_half (UINT nr) const {
    assert(nr <= nclass);
    ECOC_VECTOR p(nclass, 0);
    // If nr == n, setting p to be all 1's and then randomly
    // flipping half of p should be the fastest approach.
    std::vector<UINT> idx(nclass);
    for (UINT i = 0; i < nclass; ++i)
        idx[i] = i;
    // NOTE(review): std::random_shuffle was removed in C++17; keeping it
    // here assumes a pre-C++17 toolchain (consistent with this 2006 code).
    std::random_shuffle(idx.begin(), idx.end());
    while (nr--)
        p[idx[nr]] = (nr % 2)? 1 : -1;
    return p;
}

/** Produce the partition p for round i: a fixed code from the base
 *  class if one is defined, otherwise one generated per par_method. */
bool AdaBoost_ECOC::ECOC_partition (UINT i, ECOC_VECTOR& p) {
    if (MultiClass_ECOC::ECOC_partition(i, p)) return true;
    const UINT n = n_class();
    switch (par_method) {
    case RANDOM_HALF:
        p = random_half(n);
        break;
    case MAX_CUT:
        p = max_cut(n);
        break;
    case MAX_CUT_GREEDY:
        p = max_cut_greedy(n);
        break;
    default:
        assert(false);
    }
    return true;
}

/** Per-sample weights for the binary problem induced by partition p:
 *  sample i's weight is the sum of joint_wgt[c][i] over classes c on
 *  the side opposite to i's true class; normalized to sum to 1.
 *  @return smart pointer taking ownership of the new DataWgt
 *          (presumably -- pDataWgt is a project type; confirm). */
pDataWgt AdaBoost_ECOC::smpwgt_with_partition (const ECOC_VECTOR& p) {
    DataWgt* btw = new DataWgt(n_samples);
    REAL wsum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        int y = p[ex_class[i]];
        assert(y == 1 || y == -1);
        REAL w = 0;
        for (UINT c = 0; c < n_class(); ++c)
            if (p[c] + y == 0)   // c is on the opposite side of y
                w += joint_wgt[c][i];
        wsum += w; (*btw)[i] = w;
    }
    REAL r = 1 / wsum;
    for (UINT i = 0; i < n_samples; ++i)
        (*btw)[i] *= r;
    return btw;
}

/** Train a clone of the base learner on the binary problem given by
 *  partition p (labels p[true class], weights from
 *  smpwgt_with_partition). Also fills cur_err[i] with whether the
 *  trained model misclassifies sample i under this partition. */
pLearnModel AdaBoost_ECOC::train_with_partition (ECOC_VECTOR& p) {
    LearnModel *plm = lm_base->clone();
    assert(plm != 0);
    DataSet* btd = new DataSet();
    for (UINT i = 0; i < n_samples; ++i)
        btd->append(ptd->x(i), Output(1, p[ex_class[i]]));
    cur_smpwgt = smpwgt_with_partition(p); // kept for assign_weight()
    plm->set_train_data(btd, cur_smpwgt);
    plm->train();
    // To save some memory, we put back the original set and weight
    plm->set_train_data(ptd, ptw);
    // cur_err[i]: did the learner land on the wrong side (or on 0)?
    for (UINT i = 0; i < n_samples; ++i)
        cur_err[i] = (plm->get_output(i)[0] * p[ex_class[i]] <= 0);
    return plm;
}

/** AdaBoost confidence for the round's learner from its weighted
 *  training error: alpha = log((1-err)/err) / 2.
 *  Returns -1 (reject the learner) when err >= 1/2; caps beta at 1000
 *  when err <= 0 to keep alpha finite for a perfect learner. */
REAL AdaBoost_ECOC::assign_weight (const ECOC_VECTOR& p, const LearnModel& l) {
    const DataWgt& sw = *cur_smpwgt;
    REAL err = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        if (cur_err[i])
            err += sw[i];
    }
    if (err >= 0.5) return -1;
    REAL beta;
    if (err <= 0)
        beta = 1000;
    else
        beta = 1/err - 1;
    return std::log(beta) / 2;
}

/** Multiplicative AdaBoost update of joint_wgt after a round.
 *  For each sample i, entries of classes on the side opposite to i's
 *  true class are scaled by exp(+alpha) if the learner erred on i,
 *  exp(-alpha) otherwise (alpha = lm_wgt of the round just added).
 *  The matrix is rescaled (total -> n) only when the running total has
 *  drifted outside [1e-5, 1e5] * n, to avoid float under/overflow. */
void AdaBoost_ECOC::update_training (const ECOC_VECTOR& p) {
    const UINT n = n_class();
    const REAL beta = std::exp(lm_wgt[n_in_agg-1]);
    const REAL ibeta = 1 / beta;
    REAL wsum = 0;
    for (UINT i = 0; i < n_samples; ++i) {
        int y = p[ex_class[i]];
        REAL r = (cur_err[i]? beta : ibeta);
        for (UINT c = 0; c < n; ++c) {
            if (p[c] != y)
                joint_wgt[c][i] *= r;
            wsum += joint_wgt[c][i];
        }
    }
    REAL r = n / wsum;
    if (r > 1e-5 && r < 1e5) return;
    // normalize the joint_wgt
    for (UINT i = 0; i < n_samples; ++i)
        for (UINT c = 0; c < n; ++c)
            joint_wgt[c][i] *= r;
}

} // namespace lemga

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -