📄 aggrank.cpp

📁 The program implements three large-margin thresholded ensemble algorithms for ordinal regression. I
💻 CPP
字号:
/**   aggrank.cpp: an abstract class for general thresholded ensembles   (c) 2006-2007 Hsuan-Tien Lin**/#include <assert.h>#include <cmath>#include <map>#include "aggrank.h"namespace lemga {    /** basics **/    bool AggRank::serialize (std::ostream& os, ver_list& vl) const {	SERIALIZE_PARENT(Aggregating, os, vl, 1);    	assert(lm_wgt.size() == lm.size());	for (UINT i = 0; i < lm_wgt.size(); ++i)	    os << lm_wgt[i] << ' ';	if (!lm_wgt.empty()) os << '\n';	os << n_rank << '\n';	UINT t=0;	for (UINT i = 0; i <= lm.size(); i++){	    for (UINT k = 1; k < n_rank; k++)		os << thres[t++] << ' ';	    os << '\n';	}	if (t > 0) os << '\n';	return true;    }    bool AggRank::unserialize (std::istream& is, ver_list& vl, const id_t& d) {	if (d != id() && d != NIL_ID) return false;	UNSERIALIZE_PARENT(Aggregating, is, vl, 1, v);    	const UINT n = lm.size();	lm_wgt.resize(n);	for (UINT i = 0; i < n; ++i)	    if (!(is >> lm_wgt[i])) return false;	if (!set_aggregation_size(n)) return false;	UINT _n_rank;	if (!(is >> _n_rank))	    return false;	set_n_rank(_n_rank);	thres.resize((n+1) * (n_thres));	UINT t=0;	for (UINT i = 0; i <= n; i++)	    for (UINT k = 1; k < n_rank; k++)		if (!(is >> thres[t++])) return false;	return true;    }    Output AggRank::operator() (const Input& x, UINT iter) const {	//y[1] is the decision value, and y[0] is the prediction	assert(_n_out == 1);	Output y(2, 0);	if (iter > n_in_agg) iter = n_in_agg;    	for (UINT i = 0; i < iter; ++i) {	    Output out = (*lm[i])(x);	    y[1] += (out[0] * lm_wgt[i]);	}	UINT rank = 1;	UINT idx = iter * n_thres;	for(UINT k = 1; k < n_rank; k++)	    if (y[1] >= thres[idx++]) rank++;	y[0] = rank;    	return y;    }        /** initialize routine **/    void AggRank::reset() {	//clear the trained results, but retain other settings	Aggregating::reset();	assert(n_rank >= 2);	lm_wgt.clear();	thres.resize(n_thres);	for (UINT k = 0; k < n_thres; ++k){	    switch(init_mode){	    case INIT_NAIVE:		thres[k] = 1.0 + k - 0.5 * n_rank;		break;	    case INIT_ZERO:		thres[k] = 0.0;		break;	    case INIT_RAND:		if (k > 0)		    thres[k] = thres[k-1] + randuc();		else		    thres[k] = randuc();		break;	    }	}	dec_value.resize(n_samples);	for (UINT j = 0; j < n_samples; ++j)	    dec_value[j] = 0;    }    pLearnModel AggRank::learn_weak(const vREAL& rhopos, const vREAL& rhoneg, 				    const REAL sum_rhodiff) {	// rhopos: the sample weights supporting the prediction to be positive	// rhoneg: the sample weights supporting the prediction to be negative	DataSet* btd = new DataSet();	DataWgt* btw = new DataWgt();    	for(UINT j = 0; j < n_samples; ++j) {	    if (rhopos[j] >= rhoneg[j])		btd->append(ptd->x(j), 			    Output(1, +1));	    else		btd->append(ptd->x(j), 			    Output(1, -1));	    btw->push_back(fabs(rhopos[j] - rhoneg[j]) / sum_rhodiff);	}	LearnModel *plm = lm_base->clone();    	plm->set_train_data(btd, btw);	plm->train();	return plm;    }    void AggRank::recompute_dec() {    	dec_value.resize(n_samples);	for (UINT j = 0; j < n_samples; ++j){	    dec_value[j] = 0.0;	    for (UINT i = 0; i < n_in_agg; ++i) {		Output out = (*lm[i])(ptd->x(j));			dec_value[j] += (out[0] >= 0)? lm_wgt[i] : -lm_wgt[i];	    }	}    }    void AggRank::compute_thres(vREAL::iterator it) {	vREAL thres_now(n_rank+1);	thres_now[0] = -std::numeric_limits<REAL>::max();	thres_now[n_rank] = std::numeric_limits<REAL>::max();	switch(thres_mode){	case THRES_EXPLOSS:	    compute_thres_exploss(thres_now, false, false);	    break;	case THRES_EXPLOSS_ORDERED:	    compute_thres_exploss(thres_now, true, false);	    break;	case THRES_ABSLOSS:	    compute_thres_dploss(thres_now, true);	    break;	case THRES_CLALOSS:	    compute_thres_dploss(thres_now, false);	    break;	default:	    assert(thres_mode < THRES_CLALOSS);	}	for(UINT k = 1; k < n_rank; ++k)	    (*it++)=thres_now[k];    }    void AggRank::compute_thres_exploss(vREAL& th, bool ordered, bool full){	assert(th[n_rank] >= std::numeric_limits<REAL>::max());	vREAL wp(n_rank+1), wn(n_rank+1);	for(UINT j = 0; j < n_samples; ++j) {	    int o = (int)ptd->y(j)[0];	    if (full){		for(int k=1; k <= o-1; ++k)		    wp[k] += exp(-dec_value[j]);		for(int k=o; k <= (int)n_rank-1; ++k)		    wn[k] += exp(dec_value[j]);	    }	    else{		wp[o-1] += exp(-dec_value[j]);		wn[o] += exp(dec_value[j]);	    }	}    	for(UINT k = n_rank-1; k > 0; --k){	    th[k] = 0.5 * log(wn[k] / wp[k]);	    //clip th in the good region	    if (isnan(th[k]))		th[k] = th[k+1];	    	    if (isinf(th[k]))		th[k] = isinf(th[k]) * std::numeric_limits<REAL>::max();	    if (ordered && th[k] > th[k+1]){		UINT kk;		REAL wwn(wn[k]), wwp(wp[k]);			for(kk = k+1; ; kk++){		    assert(kk <= n_rank-1); 		    // if th[n_rank] = INFTY, 		    // then the assertion should be true		    wwn += wn[kk];		    wwp += wp[kk];		    th[kk] = 0.5 * log(wwn / wwp);		    if (isnan(th[kk]))			th[kk] = th[kk+1];		    if (th[kk] <= th[kk+1])			break;		}		//update from k to kk		while(kk > k){		    kk--;		    th[kk] = th[kk+1];		}	    }	}    }    void AggRank::compute_thres_dploss(vREAL& th, bool do_abs){	std::map<REAL, std::vector<UINT> > declbl;	std::vector<vREAL> cost;	std::map<REAL, std::vector<UINT> >::iterator it;	vREAL threserr(n_rank+1);	for(UINT j = 0; j < n_samples; ++j)	    declbl[dec_value[j]].push_back((UINT)(ptd->y(j)[0]));	it = declbl.begin();	cost.resize(declbl.size());	cost[0].resize(n_rank * 2 + 1);	cost[0][0] = it->first;	for(UINT k = 1; k <= n_rank; ++k){	    REAL loss = 0;	    for(std::vector<UINT>::iterator in = it->second.begin(); 		in != it->second.end(); in++)		if (*in != k) loss += (do_abs ? fabs(REAL(*in) - REAL(k)) : 1);	    cost[0][k] = loss;	    cost[0][k+n_rank] = -1;	}	++it;	for(UINT n = 1; it != declbl.end(); it++, n++){	    REAL bestloss = std::numeric_limits<REAL>::infinity();	    REAL bestidx = -1;	    cost[n].resize(n_rank * 2 + 1);	    cost[n][0] = it->first;	    for(UINT k = 1; k <= n_rank; ++k){		if (cost[n-1][k] < bestloss){		    bestloss = cost[n-1][k];		    bestidx = k;		}		REAL loss = bestloss;		for(std::vector<UINT>::iterator in = it->second.begin(); 		    in != it->second.end(); in++)		    if (*in != k) 			loss += (do_abs ? fabs(REAL(*in) - REAL(k)) : 1);		cost[n][k] = loss;		cost[n][k+n_rank] = bestidx;	    }	}    	UINT bestidx = 1;	UINT nCost = cost.size();	for(UINT k = 1; k <= n_rank; ++k){	    if (cost[nCost-1][k] < cost[nCost-1][bestidx])		bestidx = k;	}	for(UINT j = bestidx; j < n_rank; ++j)	    th[j] = std::numeric_limits<REAL>::max();	for(UINT n = nCost-1; n > 0; n--){	    UINT nextidx = (UINT)(cost[n][bestidx + n_rank]);	    if (bestidx != nextidx){		for(UINT j = nextidx; j < bestidx; ++j)		    th[j] = (cost[n][0] + cost[n-1][0]) * 0.5;		bestidx = nextidx;	    	    }	}    	for(UINT j = 1; j < bestidx; ++j)	    th[j] = -std::numeric_limits<REAL>::max();    }} //namespace lemga
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -