// rankboost.cpp
    return (t == max_num_rounds); // normal termination
}
// Snapshot the current model state as the best-so-far parameters
// (so training can later roll back to the best round).
void RankBoostModel::set_best_model_parameters_to_current_parameters()
{
    best_round_idx                 = current_round;
    best_train_error               = train_error;
    best_test_error                = test_error;
    best_total_default_weight      = total_default_weight;
    best_ind_active_binary_feature = ind_active_binary_feature;
    best_binary_weights            = binary_weights;
    best_ind_active_real_feature   = ind_active_real_feature;
    best_real_weights              = real_weights;
    best_real_limits               = real_limits;
    best_real_default_weights      = real_default_weights;
    best_real_update_counts        = real_update_counts;
    best_binary_update_counts      = binary_update_counts;
    best_non_zero_binary_idxs      = non_zero_binary_idxs;
    best_non_zero_real_idxs        = non_zero_real_idxs;
}
// Restore the model to the best-so-far parameters.
void RankBoostModel::set_current_model_parameters_to_best_parameters()
{
    current_round             = best_round_idx;
    train_error               = best_train_error;
    test_error                = best_test_error;
    total_default_weight      = best_total_default_weight;
    ind_active_binary_feature = best_ind_active_binary_feature;
    binary_weights            = best_binary_weights;
    ind_active_real_feature   = best_ind_active_real_feature;
    real_weights              = best_real_weights;
    real_limits               = best_real_limits;
    real_default_weights      = best_real_default_weights;
    real_update_counts        = best_real_update_counts;
    binary_update_counts      = best_binary_update_counts;
    non_zero_binary_idxs      = best_non_zero_binary_idxs;
    non_zero_real_idxs        = best_non_zero_real_idxs;
}
/********************************************************************************
   Changes the current weights in the model.
   Since this is a binary feature, only its single weight needs to be increased
   by alpha.
*********************************************************************************/
void RankBoostModel::update_model_weights_for_binary_feature(int best_binary_idx,
                                                             weight_t alpha)
{
    binary_weights[best_binary_idx] += alpha;
    binary_update_counts[best_binary_idx]++;

    // first time this feature receives weight: record it in the sorted list
    // of features with non-zero weight
    if (binary_update_counts[best_binary_idx] == 1)
    {
        non_zero_binary_idxs.push_back(best_binary_idx);
        sort(non_zero_binary_idxs.begin(), non_zero_binary_idxs.end());
    }
}
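
/*
   A minimal sketch, not taken from this file: RankBoost conventionally sets the
   weak-ranker weight to alpha = 0.5 * ln(W+ / W-), where W+ and W- are the summed
   crucial-pair weights that the chosen binary feature orders correctly and
   incorrectly. The helper below only illustrates that formula; the training loop
   that actually computes W+ / W- and picks best_binary_idx is assumed, not shown.
*/
static weight_t example_binary_alpha(weight_t w_correct, weight_t w_incorrect)
{
    const weight_t smooth = (weight_t)1e-8;   // avoid division by zero / log of zero
    return (weight_t)(0.5 * log((w_correct + smooth) / (w_incorrect + smooth)));
}
// e.g. (hypothetical call from a training round):
//   model.update_model_weights_for_binary_feature(best_binary_idx,
//                                                 example_binary_alpha(0.7, 0.3));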
/********************************************************************************
   Changes the current weights in the model.
   Since this is a real, theta-thresholded feature, all weight bins from the
   threshold upwards are affected. If q_def is 1, the default (no-vote) weight
   is updated as well.
*********************************************************************************/
void RankBoostModel::update_model_weights_for_real_feature(weight_t alpha,
    int best_real_idx, int q_def, int theta_idx_start, int theta_idx_end)
{
    // a negative end index means "up to the last bin"
    if (theta_idx_end < 0)
        theta_idx_end = real_weights[best_real_idx].size() - 1;

    int i;
    for (i = theta_idx_start; i <= theta_idx_end; i++)
        real_weights[best_real_idx][i] += alpha;

    real_update_counts[best_real_idx]++;

    // first update for this feature: record it in the sorted non-zero list
    if (real_update_counts[best_real_idx] == 1)
    {
        non_zero_real_idxs.push_back(best_real_idx);
        sort(non_zero_real_idxs.begin(), non_zero_real_idxs.end());
    }

    if (q_def > 0)
    {
        real_default_weights[best_real_idx] += alpha;
        total_default_weight += alpha;
    }
}
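
/*
   A hedged, standalone sketch of how the accumulated real-feature weights are read
   back when scoring: the feature's value is bucketed against its limits and the
   weight of that bin is its vote; when the feature is missing, the default
   (no-vote) weight is used instead. This only illustrates the data layout updated
   above; it is not the model's own calc_rank_score / get_real_bin_idx_for_value,
   and the lower_bound-style bucketing is an assumption.
*/
static double real_feature_vote_sketch(const vector<float>& limits,
                                       const vector<weight_t>& bin_weights,
                                       weight_t default_weight,
                                       bool is_active, float value)
{
    if (!is_active || bin_weights.empty())
        return default_weight;            // feature absent: contribute the no-vote weight

    size_t bin = lower_bound(limits.begin(), limits.end(), value) - limits.begin();
    if (bin >= bin_weights.size())
        bin = bin_weights.size() - 1;     // clamp to the last bin

    return bin_weights[bin];
}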
// helper pair of (feature index, score); operator< is reversed so that sort()
// orders pairs from the highest score to the lowest
struct feature_pair {
    feature_pair() : idx(-1), score(NEG_INF) {};
    feature_pair(int _i, float _s) : idx(_i), score(_s) {};

    bool operator< (const feature_pair& other) const
    {
        return score > other.score;
    }

    int   idx;
    float score;
};
/***************************************************************************
   Prints the binary and real features that currently hold non-zero weight,
   ranked by absolute weight.
****************************************************************************/
void RankBoostModel::ouput_ranked_feature_list( ostream& os) const
{
    os << "FEATURE LIST FOR ROUND " << current_round << endl;

    if (binary_weights.size() > 0)
    {
        os << "BINARY FEATURE WEIGHTS: " << endl;
        vector<feature_pair> bin_pairs;
        int i;
        for (i = 0; i < binary_weights.size(); i++)
            if (binary_weights[i] != 0)
                bin_pairs.push_back(feature_pair(i, fabs(binary_weights[i])));

        sort(bin_pairs.begin(), bin_pairs.end());

        os << setprecision(7);
        for (i = 0; i < bin_pairs.size(); i++)
        {
            os << i + 1 << ")\t" << binary_weights[bin_pairs[i].idx] << "\t" << bin_pairs[i].idx << "\t" <<
                binary_feature_names[bin_pairs[i].idx] << " (" << binary_update_counts[bin_pairs[i].idx] <<
                " updates)" << endl;
        }
        os << endl;
    }

    if (real_weights.size() > 0)
    {
        int i;
        os << "REAL FEATURE WEIGHTS: " << endl;
        vector<feature_pair> real_pairs;
        for (i = 0; i < real_weights.size(); i++)
        {
            // rank each real feature by the largest absolute weight in any of
            // its bins (or its default weight, whichever is larger)
            float max = 0;
            int j;
            for (j = 0; j < real_weights[i].size(); j++)
                if (real_weights[i][j] != 0.0)
                    if (fabs(real_weights[i][j]) > max)
                        max = fabs(real_weights[i][j]);

            if (fabs(real_default_weights[i]) > max)
                max = fabs(real_default_weights[i]);

            if (max == 0)
                continue;

            real_pairs.push_back(feature_pair(i, max));
        }

        sort(real_pairs.begin(), real_pairs.end());
        for (i = 0; i < real_pairs.size(); i++)
        {
            int idx = real_pairs[i].idx;
            os << i + 1 << ")\t" << idx << "\t" << real_feature_names[idx] << " (" <<
                setprecision(5) << real_weights[idx].size() - 1 <<
                " bins, " << real_update_counts[idx] << " updates)" << endl;

            // print only the bins where the weight changes, then the last bin's weight
            int j;
            for (j = 1; j < real_weights[idx].size() - 1; j++)
                if (real_weights[idx][j] != real_weights[idx][j + 1])
                    os << " " << j << ":" << real_limits[idx][j] << "," << setprecision(4) << real_weights[idx][j];
            os << " > " << "," << setprecision(4) << real_weights[idx][j] << endl;

            if (real_default_weights[idx] != 0)
                os << "default: " << real_default_weights[idx] << endl;
            os << endl;
        }
    }
}
// per-feature importance statistics; operator< is reversed so that sort()
// orders features from the largest local weight to the smallest
struct FeatureStats {
    FeatureStats() : idx(NEG_INF), global_weight(0), local_weight(0), percent_active(0) {};

    bool operator< (const FeatureStats& other) const
    {
        return local_weight > other.local_weight;
    }

    int    idx;
    double global_weight;
    double local_weight;
    double percent_active;
};
/******************************************************************************
   Measures the "weight" of a feature, both globally (how much it adds to all
   examples) and locally (how much it adds to the samples for which it is
   applicable). Outputs a list ranked according to the local importance.
*******************************************************************************/
void RankBoostModel::ouput_importance_ranked_feature_list( const RankBoostDataset& training_ds,
                                                           ostream& os,
                                                           int only_fidx,
                                                           int round_idx)
{
    remove_default_weights();

    if (real_weights.size() > 0)
    {
        int i;

        // calc sample "weights": each sample accumulates the phi weight of
        // every crucial pair it participates in
        const vector<SamplePairWeight>& phi = training_ds.get_phi_support();
        const vector<RankBoostSample>& samples = training_ds.get_samples();
        vector<double> sam_weights;
        sam_weights.resize(samples.size(), 0);
        for (i = 0; i < phi.size(); i++)
        {
            sam_weights[phi[i].idx1] += phi[i].weight;
            sam_weights[phi[i].idx2] += phi[i].weight;
        }

        double total_weight = 0;
        for (i = 0; i < sam_weights.size(); i++)
            total_weight += sam_weights[i];

        // sum the weights each feature contributes to the samples it is active in
        const int num_real = real_weights.size();
        vector<FeatureStats> feature_stats;
        feature_stats.resize(num_real);
        for (i = 0; i < samples.size(); i++)
        {
            const RankBoostSample& sam = samples[i];
            int j;
            for (j = 0; j < sam.real_active_idxs.size(); j++)
            {
                const int f_idx = sam.real_active_idxs[j];
                if (real_weights[f_idx].size() == 0)
                    continue;

                const int bin_idx = get_real_bin_idx_for_value(f_idx, sam.real_active_values[j]);
                const double w = real_weights[f_idx][bin_idx] - real_default_weights[f_idx];
                feature_stats[f_idx].local_weight += (fabs(w) * sam_weights[i]);
                feature_stats[f_idx].percent_active += sam_weights[i];
            }
        }

        // remove default weight from all features
        bool changed_a_default = false;
        for (i = 0; i < feature_stats.size(); i++)
        {
            if (real_default_weights[i] != 0)
            {
                int j;
                for (j = 0; j < real_weights[i].size(); j++)
                    real_weights[i][j] -= real_default_weights[i];
                real_default_weights[i] = 0;
                changed_a_default = true;
            }
        }

        // recompute the per-feature weights if any default was folded in
        if (changed_a_default)
        {
            feature_stats.clear();
            feature_stats.resize(num_real);
            int sam_idx;
            for (sam_idx = 0; sam_idx < samples.size(); sam_idx++)
            {
                const RankBoostSample& sam = samples[sam_idx];
                int j;
                for (j = 0; j < sam.real_active_idxs.size(); j++)
                {
                    const int f_idx = sam.real_active_idxs[j];
                    const int bin_idx = get_real_bin_idx_for_value(f_idx, sam.real_active_values[j]);
                    const double w = real_weights[f_idx][bin_idx] - real_default_weights[f_idx];
                    feature_stats[f_idx].local_weight += (fabs(w) * sam_weights[sam_idx]);
                    feature_stats[f_idx].percent_active += sam_weights[sam_idx];
                }
            }
        }

        // create global weighting and weighted active percent:
        //   global_weight  = sum |vote| * sample_weight / total sample weight
        //   local_weight   = sum |vote| * sample_weight / weight of samples where the feature is active
        //   percent_active = weight of samples where the feature is active / total sample weight
        for (i = 0; i < feature_stats.size(); i++)
        {
            // cout << i << "\t" << fixed << setprecision(4) << feature_stats[i].local_weight << "\t" <<
            //     feature_stats[i].percent_active << endl;
            feature_stats[i].idx = i;
            if (feature_stats[i].local_weight > 0)
            {
                feature_stats[i].global_weight = feature_stats[i].local_weight;
                feature_stats[i].global_weight /= total_weight;
                feature_stats[i].local_weight /= feature_stats[i].percent_active;
                feature_stats[i].percent_active /= total_weight;
            }
        }

        sort(feature_stats.begin(), feature_stats.end());

        // drop trailing features that never contributed any weight
        while (feature_stats.size() > 0 && feature_stats[feature_stats.size() - 1].local_weight == 0)
            feature_stats.pop_back();

        os << "REAL FEATURE WEIGHTS: " << endl;
        for (i = 0; i < feature_stats.size(); i++)
        {
            int idx = feature_stats[i].idx;
            if (only_fidx >= 0 && idx != only_fidx)
                continue;

            os << i + 1 << ")\t" << idx << "\t" << real_feature_names[idx] << " (" <<
                setprecision(5) << real_weights[idx].size() - 1 <<
                " bins, " << real_update_counts[idx];

            if (round_idx < 0)
            {
                os << " updates [ " << real_first_updates[idx] << " ] )" << endl;
            }
            else
                os << " updates [ " << real_first_updates[idx] << " ], " << round_idx << " rounds)" << endl;

            os << setprecision(4) << "LW: " << feature_stats[i].local_weight << "\tGW:" <<
                feature_stats[i].global_weight << "\t%ACT: " << feature_stats[i].percent_active << endl;

            // print only the bins where the weight changes, then the last bin's weight
            int j;
            for (j = 1; j < real_weights[idx].size() - 1; j++)
                if (real_weights[idx][j] != real_weights[idx][j + 1])
                    os << " " << j << ":" << real_limits[idx][j] << "," << setprecision(4) << real_weights[idx][j];
            os << " > " << "," << setprecision(4) << real_weights[idx][j] << endl;

            if (real_default_weights[idx] != 0)
                os << "default: " << real_default_weights[idx] << endl;
            os << endl;
        }
    }
}
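
/*
   A small self-contained sketch (not part of the original file) of how the three
   statistics printed above relate. "votes" holds w_f(x_i) - default for each
   sample, "sam_weights" the phi-derived sample weights, and "active" marks the
   samples where the feature fires; all values are hypothetical.
*/
static void importance_stats_sketch()
{
    const double votes[3]       = { 0.8, 0.0, -0.5 };
    const double sam_weights[3] = { 0.2, 0.5,  0.3 };
    const bool   active[3]      = { true, false, true };

    double local_sum = 0, active_weight = 0, total_weight = 0;
    int i;
    for (i = 0; i < 3; i++)
    {
        total_weight += sam_weights[i];
        if (!active[i])
            continue;
        local_sum     += fabs(votes[i]) * sam_weights[i];
        active_weight += sam_weights[i];
    }

    const double global_weight  = local_sum / total_weight;      // GW: share of all pair weight
    const double local_weight   = local_sum / active_weight;     // LW: average |vote| where active
    const double percent_active = active_weight / total_weight;  // %ACT
    (void)global_weight; (void)local_weight; (void)percent_active;
}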
/**********************************************************************
   Collects the crucial pairs that the current model ranks incorrectly;
   each pair is stored with the ratio D[i]/D0[i] of its current to its
   initial distribution weight, and at most num_top_pairs are returned.
***********************************************************************/
void RankBoostModel::get_top_misclassified_pairs(
                            const RankBoostDataset& training_ds,
                            const vector<weight_t>& D,
                            const vector<weight_t>& D0,
                            vector<idx_weight_pair>& pair_idxs,
                            int num_top_pairs ) const
{
    const int num_samples = training_ds.get_num_samples();
    const vector<RankBoostSample>& samples = training_ds.get_samples();

    vector<weight_t> rank_scores;
    rank_scores.resize(num_samples);
    int i;
    for (i = 0; i < num_samples; i++)
        rank_scores[i] = calc_rank_score(samples[i]);

    vector<idx_weight_pair> pairs;
    const vector<SamplePairWeight>& phi_support = training_ds.get_phi_support();
    double train_error = 0;
    for (i = 0; i < phi_support.size(); i++)
    {
        // the pair is flagged when the first sample's score is not strictly
        // lower than the second's
        if (rank_scores[phi_support[i].idx1] >= rank_scores[phi_support[i].idx2])
            pairs.push_back(idx_weight_pair(i, D[i] / D0[i]));
    }
    sort(pairs.begin(), pairs.end());

    pair_idxs.clear();
    for (i = 0; i < pairs.size(); i++)
    {
        if (num_top_pairs > 0 && i >= num_top_pairs)
            break;
        pair_idxs.push_back(pairs[i]);
    }
}
/**********************************************************************
   Prints the top crucial pairs that the current model ranks incorrectly,
   each with the ratio of its current distribution weight D to its
   original weight org_D.
***********************************************************************/
void RankBoostModel::print_top_misclassified_pairs(
                            const RankBoostDataset& training_ds,
                            const vector<weight_t>& D,
                            const vector<weight_t>& org_D,
                            int num_top_pairs,
                            ostream& os) const
{
    const int num_samples = training_ds.get_num_samples();
    const vector<RankBoostSample>& samples = training_ds.get_samples();

    vector<weight_t> rank_scores;
    rank_scores.resize(num_samples);
    int i;
    for (i = 0; i < num_samples; i++)
        rank_scores[i] = calc_rank_score(samples[i]);

    vector<idx_weight_pair> pairs;
    const vector<SamplePairWeight>& phi_support = training_ds.get_phi_support();
    double train_error = 0;
    for (i = 0; i < phi_support.size(); i++)
    {
        if (rank_scores[phi_support[i].idx1] >= rank_scores[phi_support[i].idx2])
            pairs.push_back(idx_weight_pair(i, D[i] / org_D[i]));
    }
    sort(pairs.begin(), pairs.end());

    os << "Top misclassified pairs: " << endl;
    for (i = 0; i < num_top_pairs && i < pairs.size(); i++)
    {
        os << i << " " << pairs[i].idx << " " << pairs[i].weight << endl;
        const int idx1 = phi_support[pairs[i].idx].idx1;
        const int idx2 = phi_support[pairs[i].idx].idx2;
        os << idx1 << " > " << idx2 << endl;
    }
    os << endl;
}
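
/*
   Hedged usage sketch (not part of the original file): a training driver could
   periodically dump the hardest crucial pairs to see where the booster is still
   failing. "model", "training_ds", "D" and "initial_D" are assumed to live in
   that driver; D[i]/initial_D[i] measures how much a pair's weight has grown.
*/
static void monitor_hard_pairs(const RankBoostModel& model,
                               const RankBoostDataset& training_ds,
                               const vector<weight_t>& D,
                               const vector<weight_t>& initial_D)
{
    // print up to 20 misclassified pairs together with their D/initial_D ratios
    model.print_top_misclassified_pairs(training_ds, D, initial_D, 20, cout);
}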
/***************************************************************************
   Helper pair of (index, score); like feature_pair, operator< is reversed so
   that sort() orders pairs from the highest score to the lowest.
****************************************************************************/
struct score_pair {
    score_pair() : idx(int(NEG_INF)), score(NEG_INF) {};
    score_pair(int _i, float _n) : idx(_i), score(_n) {};

    bool operator< (const score_pair& other) const
    {
        return score > other.score;
    }

    int   idx;
    float score;
};