⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rankboost.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	return (t== max_num_rounds); // normal termination
}



/********************************************************************************
Snapshots the current model state into the matching best_* members: the round
index, the train/test errors, the total default weight and all per-feature
binary and real parameters.
*********************************************************************************/
void RankBoostModel::set_best_model_parameters_to_current_parameters()
{
	// round bookkeeping and error figures
	best_round_idx            = current_round;
	best_train_error          = train_error;
	best_test_error           = test_error;
	best_total_default_weight = total_default_weight;

	// everything that describes the binary features
	best_ind_active_binary_feature = ind_active_binary_feature;
	best_binary_weights            = binary_weights;
	best_binary_update_counts      = binary_update_counts;
	best_non_zero_binary_idxs      = non_zero_binary_idxs;

	// everything that describes the real features
	best_ind_active_real_feature = ind_active_real_feature;
	best_real_weights            = real_weights;
	best_real_limits             = real_limits;
	best_real_default_weights    = real_default_weights;
	best_real_update_counts      = real_update_counts;
	best_non_zero_real_idxs      = non_zero_real_idxs;
}

/********************************************************************************
Restores the current model state from the best_* members: the round index, the
train/test errors, the total default weight and all per-feature binary and
real parameters are overwritten with the stored best values.
*********************************************************************************/
void RankBoostModel::set_current_model_parameters_to_best_parameters()
{
	// round bookkeeping and error figures
	current_round        = best_round_idx;
	train_error          = best_train_error;
	test_error           = best_test_error;
	total_default_weight = best_total_default_weight;

	// everything that describes the binary features
	ind_active_binary_feature = best_ind_active_binary_feature;
	binary_weights            = best_binary_weights;
	binary_update_counts      = best_binary_update_counts;
	non_zero_binary_idxs      = best_non_zero_binary_idxs;

	// everything that describes the real features
	ind_active_real_feature = best_ind_active_real_feature;
	real_weights            = best_real_weights;
	real_limits             = best_real_limits;
	real_default_weights    = best_real_default_weights;
	real_update_counts      = best_real_update_counts;
	non_zero_real_idxs      = best_non_zero_real_idxs;
}


/********************************************************************************
Applies one boosting update to a binary feature: alpha is added to the feature's
weight and its update counter is incremented.
On the feature's first update its index is appended to non_zero_binary_idxs,
and that list is re-sorted.
*********************************************************************************/
void RankBoostModel::update_model_weights_for_binary_feature(int best_binary_idx, 
															 weight_t alpha)
{
	binary_weights[best_binary_idx] += alpha;

	// only the very first update of a feature has to register its index
	const bool is_first_update = (++binary_update_counts[best_binary_idx] == 1);
	if (! is_first_update)
		return;

	non_zero_binary_idxs.push_back(best_binary_idx);
	sort(non_zero_binary_idxs.begin(),non_zero_binary_idxs.end());
}



/********************************************************************************
Applies one boosting update to a real (thresholded) feature.
alpha is added to every bin weight in the inclusive index range
[theata_idx_start, theta_idx_end]; a negative theta_idx_end stands for the
last bin of the feature. When q_def is positive, alpha is also added to the
feature's default weight and to total_default_weight.
On the feature's first update its index is appended to non_zero_real_idxs,
and that list is re-sorted.
*********************************************************************************/
void RankBoostModel::update_model_weights_for_real_feature(weight_t alpha, 
			int best_real_idx, int q_def, int theata_idx_start, int theta_idx_end)
{
	// negative end index: run through the last bin
	if (theta_idx_end<0)
		theta_idx_end = static_cast<int>(real_weights[best_real_idx].size()-1);

	for (int bin_idx=theata_idx_start; bin_idx<=theta_idx_end; bin_idx++)
		real_weights[best_real_idx][bin_idx] += alpha;

	// the default weights are only touched when q_def is positive
	if (q_def>0)
	{
		real_default_weights[best_real_idx] += alpha;
		total_default_weight += alpha;
	}

	// only the very first update of a feature has to register its index
	if (++real_update_counts[best_real_idx] == 1)
	{
		non_zero_real_idxs.push_back(best_real_idx);
		sort(non_zero_real_idxs.begin(),non_zero_real_idxs.end());
	}
}


// A feature index together with a score.
// operator< is deliberately reversed (a larger score compares as "less"), so
// sorting a container of feature_pair puts the highest score first.
struct feature_pair {
	int idx;
	float score;

	feature_pair() : idx(-1), score(NEG_INF) {}
	feature_pair(int feature_idx, float feature_score) : idx(feature_idx), score(feature_score) {}

	bool operator< (const feature_pair& rhs) const
	{
		return rhs.score<score;
	}
};

/***************************************************************************
Writes a listing of the model's features for the current round to os.

Binary features with a non-zero weight are listed first, ordered by
decreasing absolute weight, with their weight, index, name and update count.

Real features follow, ordered by the largest absolute value found among
their bin weights and their default weight; features for which that value
is zero are omitted. For each feature the bins whose weight differs from
the next bin's weight are printed as "bin:limit,weight", followed by the
weight of the last bin and, if non-zero, the default weight.

Note: the stream's precision is changed via setprecision and not restored.
****************************************************************************/
void RankBoostModel::ouput_ranked_feature_list( ostream& os) const
{
	os << "FEATURE LIST FOR ROUND " << current_round << endl;
	
	if (binary_weights.size()>0)
	{
		os << "BINARY FEATURE WEIGHTS: " << endl;

		// collect the features that carry a weight, scored by |weight|
		vector<feature_pair> bin_pairs;
		const int num_binary = static_cast<int>(binary_weights.size());
		int i;
		for (i=0; i<num_binary; i++)
			if (binary_weights[i] != 0)
				bin_pairs.push_back(feature_pair(i,fabs(binary_weights[i])));

		// feature_pair compares in reverse, so this yields highest score first
		sort(bin_pairs.begin(),bin_pairs.end());

		os << setprecision(7);
		const int num_bin_pairs = static_cast<int>(bin_pairs.size());
		for (i=0; i<num_bin_pairs; i++)
		{
			const int idx = bin_pairs[i].idx;
			os << i+1<< ")\t" << binary_weights[idx] << "\t" << idx << "\t" <<
				binary_feature_names[idx] << " (" << binary_update_counts[idx] << 
				" updates)" << endl;
		}
		os << endl;
	}


	if (real_weights.size()>0)
	{
		os << "REAL FEATURE WEIGHTS: " << endl;

		// score each real feature by its largest absolute bin/default weight
		vector<feature_pair> real_pairs;
		const int num_real = static_cast<int>(real_weights.size());
		int i;
		for (i=0; i<num_real; i++)
		{
			float max_abs=0;
			const int num_bins = static_cast<int>(real_weights[i].size());
			int j;
			for (j=0; j<num_bins; j++)
				if (fabs(real_weights[i][j])>max_abs)
					max_abs=fabs(real_weights[i][j]);
			
			if (fabs(real_default_weights[i])>max_abs)
				max_abs = fabs(real_default_weights[i]);

			// nothing to report for a feature without any weight
			if (max_abs == 0)
				continue;
					
			real_pairs.push_back(feature_pair(i,max_abs));
		}

		sort(real_pairs.begin(),real_pairs.end());

		const int num_real_pairs = static_cast<int>(real_pairs.size());
		for (i=0; i<num_real_pairs; i++)
		{
			const int idx = real_pairs[i].idx;

			// Signed index of the last bin. A feature can be listed because of
			// its default weight alone, so its bin vector may be empty or hold
			// a single bin; the unsigned expression size()-1 would wrap around
			// and the loop below would index far out of range.
			const int last_bin = static_cast<int>(real_weights[idx].size()) - 1;

			os << i+1 << ")\t" << idx << "\t" << real_feature_names[idx] << "  (" << 
				setprecision(5) << last_bin << 
				" bins, " << real_update_counts[idx] << "  updates)" << endl;
			
			// only bins whose weight changes at the next bin are printed
			int j;
			for (j=1; j<last_bin; j++)
				if (real_weights[idx][j] != real_weights[idx][j+1])
					os << "  " << j << ":" << real_limits[idx][j] << "," << setprecision(4) << real_weights[idx][j];
				
			// weight of the last bin (skipped when there are no bins at all)
			os << " >  " << "," << setprecision(4);
			if (last_bin>=0)
				os << real_weights[idx][last_bin];
			os << endl;

			if (real_default_weights[idx] != 0)
				os << "default: " << real_default_weights[idx] << endl;
			
			os << endl;
		}
	}
}


// Per-feature statistics gathered when ranking features by importance.
// operator< is deliberately reversed (a larger local_weight compares as
// "less"), so sorting a container of FeatureStats puts the largest
// local_weight first.
struct FeatureStats {
	int	   idx;
	double global_weight;
	double local_weight;
	double percent_active;

	FeatureStats() : idx(NEG_INF), global_weight(0), local_weight(0), percent_active(0) {}

	bool operator< (const FeatureStats& rhs) const
	{
		return rhs.local_weight<local_weight;
	}
};



/******************************************************************************
Measures the "weight" of each real feature, globally (how much it adds to all
examples) and locally (how much it adds to the samples for which it is
applicable), and outputs a list ranked according to the local importance.

training_ds - supplies the samples and the phi support (weighted sample pairs);
              a sample's weight is the sum of the weights of the pairs it
              takes part in.
os          - stream the listing is written to.
only_fidx   - when >= 0, only the feature with this index is printed.
round_idx   - when >= 0, it is printed as the number of rounds in each
              feature's header line.

Side effects: calls remove_default_weights(), and for every real feature that
still has a non-zero default weight, subtracts that default from all of the
feature's bin weights and resets the default to 0. The stream's precision is
changed via setprecision and not restored.
NOTE(review): remove_default_weights() is not visible here; presumably it
already folds the defaults into the bins, making the in-function removal a
safety net - confirm.
*******************************************************************************/
void RankBoostModel::ouput_importance_ranked_feature_list( const RankBoostDataset& training_ds, 
														  ostream& os,
														  int only_fidx,
														  int round_idx)
{
	remove_default_weights();

	if (real_weights.size()>0)
	{
		int i;

		// calc sample "weights": each pair adds its weight to both of its samples
		const vector<SamplePairWeight>& phi = training_ds.get_phi_support();
		const vector<RankBoostSample>& samples = training_ds.get_samples();
		vector<double> sam_weights;
		sam_weights.resize(samples.size(),0);

		for (i=0; i<phi.size(); i++)
		{
			sam_weights[phi[i].idx1]+=phi[i].weight;
			sam_weights[phi[i].idx2]+=phi[i].weight;
		}

		double total_weight =0;
		for (i=0; i<sam_weights.size(); i++)
			total_weight += sam_weights[i];
	
		// sum the weights: for every active real feature of every sample, add
		// the sample-weighted |bin weight - default weight| of the bin it hits
		const int num_real = real_weights.size();
		vector<FeatureStats> feature_stats;
		feature_stats.resize(num_real);
		for (i=0; i<samples.size(); i++)
		{
			const RankBoostSample& sam = samples[i];
			int j;
			for (j=0; j<sam.real_active_idxs.size(); j++)
			{
				const int f_idx   = sam.real_active_idxs[j];

				// features without any bins contribute nothing
				if (real_weights[f_idx].size() == 0)
					continue;

				const int bin_idx = get_real_bin_idx_for_value(f_idx,sam.real_active_values[j]);
				const double w    = real_weights[f_idx][bin_idx] - real_default_weights[f_idx];

				feature_stats[f_idx].local_weight   += (fabs(w) * sam_weights[i]);
				feature_stats[f_idx].percent_active +=  sam_weights[i];
			}
		}

		// remove default weight from all features
		bool changed_a_default = false;
		for (i=0; i<feature_stats.size(); i++)
		{
			if (real_default_weights[i] != 0)
			{
				int j;
				for (j=0 ;j<real_weights[i].size(); j++)
					real_weights[i][j] -= real_default_weights[i];
				real_default_weights[i]=0;
				changed_a_default = true;
			}
		}


		// recompute weight for all if needed
		if (changed_a_default)
		{
			feature_stats.clear();
			feature_stats.resize(num_real);

			int sam_idx;
			for (sam_idx=0; sam_idx<samples.size(); sam_idx++)
			{
				const RankBoostSample& sam = samples[sam_idx];
				int j;
				for (j=0; j<sam.real_active_idxs.size(); j++)
				{
					const int f_idx   = sam.real_active_idxs[j];

					// same guard as in the first pass: without it a feature
					// with no bins would be indexed out of range here
					if (real_weights[f_idx].size() == 0)
						continue;

					const int bin_idx = get_real_bin_idx_for_value(f_idx,sam.real_active_values[j]);
					const double w    = real_weights[f_idx][bin_idx] - real_default_weights[f_idx];

					feature_stats[f_idx].local_weight += (fabs(w) * sam_weights[sam_idx]);
					feature_stats[f_idx].percent_active+= sam_weights[sam_idx];
				}
			}
		}

		// create global weighting and weighted active percent
		for (i=0; i<feature_stats.size(); i++)
		{

	//		cout << i << "\t" << fixed << setprecision(4) << feature_stats[i].local_weight << "\t" << 
	//		feature_stats[i].percent_active << endl;
			feature_stats[i].idx = i;
			if (feature_stats[i].local_weight>0)
			{
				// global: relative to the weight of all samples;
				// local:  relative to the weight of the samples the feature is active in
				feature_stats[i].global_weight = feature_stats[i].local_weight;
				feature_stats[i].global_weight /= total_weight;
				feature_stats[i].local_weight  /= feature_stats[i].percent_active;
				feature_stats[i].percent_active /= total_weight;
			}
		}

		// FeatureStats compares in reverse, so the largest local weight comes
		// first and the zero-weight features end up at the back, where they
		// are dropped
		sort(feature_stats.begin(),feature_stats.end());
		while (feature_stats.size()>0 && feature_stats[feature_stats.size()-1].local_weight == 0 )
			feature_stats.pop_back();

		os << "REAL FEATURE WEIGHTS: " << endl;
			
		for (i=0; i<feature_stats.size(); i++)
		{
			int idx = feature_stats[i].idx;

			if (only_fidx>=0 && idx != only_fidx)
				continue;

			// Signed index of the last bin. The unsigned expression size()-1
			// would wrap around for an empty vector, and with a single bin the
			// loop counter would end up one past the only valid index.
			const int last_bin = static_cast<int>(real_weights[idx].size()) - 1;

			os << i+1 << ")\t" << idx << "\t" << real_feature_names[idx] << "  (" << 
				setprecision(5) << last_bin << 
				" bins, " << real_update_counts[idx];
			if (round_idx<0)
			{
				os << "  updates [ " << real_first_updates[idx] << " ]  )" << endl;
			}
			else
				os << " updates [ " << real_first_updates[idx] << " ],  " << round_idx << " rounds)" << endl;

			os << setprecision(4) << "LW: " <<  feature_stats[i].local_weight << "\tGW:" <<
				feature_stats[i].global_weight << "\t%ACT: " << feature_stats[i].percent_active << endl;
			
			// only bins whose weight changes at the next bin are printed
			int j;
			for (j=1; j<last_bin; j++)
				if (real_weights[idx][j] != real_weights[idx][j+1])
					os << "  " << j << ":" << real_limits[idx][j] << "," << setprecision(4) << real_weights[idx][j];
				
			// weight of the last bin (skipped when there are no bins at all)
			os << " >  " << "," << setprecision(4);
			if (last_bin>=0)
				os << real_weights[idx][last_bin];
			os << endl;

			if (real_default_weights[idx] != 0)
				os << "default: " << real_default_weights[idx] << endl;
			
			os << endl;
		}
	}
}




/**********************************************************************
Collects the misclassified pairs of the training set's phi support.

Every sample is scored with calc_rank_score(). A pair i counts as
misclassified when the score of its idx1 sample is >= the score of its
idx2 sample; it is recorded as idx_weight_pair(i, D[i]/D0[i]).
The recorded pairs are sorted with idx_weight_pair's operator< and
returned in pair_idxs (previous contents are discarded).

num_top_pairs - when > 0, at most this many pairs (from the front of the
                sorted list) are returned; otherwise all are returned.

NOTE(review): the sort direction depends on idx_weight_pair::operator<,
which is not visible here - presumably largest weight ratio first.
D and D0 are indexed by phi-support position; D0[i] is assumed non-zero.
***********************************************************************/
void RankBoostModel::get_top_misclassified_pairs(
								   const RankBoostDataset& training_ds,
								   const vector<weight_t>& D,
								   const vector<weight_t>& D0,
								   vector<idx_weight_pair>& pair_idxs,
								   int num_top_pairs ) const
{
	const int num_samples = training_ds.get_num_samples();
	const vector<RankBoostSample>& samples = training_ds.get_samples();

	// score each sample once, pairs are compared through these scores
	vector<weight_t> rank_scores;
	rank_scores.resize(num_samples);

	int i;
	for (i=0; i<num_samples; i++)
		rank_scores[i]=calc_rank_score(samples[i]);

	vector<idx_weight_pair> pairs;
	const vector<SamplePairWeight>& phi_support = training_ds.get_phi_support();
	for (i=0; i<phi_support.size(); i++)
	{
		if (rank_scores[phi_support[i].idx1]>=rank_scores[phi_support[i].idx2])
			pairs.push_back(idx_weight_pair(i,D[i]/D0[i]));
	}

	sort(pairs.begin(),pairs.end());

	// keep only the requested number of pairs from the front of the list
	if (num_top_pairs>0 && pairs.size()>static_cast<size_t>(num_top_pairs))
		pairs.erase(pairs.begin()+num_top_pairs, pairs.end());

	// hand the result over without copying it element by element
	pair_idxs.swap(pairs);
}

/**********************************************************************
***********************************************************************/
void RankBoostModel::print_top_misclassified_pairs(
								   const RankBoostDataset& training_ds,
								   const vector<weight_t>& D,
								   const vector<weight_t>& org_D,
								   int num_top_pairs,
								   ostream& os) const
{
	const int num_samples = training_ds.get_num_samples();
	const vector<RankBoostSample>& samples = training_ds.get_samples();

	vector<weight_t> rank_scores;

	rank_scores.resize(num_samples);

	int i;
	for (i=0; i<num_samples; i++)
		rank_scores[i]=calc_rank_score(samples[i]);

	vector<idx_weight_pair> pairs;
	const vector<SamplePairWeight>& phi_support = training_ds.get_phi_support();
	double train_error=0;
	for (i=0; i<phi_support.size(); i++)
	{
		if (rank_scores[phi_support[i].idx1]>=rank_scores[phi_support[i].idx2])
			pairs.push_back(idx_weight_pair(i,D[i]/org_D[i]));
	}

	sort(pairs.begin(),pairs.end());

	os << "Top miscalssified pairs: " << endl;
	for (i=0; i<num_top_pairs && i<pairs.size(); i++)
	{
		os << i << " " << pairs[i].idx << " " << pairs[i].weight << endl;
		const int idx1 = phi_support[pairs[i].idx].idx1;
		const int idx2 = phi_support[pairs[i].idx].idx2;

		os << idx1 << " > " << idx2 << endl;
	}
	os << endl;
}


/***************************************************************************

****************************************************************************/

struct score_pair {
	score_pair() : idx(int(NEG_INF)), score(NEG_INF) {};
	score_pair(int _i, float _n) : idx(_i), score(_n) {};
	bool o

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -