⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 me_regression_dataset.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:
}



// Returns the fraction of a class's total sample weight that is carried by
// samples having a non-zero value for the given feature.
//   label       - only samples with this label are considered
//   feature_idx - index of the feature that is tested for a non-zero value
// Returns 0 when the class has no weight at all (no sample with this label,
// or all such samples weigh zero) instead of dividing by zero.
double ME_Regression_DataSet::get_relative_weight_of_feature(int label, int feature_idx) const
{
	double class_weight=0;
	double feature_weight=0;

	size_t i;
	for (i=0; i<samples.size(); i++)
	{
		if (samples[i].label != label)
			continue;

		class_weight += samples[i].weight;
		if (samples[i].get_feature_value(feature_idx) != 0)
			feature_weight+=samples[i].weight;
	}

	// 0/0 would give NaN, which callers then multiply into the sample weights
	if (class_weight == 0)
		return 0;

	return (feature_weight/class_weight);
}


// Rescales the weights of the samples of one class so that the samples with a
// non-zero value for the given feature carry (approximately) the requested
// share of the class weight.
//   label           - class whose samples are rescaled; other classes are untouched
//   feature_idx     - feature that splits the class into "non-zero" and "other" samples
//   relative_weight - desired share of the class weight for the non-zero samples
// The multiplier applied to the non-zero samples is clamped to [0.2,5], so the
// requested share is not reached when it is too far from the current one. The
// remaining samples of the class get the complementary multiplier, which keeps
// the total weight of the class unchanged.
// Nothing is changed (and tally_samples() is not called) when the class has no
// weight, when no weight belongs to non-zero samples, or when all of the class
// weight belongs to non-zero samples (there is nothing to shift weight to).
void ME_Regression_DataSet::scale_samples_to_feature_relative_weight(int label, 
										int feature_idx, double relative_weight)
{
	double class_weight=0;
	double feature_weight=0;
	vector<bool> sams_ind;

	// sams_ind[i] is true for samples of the class with a non-zero feature value
	sams_ind.resize(samples.size(),false);
	size_t i;
	for (i=0; i<samples.size(); i++)
	{
		if (samples[i].label != label)
			continue;

		class_weight += samples[i].weight;
		if (samples[i].get_feature_value(feature_idx) != 0)
		{
			feature_weight+=samples[i].weight;
			sams_ind[i]=true;
		}
	}

	// guards against 0/0 below (empty class) and against a feature that is never active
	if (class_weight<=0 || feature_weight<=0)
		return;

	// org_weight would be 1, making the divisor (1.0-org_weight) zero
	if (feature_weight>=class_weight)
		return;

	double org_weight=feature_weight/class_weight;

	double mult_feature = relative_weight/org_weight;
	if (mult_feature<0.2)
		mult_feature=0.2;
	if (mult_feature>5)
		mult_feature=5;

	// the share that is actually reached after clamping the multiplier
	relative_weight = mult_feature*org_weight;

	double mult_others = (1.0-relative_weight)/(1.0-org_weight);

	for (i=0; i<samples.size(); i++)
	{
		// samples of other classes must keep their weights
		if (samples[i].label != label)
			continue;

		samples[i].weight *= (sams_ind[i] ? mult_feature : mult_others);
	}
	
	tally_samples();
}


/**************************************************************************
Tries to scale 
***************************************************************************/
void  ME_Regression_DataSet::serial_scale(const vector<int>& feature_idxs)
{
	int i;
	for (i=0; i<feature_idxs.size(); i++)
	{
		const int f_idx = feature_idxs[i];
		double ratio = get_relative_weight_of_feature(0,f_idx);
		scale_samples_to_feature_relative_weight(1,f_idx,ratio);
	}
}


void ME_Regression_DataSet::report_feature_statistics(int f_idx, char *name) const
{
	vector<double> vals0,vals1;
	double avg_nz0=0, avg_nz1=0;
	double wnz0=0, wnz1=0, wz0=0, wz1=0;
	int i;

	for (i=0; i<samples.size(); i++)
	{
		double val = 0;
		int j;
		for (j=0; j<samples[i].f_vals.size(); j++)
		{
			if (samples[i].f_vals[j].f_idx == f_idx)
			{
				val = samples[i].f_vals[j].val;
				break;
			}
		}

		double weight = samples[i].weight;
		int label = samples[i].label;

		if (val != 0)
		{
			if (label == 0)
			{
				wnz0+= weight;
				avg_nz0 += weight * val;
				vals0.push_back(val);
			}
			else
			{
				wnz1+= weight;
				avg_nz1 += weight * val;
				vals1.push_back(val);
			}
		}
		else
		{
			if (label == 0)
			{
				wz0+=weight;
			}
			else
				wz1+=weight;
		}
	}

	if (avg_nz0 != 0)
		avg_nz0/=wnz0;
	if (avg_nz1 != 0)
		avg_nz1/=wnz1;
	
	printf("Statistics for feature %d ",f_idx);
	if (name)
		printf(" %s",name);
	printf("\n");
	printf("Class 0:\n");
	printf("weight samples with non-zero vals: %.3f (%.2f)  samples with zero val: %.3f (%.2f)\n",
			wnz0,wnz0/(wnz0+wz0),wz0,wz0/(wnz0+wz0));

	printf("Avg weighted: %g    non-weighted vals:\n",avg_nz0);
	sort(vals0.begin(),vals0.end());
	
	// prints avgs of tenths of the values
	int ts=vals0.size()/10;
	int p=0;
	for (i=0; i<9; i++)
	{
		int next=p+ts;
		int j;
		double av=0;
		for (j=p; j<next; j++)
			av+=vals0[j];

		printf("%.4f  ",av/ts);
		p+=ts;
	}

	double av=0;
	for (i=p; i<vals0.size(); i++)
		av+=vals0[i];

	printf("%.4f\n",av/(vals0.size()-p));


	printf("Class 1:\n");
	printf("weight samples with non-zero vals: %.3f (%.2f)  samples with zero val: %.3f (%.2f)\n",
			wnz1,wnz1/(wnz1+wz1),wz1,wz1/(wnz1+wz1));

	printf("Avg weighted: %g    non-weighted vals:\n",avg_nz1);
	sort(vals1.begin(),vals1.end());
	
	// prints avgs of tenths of the values
	ts=vals1.size()/10;
	p=0;
	for (i=0; i<9; i++)
	{
		int next=p+ts;
		int j;
		double av=0;
		for (j=p; j<next; j++)
			av+=vals1[j];

		printf("%.4f  ",av/ts);
		p+=ts;
	}

	av=0;
	for (i=p; i<vals1.size(); i++)
		av+=vals1[i];

	printf("%.4f\n\n\n",av/(vals1.size()-p));

}






// Fills `extract` with the samples of this dataset whose label equals `label`.
// Any samples previously held by `extract` are discarded, its number of classes
// is set to that of this dataset, and it is re-tallied once the copy is done.
void ME_Regression_DataSet::extract_class_samples(int label, ME_Regression_DataSet& extract) const
{
	// start from an empty dataset that has the same number of classes
	extract.samples.clear();
	extract.num_samples = 0;
	extract.num_classes = num_classes;

	int s=0;
	while (s<num_samples)
	{
		if (samples[s].label == label)
			extract.add_sample(samples[s]);
		s++;
	}

	extract.tally_samples();
}


// Extracts the samples that have a non-zero value for the given feature.
//   feature_idx - index of the feature that must be non-zero
//   extract     - receives the matching samples; its previous samples are
//                 discarded, its number of classes is set to that of this
//                 dataset, and it is re-tallied at the end
// Each matching sample is added exactly once, even if its feature list holds
// more than one non-zero entry for feature_idx.
void ME_Regression_DataSet::extract_samples_with_activated_feature(int feature_idx,
												ME_Regression_DataSet& extract) const
{
	int i;

	extract.samples.clear();
	extract.num_samples = 0;
	extract.num_classes = num_classes;	

	for (i=0; i<num_samples; i++)
	{
		size_t j;
		for (j=0; j<samples[i].f_vals.size(); j++)
		{
			if (samples[i].f_vals[j].f_idx == feature_idx && samples[i].f_vals[j].val != 0)
			{
				extract.add_sample(samples[i]);
				break; // sample is in; no need to scan the rest of its features
			}
		}
	}

	extract.tally_samples();
}


// Appends the first other.num_samples samples of `other` to this dataset via
// add_sample(), then calls tally_samples() once all of them have been added.
void ME_Regression_DataSet::add_other_dataset_samples(const ME_Regression_DataSet& other)
{
	int s=0;
	while (s<other.num_samples)
	{
		add_sample(other.samples[s]);
		s++;
	}

	tally_samples();
}





// Collects the indices (positions in `samples`) of all samples whose label
// equals `label`. `idxs` is emptied first and is filled in increasing order.
void ME_Regression_DataSet::get_samples_with_label(int label, vector<int>& idxs) const
{
	idxs.clear();

	const int total = static_cast<int>(samples.size());
	int s=0;
	while (s<total)
	{
		if (samples[s].label == label)
			idxs.push_back(s);
		s++;
	}
}





// Writes a one-line summary per feature to the given stream.
//   os            - stream that receives the whole summary
//   feature_names - optional array of names indexed by feature index (may be
//                   NULL, in which case no names are written)
// Each line holds the feature index, the entries [0] and [1] of the feature's
// `ratios` row, the entries [0] and [1] of its `avg_nz` row (both tables come
// from calc_feature_non_zero_weights), and the feature name.
// NOTE(review): presumably entries [0] and [1] correspond to classes 0 and 1 -
// confirm against calc_feature_non_zero_weights.
void ME_Regression_DataSet::print_feature_summary(ostream& os, const char **feature_names) const
{
	int i;

	vector< vector<double> > ratios, avg_nz;

	calc_feature_non_zero_weights(ratios,avg_nz);

	for (i=0; i<num_features; i++)
	{
		os << setw(4) << left << i << " ";
		os << setw(10) << left << setprecision(3) << ratios[i][0] << " " << setw(10) << setprecision(3) << left << ratios[i][1] << " ";
		os << " ( " << setw(6) << setprecision(3) << left << avg_nz[i][0] << " , " <<  setw(6) << setprecision(3) << left << avg_nz[i][1] << ") ";
		os << "   " << setw(6);

		// the name and the line end belong to the same stream as the rest of the line
		if (feature_names)
			os << feature_names[i];
		os << endl;
	}
}

void ME_Regression_DataSet::clear(int num_classes)
{

	num_classes=num_classes;  // number of classes k in the data = max_label+1
	num_samples=0;
	num_features=0;
	class_weights.clear();
	if (num_classes>0)
		class_weights.resize(num_classes,0);

	samples.clear();
}



void ME_Regression_Sample::print(const char **feature_names) const
{
	int j;

	if (! feature_names)
	{
		cout << "> " << label << " " <<weight << endl;
		for (j=0; j<f_vals.size(); j++)
			cout << f_vals[j].f_idx << " " << f_vals[j].val << " ";

		cout << endl;
		return;
	}

	cout << "LABEL " << label << ",  weight " << weight << endl;
	for (j=0; j<f_vals.size(); j++)
	{
		cout << f_vals[j].f_idx << "\t" << setprecision(3) << fixed << f_vals[j].val << "\t" <<
			feature_names[f_vals[j].f_idx] << endl;
	}
	cout << endl;

}


// Removes the first entry of f_vals whose feature index equals f_idx, keeping
// the order of the remaining entries. Does nothing if there is no such entry.
void ME_Regression_Sample::remove_feature(int f_idx)
{
	// locate the first entry that belongs to the feature
	size_t pos=0;
	while (pos<f_vals.size() && f_vals[pos].f_idx != f_idx)
		pos++;

	// erase() shifts the later entries down by one and drops the last slot
	if (pos<f_vals.size())
		f_vals.erase(f_vals.begin()+pos);
}

// Prints the first num_samples samples of the dataset, in order, by calling
// print() on each of them without arguments.
void ME_Regression_DataSet::print() const
{
	int s=0;
	while (s<num_samples)
	{
		samples[s].print();
		s++;
	}
}


// Prints to stdout the number of classes, the number of samples, the total
// weight, and for every class its weight divided by the total weight.
void ME_Regression_DataSet::print_summary() const
{
	printf("Classes %d\n",num_classes);
	printf("Samples %d\n",num_samples);
	printf("Total weight %.3f\n",total_weight);
	printf("Relative class weights:\n");

	int c=0;
	while (c<num_classes)
	{
		printf("%d - %.4f\n",c,class_weights[c]/total_weight);
		c++;
	}
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -