⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qcdotprodsim.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
字号:
#include "QuickClustering.h"
#include "auxfun.h"

struct ppair {
	bool operator< (const ppair& other) const
	{
		return (inten>other.inten);
	}

	intensity_t inten;
	int idx;
};






 
// sets the adjusted intensity of the peaks
void set_adjusted_inten(QCPeak *peaks, int num_peaks)
{
	int i;
	float total_intensity = 0.0;
	for (i=0; i<num_peaks; i++)
		total_intensity+= peaks[i].intensity;

	if (total_intensity <=0)
		return;

	const float norm_factor = 1000.0/total_intensity;

	for (i=0; i<num_peaks; i++)
		peaks[i].adjusted_inten = log(norm_factor * peaks[i].intensity);

	return; 
} 






double num_similarity_calculations=0;




/************************************************************************
// Calculates the cosine dot product of to sets of peaks (designated by the idxs)
// the values that are used are the ones in the adjusted_inten field
*************************************************************************/
float calc_selected_dot_prod(mass_t tolerance, 
							 const QCPeak *pa, int na, const vector<int>& peak_idxs_a,
	  					     const QCPeak *pb, int nb, const vector<int>& peak_idxs_b,
							 bool verbose)
{	
	float top_sum = 0;
	float sum_a_sqr = 0;
	float sum_b_sqr = 0;

	num_similarity_calculations++;

	const int num_a = peak_idxs_a.size();
	const int num_b = peak_idxs_b.size();

	int a=0,b=0;

	float top=0;
	while (a<num_a && b<num_b)
	{
		const int a_idx = peak_idxs_a[a];
		const int b_idx = peak_idxs_b[b];

		if (a_idx<0 || a_idx>=na)
		{
			int i;
			for (i=0; i<num_a; i++)
				cout << "a " << i << "\t" << peak_idxs_a[i] << endl;
			int qq=1;
		}

		if (b_idx<0 || b_idx>=nb)
		{
			int i;
			for (i=0; i<num_b; i++)
				cout << "b " << i << "\t" << peak_idxs_b[i] << endl;
			int qq=1;
		}
		const QCPeak& peak_a = pa[a_idx];
		const QCPeak& peak_b = pb[b_idx];

		if (fabs(peak_a.mass - peak_b.mass)<=tolerance)
		{
			top += peak_a.adjusted_inten * peak_b.adjusted_inten;
			a++;
			b++;
			continue;

		}

		if (peak_a.mass<peak_b.mass)
		{
			a++;
		}
		else
			b++;
	}

	float sqr_a=0;
	for (a=0; a<num_a; a++)
	{
		const float& inten = pa[peak_idxs_a[a]].adjusted_inten;
		sqr_a += (inten*inten);
	}

	float sqr_b=0;
	for (b=0; b<num_b; b++)
	{
		const float& inten = pb[peak_idxs_b[b]].adjusted_inten;
		sqr_b += (inten*inten);
	}

	if (top<=0)
		return 0;

	float simz = (float)top/sqrt(sqr_a*sqr_b);
	if (simz>1)
		simz=1;

	return simz; 
}



// scans the peaks in the spectrum to select the idxs of the peaks with the
// highest ranks (both absolute and using a local window). Invalidates certian 
// mass such as M M-H2O, etc.
void select_top_peak_idxs(QCPeak *peaks, 
						  int num_peaks, 
						  mass_t m_over_z, 
						  mass_t tolerance, 
						  vector<int>& top_ranked_peak_idxs, 
						  float top_x_masses[NUM_TOP_CLUSTER_PEAKS],
						  int top_peaks_per_1000da,
						  Config *config)
{
	static vector<bool> inv_peak_indicators;
	static vector<bool> high_inten_indicators;
	static vector<ppair> pairs;
	const mass_t local_window_size = 75.0; // also look at the strongest peak in the local window of this size
	const mass_t iso_mass_margin = 1.003 + tolerance * 0.6;
	const mass_t min_exclude_mass = (config ? config->get_min_exclude_range() : 99999);
	const mass_t max_exclude_mass = (config ? config->get_max_exclude_range() : -1 );
	int i;

	int num_global_high_inten = (int)(top_peaks_per_1000da * peaks[num_peaks-1].mass * 0.001);

	if (NUM_TOP_CLUSTER_PEAKS<1)
		num_global_high_inten = NUM_TOP_CLUSTER_PEAKS;

	// reallocate static memory
	if (num_peaks<=1 || num_peaks>1E5)
	{
		cout << "Error: invalid number of peaks : " << num_peaks << endl;
		exit(1);
	}
	
	inv_peak_indicators.clear();
	high_inten_indicators.clear();

	inv_peak_indicators.resize(num_peaks,false);
	high_inten_indicators.resize(num_peaks,false);

	// mark invalid peaks such as isotopes and derivatives of the parent ion
	// set indicators if there is a peak ahead
	for (i=0; i<num_peaks; i++)
	{
		const mass_t forward_mass = peaks[i].mass + iso_mass_margin;
		int j=i+1;
		while (j<num_peaks && peaks[j].mass < forward_mass)
		{
			if (peaks[j].intensity>peaks[i].intensity)
			{
				inv_peak_indicators[i]=true;
				break;
			}
			++j;
		}

		const mass_t backwards_mass = peaks[i].mass - iso_mass_margin;
		j=i-1;
		while (j>=0 && peaks[j].mass > backwards_mass)
		{
			if (peaks[j].intensity>peaks[i].intensity)
			{
				inv_peak_indicators[i]=true;
				break;
			}
			--j;
		}
	}

	// find highest intensity peaks
	pairs.clear();	
	for (i=0; i<num_peaks; i++)
	{
		if (inv_peak_indicators[i])
			continue;

		if (max_exclude_mass>0)
		{
			const mass_t peak_mass = peaks[i].mass;
			if (peak_mass<=max_exclude_mass &&
				peak_mass>=min_exclude_mass &&
				config->check_if_mass_is_in_exclude_range(peak_mass))
			{
//				cout << "Excluded: " << peak_mass << endl;
				continue;
			}
		}

		ppair p;
		p.idx=i;
		p.inten = peaks[i].intensity;
		pairs.push_back(p);
	}
	sort(pairs.begin(),pairs.end());

	
	for (i=0; i<num_global_high_inten && i<pairs.size(); i++)
		high_inten_indicators[pairs[i].idx]=true;
		
	if (top_x_masses)
	{
		vector<float> top_masses;

		for (i=0; i<NUM_TOP_CLUSTER_PEAKS && i<pairs.size(); i++)
			top_masses.push_back(peaks[pairs[i].idx].mass);
	
		sort(top_masses.begin(),top_masses.end());
		for (i=0; i<top_masses.size(); i++)
			top_x_masses[i]=top_masses[i];
	}
	
	// collect the selected peak idxs
	top_ranked_peak_idxs.clear();
	for (i=0; i<num_peaks; i++)
		if (high_inten_indicators[i])
			top_ranked_peak_idxs.push_back(i);
}






// functions for dot product statistics















⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -