⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pmc_rank.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 4 页
字号:

		if (back_idx>=0 && peaks[forward_idx].mass + peaks[back_idx].mass > min2)
		{
			if (! (strict_iso_inds[back_idx] || strict_iso_inds[forward_idx]))
				continue;

			const mass_t offset = fabs(peaks[forward_idx].mass + peaks[back_idx].mass - single_charge_pair_sum);
			const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
					
			pairs2.push_back(offset_pair(offset,inten_sum));
		}
	}


	c2_pairs0.clear();
	forward_idx = -1;
	back_idx = num_peaks-1;
	while (forward_idx<back_idx)
	{
		forward_idx++;
		if (strict_iso_inds[forward_idx])
			continue;
			
		mass_t sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
		while (back_idx>=0 && sum>max_double_sum)
		{
			back_idx--;
			if (back_idx<0)
				break;
			sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
		}

		if (back_idx>=0 && sum > min_double_sum)
		{
			if (strict_iso_inds[back_idx])
				continue;

			const mass_t offset = fabs(sum - double_charge_pair_sum);
			const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
			
			c2_pairs0.push_back(offset_pair(offset,inten_sum));
		}
	}

	c2_pairs1.clear();
	const mass_t maxc21 = max_double_sum + 1.0;
	const mass_t minc21 = min_double_sum + 1.0;
	forward_idx = -1;
	back_idx = num_peaks-1;
	while (forward_idx<back_idx)
	{
		forward_idx++;
	
		mass_t sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
		while (back_idx>=0 && sum>maxc21)
		{
			back_idx--;
			if (back_idx<0)
				break;
			sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
		}

		if (back_idx>=0 && sum > minc21)
		{
			if (! (strict_iso_inds[back_idx] || strict_iso_inds[forward_idx]) )
				continue;

			const mass_t offset = fabs(sum - double_charge_pair_sum);
			const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
			
			c2_pairs1.push_back(offset_pair(offset,inten_sum));
		}
	}


	c2_pairs2.clear();
	const mass_t maxc22 = max_double_sum + 2.0;
	const mass_t minc22 = min_double_sum + 2.0;
	forward_idx = -1;
	back_idx = num_peaks-1;
	while (forward_idx<back_idx)
	{
		forward_idx++;
	
		mass_t sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
		while (back_idx>=0 && sum>maxc22)
		{
			back_idx--;
			if (back_idx<0)
				break;
			sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
		}

		if (back_idx>=0 && sum > minc22)
		{
			if (! (strict_iso_inds[back_idx] || strict_iso_inds[forward_idx]) )
				continue;

			const mass_t offset = fabs(sum - double_charge_pair_sum);
			const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
			
			c2_pairs2.push_back(offset_pair(offset,inten_sum));
		}
	}

	// use the first 4 peaks
	stats.inten_strict_pairs0=0;
	stats.num_strict_pairs0 = pairs0.size();
	sort(pairs0.begin(),pairs0.end(),cmp_offset_pair_inten);
	for (i=0; i<4 && i<pairs0.size(); i++)
	{
//		stats.offset_strict_pairs0.push_back(pairs0[i].offset);
		stats.inten_strict_pairs0+=pairs0[i].inten_sum;
	}


	stats.inten_strict_pairs1=0;
	stats.num_strict_pairs1 = pairs1.size();
	sort(pairs1.begin(),pairs1.end(),cmp_offset_pair_inten);
	for (i=0; i<4 && i<pairs1.size(); i++)
	{
//		stats.offset_strict_pairs1.push_back(pairs1[i].offset);
		stats.inten_strict_pairs1+=pairs1[i].inten_sum;
	}

	stats.inten_strict_pairs2=0;
	stats.num_strict_pairs2 = pairs2.size();
	sort(pairs2.begin(),pairs2.end(),cmp_offset_pair_inten);
	for (i=0; i<4 && i<pairs2.size(); i++)
	{
//		stats.offset_strict_pairs2.push_back(pairs2[i].offset);
		stats.inten_strict_pairs2+=pairs2[i].inten_sum;
	}
	
	stats.c2_inten_strict_pairs0=0;
	stats.c2_num_strict_pairs0 = c2_pairs0.size();
	sort(c2_pairs0.begin(),c2_pairs0.end(),cmp_offset_pair_inten);
	for (i=0; i<4 && i<c2_pairs0.size(); i++)
	{
//		stats.c2_offset_strict_pairs0.push_back(c2_pairs0[i].offset);
		stats.c2_inten_strict_pairs0+=c2_pairs0[i].inten_sum;
	}
	
	stats.c2_inten_strict_pairs1=0;
	stats.c2_num_strict_pairs1 = c2_pairs1.size();
	sort(c2_pairs1.begin(),c2_pairs1.end(),cmp_offset_pair_inten);
	for (i=0; i<4 && i<c2_pairs1.size(); i++)
	{
//		stats.c2_offset_strict_pairs1.push_back(c2_pairs1[i].offset);
		stats.c2_inten_strict_pairs1+=c2_pairs1[i].inten_sum;
	}
	
	stats.c2_inten_strict_pairs2=0;
	stats.c2_num_strict_pairs2 = c2_pairs2.size();
	sort(c2_pairs2.begin(), c2_pairs2.end(),cmp_offset_pair_inten);
	for (i=0; i<4 && i<c2_pairs2.size(); i++)
	{
//		stats.c2_offset_strict_pairs2.push_back(c2_pairs2[i].offset);
		stats.c2_inten_strict_pairs2+=c2_pairs2[i].inten_sum;
	}

}



void fill_rank_PMC_stats(int charge,
						  const mass_t single_charge_pair_sum, // the sum of b+y or c+z
						  mass_t minus_range, 
						  mass_t plus_range,
						  mass_t increment,
						  Config *config,
						  const BasicSpectrum& bs,
						  const vector<bool>& strong_inds,
						  const vector<float>& iso_levels,
						  const vector<bool>& iso_inds,
						  vector<PMCRankStats>& pmc_stats_vec)
{

	const mass_t tolerance = config->get_tolerance()*0.55;
	const int num_bins_per_Da = (int)(1.0/increment);
	const int num_bins = (int)((plus_range-minus_range)*num_bins_per_Da)+1;
	const mass_t one_over_charge = 1.0/(mass_t)charge;


	if (pmc_stats_vec.size() != num_bins)
		pmc_stats_vec.resize(num_bins);

	mass_t delta=minus_range;
	int i;
	for (i=0; i<num_bins; i++)
	{	
		calc_pmc_rank_stats_for_mass(bs.peaks,bs.num_peaks,single_charge_pair_sum+delta,
			tolerance, iso_levels, strong_inds, iso_inds, pmc_stats_vec[i]);

		pmc_stats_vec[i].m_over_z = (single_charge_pair_sum+delta+charge-2.0005)/charge;
		delta+=increment;
	}
}




void PMCRankStats::clear()
{
	m_over_z=0;

	rank_score = NEG_INF;

	num_frag_pairs=0;
	num_strong_frag_pairs=0;
	num_c2_frag_pairs=0;
	num_strong_c2_frag_pairs=0;
	num_h2o_loss_frag_pairs=0;

	inten_frag_pairs=0;
	inten_strong_pairs=0;
	inten_c2_pairs=0;
	inten_c2_strong_pairs=0;
	inten_h2o_loss_frag_pairs=0;
	itnen_h2o_loss_c2_frag_pairs=0;

	mean_offset_pairs=0;
	mean_offset_strong_pairs=0;
	mean_offset_c2_pairs=0;
	mean_offset_c2_strong_pairs=0;
	mean_offset_h2o_pairs=0;
	mean_offset_c2_h2o_pairs=0;

	ind_pairs_with_min_tol=false;			 
	ind_strong_pairs_with_min_tol=false;
	ind_c2_pairs_with_min_tol=false;
	ind_c2_strong_pairs_with_min_tol=false;
	log_dis_from_pairs_min_tol=0;			 
	log_dis_from_strong_pairs_min_tol=0;
	log_dis_from_c2_pairs_min_tol=0;		 
	log_dis_from_c2_strong_pairs_min_tol=0;

	offset_pairs_ordered_by_inten.clear();
	strong_offset_pairs_ordered_by_inten.clear();
	c2_offset_pairs_ordered_by_inten.clear();


	num_strict_pairs0=0; inten_strict_pairs0=0;
	num_strict_pairs1=0; inten_strict_pairs1=0;
	num_strict_pairs2=0; inten_strict_pairs2=0;
}




/**************************************************************************

  Fills in the RankBoost feature data
***************************************************************************/
void PMCSQS_Scorer::fill_RankBoost_smaples_with_PMC(
									const BasicSpectrum& bs,
									int charge,
									vector<RankBoostSample>& samples) const
{

	const int num_samples = curr_spec_rank_pmc_tables[charge].size();
	const int idx_skip = int((1.0/bin_increment)+0.00001);
	vector<int> idx_offsets;
	int i;

	idx_offsets.clear();
	idx_offsets.push_back(-2*idx_skip);
	idx_offsets.push_back(-1*idx_skip);
	idx_offsets.push_back(idx_skip);
	idx_offsets.push_back(2*idx_skip);

	if (samples.size() != num_samples)
		samples.resize(num_samples);

	for (i=0; i<num_samples; i++)
	{
		const PMCRankStats& stats = curr_spec_rank_pmc_tables[charge][i];
		RankBoostSample& sam = samples[i];

		const float inten_norm = 1.0/(curr_spec_total_intensity+1.0);
		int r_idx=0;
		const mass_t mz_offset = (stats.m_over_z - bs.ssf->m_over_z);

		sam.clear();
		sam.add_real_feature(r_idx++,mz_offset);

		if (stats.num_frag_pairs<=2)
		{
			sam.add_real_feature(r_idx,mz_offset);
		}
		else if (stats.num_frag_pairs<4)
		{
			sam.add_real_feature(r_idx+1,mz_offset);
		}
		else
			sam.add_real_feature(r_idx+2,mz_offset);

		r_idx+=3;

		if (stats.num_strong_frag_pairs<3)
		{
			sam.add_real_feature(r_idx,mz_offset);
		}
		else
			sam.add_real_feature(r_idx+1,mz_offset);

		r_idx+=2;

		if (stats.num_c2_frag_pairs<=2)
		{
			sam.add_real_feature(r_idx,mz_offset);
		}
		else if (stats.num_c2_frag_pairs<4)
		{
			sam.add_real_feature(r_idx+1,mz_offset);
		}
		else
			sam.add_real_feature(r_idx+2,mz_offset);

		r_idx+=3;

		if (stats.num_strong_c2_frag_pairs<3)
		{
			sam.add_real_feature(r_idx,mz_offset);
		}
		else
			sam.add_real_feature(r_idx+1,mz_offset);

		r_idx+=2;

			
	/*	names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS <=2");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS <=5");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS >5");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG PAIRS <4");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG PAIRS >4");

		names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS <=2");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS <=5");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS >5");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG C2 PAIRS <4");
		names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG C2 PAIRS >4");*/

		sam.add_real_feature(r_idx++,stats.num_frag_pairs);
		sam.add_real_feature(r_idx++,stats.num_strong_frag_pairs);
		sam.add_real_feature(r_idx++,stats.num_c2_frag_pairs);
		sam.add_real_feature(r_idx++,stats.num_strong_c2_frag_pairs);
		sam.add_real_feature(r_idx++,stats.num_h2o_loss_frag_pairs);
		sam.add_real_feature(r_idx++,stats.num_h2o_loss_c2_frag_pairs);

		sam.add_real_feature(r_idx++,stats.inten_frag_pairs * inten_norm);
		sam.add_real_feature(r_idx++,stats.inten_strong_pairs * inten_norm);
		sam.add_real_feature(r_idx++,stats.inten_c2_pairs * inten_norm);
		sam.add_real_feature(r_idx++,stats.inten_c2_strong_pairs * inten_norm);
		sam.add_real_feature(r_idx++,stats.inten_h2o_loss_frag_pairs * inten_norm);
		sam.add_real_feature(r_idx++,stats.itnen_h2o_loss_c2_frag_pairs * inten_norm);

		// averages of top k offsets

		float avg=0;
		int j;
		for (j =0; j<7 && j<stats.offset_pairs_ordered_by_inten.size(); j++)
		{
			avg += fabs(stats.offset_pairs_ordered_by_inten[j]);
			if (j>=2)
				sam.add_real_feature(r_idx+j-2,avg/(float)j);
		}
		r_idx+=5;

		avg=0;
		for (j =0; j<7 && j<stats.c2_offset_pairs_ordered_by_inten.size(); j++)
		{
			avg += fabs(stats.c2_offset_pairs_ordered_by_inten[j]);
			if (j>=2)
				sam.add_real_feature(r_idx+j-2,avg/(float)j);
		}
		r_idx+=5;


		// offset data
	
		if (stats.mean_offset_pairs<POS_INF)
		{
			sam.add_real_feature(r_idx++,stats.mean_offset_pairs);
			sam.add_real_feature(r_idx++,stats.mean_offset_pairs/(1.0+stats.num_frag_pairs));
		}
		else
			r_idx+=2;

		if (stats.mean_offset_strong_pairs<POS_INF)
		{
			sam.add_real_feature(r_idx++,stats.mean_offset_strong_pairs);
			sam.add_real_feature(r_idx++,stats.mean_offset_strong_pairs/(1.0+stats.num_strong_frag_pairs));
		}
		else
			r_idx+=2;

		if (stats.mean_offset_c2_pairs<POS_INF)
		{
			sam.add_real_feature(r_idx++,stats.mean_offset_c2_pairs);
			sam.add_real_feature(r_idx++,stats.mean_offset_c2_pairs/(1.0+stats.num_c2_frag_pairs));
		}
		else
			r_idx+=2;

		if (stats.mean_offset_c2_strong_pairs<POS_INF)
		{
			sam.add_real_feature(r_idx++,stats.mean_offset_c2_strong_pairs);
			sam.add_real_feature(r_idx++,stats.mean_offset_c2_strong_pairs/(1.0+stats.num_strong_c2_frag_pairs));
		}
		else
			r_idx+=2;

		if (stats.mean_offset_h2o_pairs<POS_INF)
		{
			sam.add_real_feature(r_idx++,stats.mean_offset_h2o_pairs);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -