⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fragmentselection.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	
		const int charge = as.get_charge();
		const int size_idx = as.get_size_idx();
		const vector<Breakage>& breakages = as.get_breakages();
		const mass_t min_mass = as.get_min_peak_mass() ;
		const mass_t max_mass = as.get_max_peak_mass() ;

		vector< vector<int> > frag_ind;
		frag_ind.clear();
		frag_ind.resize(num_regions[size_idx]);
		for (j=0; j<num_regions[size_idx]; j++)
			frag_ind[j].resize(all_fragments.size(),0);

		for (j=0; j<breakages.size(); j++)
		{
			const Breakage& breakage = breakages[j];
		
			int f;
			
			if (very_verbose)
				cout << " " << breakage.fragments.size();
		//	cout << j << " (" << breakage.breakage_region << ")   ";
			for (f=0; f<breakage.fragments.size(); f++)
			{
				if (breakage.fragments[f].mass>0)
				{
				//	cout << all_fragments[breakage.fragments[f].frag_type_idx].frag_label
				//		 << "    ";
					frag_probs[size_idx][breakage.region_idx]
						      [breakage.fragments[f].frag_type_idx]++;

					frag_ind[breakage.region_idx][breakage.fragments[f].frag_type_idx]=1;
				}
			}

			
			for (f=0; f<all_fragments.size(); f++)
			{
				mass_t exp_mass = all_fragments[f].calc_expected_mass(breakage.mass,as.get_true_mass_with_19());
				if (exp_mass >= min_mass  && exp_mass <= max_mass)
					in_range_counts[size_idx][breakage.region_idx][f]++;

			}
		}
		
		if (very_verbose)
			cout << endl;


		int f;
		for (f=0; f<all_fragments.size(); f++)
		{
			int r;
			for (r=0; r<num_regions[size_idx]; r++)
				spec_counts[size_idx][r][f]+= frag_ind[r][f];
		}
	
		avg_rand += (as.get_num_peaks() * config->get_tolerance() *2.0) / (max_mass - min_mass) ;
	}

	const double num_spectra = fs.get_total_spectra();
	avg_rand /= num_spectra;

	int s;
	for (s=0; s<frag_probs.size(); s++)
	{
		int r;
		for (r=0; r<frag_probs[s].size(); r++)
		{
			int f;
			for (f=0; f<frag_probs[s][r].size(); f++)
				if (frag_probs[s][r][f]>0)
				{
				//	cout << frag_probs[c][s][r][f] << "   " << 
				//			in_range_counts[c][s][r][f] << endl;
					frag_probs[s][r][f] /= in_range_counts[s][r][f];
						
				}
		}
	}
}



struct frag_per {
	bool operator< (const frag_per& other) const
	{
		return (percent > other.percent);
	}
	int frag_idx;
	double percent;
	int in_range_count;
};



/********************************************************************
Determines which fragments should belong in a regional fragment set.
These are the actual fragments that get scored when a breakage falls in
that regions.
*********************************************************************/
void select_regional_fragments(const FileManager& fm, Config *config, int charge,
						bool verbose)
{
	vector< vector< vector<double> > > frag_probs, in_range_counts;
	vector< vector< vector<int> > >    spec_counts;
	double avg_rand;
	int    num_files_used;

	collect_probs_of_known_fragments(fm, config, frag_probs, in_range_counts, spec_counts,
		avg_rand, num_files_used, charge, verbose);
	
	const double min_num_in_range = 0.05 * num_files_used;

	int size_idx;
	for (size_idx=0; size_idx < frag_probs.size(); size_idx++)
	{
		int region_idx;
		for (region_idx=0; region_idx<frag_probs[size_idx].size(); region_idx++)
		{
			if (verbose)
			{
				cout << "Charge " << charge << ", Size " << size_idx <<", Region " << 
					region_idx << endl;
				cout << "Random " << fixed << setprecision(4) << avg_rand << endl;
			}

			const vector<int>& frag_type_idxs = config->get_regional_fragment_type_idxs(charge,
				size_idx,region_idx);

			vector<score_t> probs;
			probs.resize(frag_type_idxs.size(),0);
			int f;

			cout << "FRAG_TYPES::: " << frag_type_idxs.size() << endl;
			for (f=0; f<frag_type_idxs.size(); f++)
			{
				const int frag_type_idx = frag_type_idxs[f];
				if (in_range_counts[size_idx][region_idx][frag_type_idx]>=min_num_in_range)
				{
					probs[f] = frag_probs[size_idx][region_idx][frag_type_idx];
				}
			}
			
			config->sort_accoriding_to_fragment_probs(probs,charge,size_idx,region_idx);
			config->set_regional_random_probability(charge,size_idx,region_idx,(float)avg_rand);
		
			if (verbose)
			{
				const vector<score_t>& frag_probs = config->get_regional_fragments(charge,size_idx,region_idx).get_frag_probs();
				
				for (f=0; f<frag_type_idxs.size(); f++)
				{

					cout << setw(12) << left << config->get_fragment(frag_type_idxs[f]).label <<
							setprecision(5) << setw(10) << config->get_fragment(frag_type_idxs[f]).offset << 
							"   " << fixed << setprecision(5) << frag_probs[f] << 
							"   " << setprecision(0) << in_range_counts[size_idx][region_idx][frag_type_idxs[f]] 
							<< " ( " << spec_counts[size_idx][region_idx][frag_type_idxs[f]] <<  ")"<< endl;
				}
				cout << endl;
			}
		}
	}
}


struct combo_pair {
	bool operator< (const combo_pair& other) const
	{
		return count>other.count;
	}

	int count, f_idx1, f_idx2;
};


// chooses for each regional model the pair of fragments that identify
// the largest number of cuts not already found using strong fragments
void select_frag_combos(const FileManager& fm, 
						Config *config, 
						int charge,
						int max_num_combos)
{
	const vector< vector< RegionalFragments> > & regional_fragment_sets = 
														config->get_regional_fragment_sets()[charge];

	const vector<FragmentType>& all_fragments = config->get_all_fragments();
	const int num_all_frags = all_fragments.size();

	if (charge<1)
	{
		cout << "Error: charge must be > 1" << endl;
		exit(1);
	}

	int j;
	for (j=0; j<regional_fragment_sets.size(); j++)
	{
		int k;
		for (k=0; k<regional_fragment_sets[j].size(); k++)
			config->clear_combos(charge,j,k);
	}
	
	

	int c; // combo counter
	for (c=0; c<max_num_combos && c<6; c++)
	{
		FileSet fs;
		fs.select_files(fm,0,10000,-1,1,charge);

		vector< vector< vector< vector< int > > > > counts;
		counts.resize(regional_fragment_sets.size());
		int j;
		for (j=0; j<regional_fragment_sets.size(); j++)
		{
			int k;
			counts[j].resize(regional_fragment_sets[j].size());
			for (k=0; k<regional_fragment_sets[j].size(); k++)
			{
				int f;
				counts[j][k].resize(num_all_frags);
				for (f=0; f<num_all_frags; f++)
					counts[j][k][f].resize(num_all_frags,0);
			}
		}

		cout << endl << "Round " << c+1 << endl << endl;

		// check breakage instances
		while(1)
		{
			int j;
			AnnotatedSpectrum as;
			if (! fs.get_next_spectrum(fm,config,&as))
				break;

			as.annotate_spectrum(as.get_true_mass_with_19());

			const int size_idx = as.get_size_idx();
			const vector<Breakage>& breakages = as.get_breakages();
			const vector<Peak>& peaks = as.get_peaks();
			const vector<int>& strong_peak_idxs = as.get_strong_peak_idxs();
			vector<int> strong_ind;
			strong_ind.resize(peaks.size(),0);
			for (j=0; j<strong_peak_idxs.size(); j++)
				strong_ind[strong_peak_idxs[j]]=1;

			for (j=0; j<breakages.size(); j++)
			{
				const Breakage& breakage = breakages[j];
				if (breakage.num_frags_detected<2)
					continue;

				// check if breakage is found by a strong frag

				const int region_idx = breakage.region_idx;
				const RegionalFragments& rf = regional_fragment_sets[size_idx][region_idx];
				int f;

				for (f=0; f<breakage.fragments.size(); f++)
					if (rf.is_a_strong_frag_type(breakage.fragments[f].frag_type_idx) &&
						strong_ind[breakage.fragments[f].peak_idx])
						break;

				if (f<breakage.fragments.size())
					continue;

				// check if breakage is found by an existing combo
				const vector<FragmentCombo>& combos = rf.get_frag_type_combos();
				bool found_by_combo = false;
				if (combos.size()>0)
				{
					for (f=0; f<combos.size(); f++)
					{
						int i;

						for (i=0; i<combos[f].frag_inten_idxs.size(); i++)
							if (breakage.get_position_of_frag_idx(combos[f].frag_inten_idxs[i])<0)
								break;

						if (i<combos[f].frag_inten_idxs.size())
							continue;

						if (combos[f].frag_no_inten_idxs.size() == 0)
						{
							found_by_combo = true;
							break;
						}

						for (i=0; i<combos[f].frag_no_inten_idxs.size(); i++)
							if (breakage.get_position_of_frag_idx(combos[f].frag_no_inten_idxs[i])>0)
								break;

						if (i<combos[f].frag_inten_idxs.size())
							continue;
						
						found_by_combo = true;
						break;
					}
				}
				if (found_by_combo)
					continue;

				// add to count for every pair
				for (f=0; f<breakage.fragments.size()-1; f++)
				{
					int g;
					for (g=f+1; g<breakage.fragments.size(); g++)
					{
						int f_idx = breakage.fragments[f].frag_type_idx;
						int g_idx = breakage.fragments[g].frag_type_idx;

						if (! rf.is_a_strong_frag_type(f_idx) &&
							! rf.is_a_strong_frag_type(g_idx) )
							continue;

						if (f_idx < g_idx)
						{
							counts[size_idx][region_idx][f_idx][g_idx]++;
						//	cout << size_idx << " " << region_idx << " " << f_idx << " " << g_idx <<
						//		" >> " << counts[size_idx][region_idx][f_idx][g_idx] << endl;
						}
						else
						{
							counts[size_idx][region_idx][g_idx][f_idx]++;
						//	cout << size_idx << " " << region_idx << " " << g_idx << " " << f_idx <<
						//		" >> " << counts[size_idx][region_idx][g_idx][f_idx] << endl;
						}
					}
				}
			}
		}

		for (j=0; j<regional_fragment_sets.size(); j++)
		{
			int k;
			for (k=0; k<regional_fragment_sets[j].size(); k++)
			{
				vector<combo_pair> pairs;
				pairs.clear();
				int f,g;
				for (f=0; f<num_all_frags-1; f++)
					for (g=f+1; g<num_all_frags; g++)
						if ( counts[j][k][f][g]>0)
						{
							combo_pair p;
							p.count = counts[j][k][f][g];
							p.f_idx1 = f;
							p.f_idx2 = g;
							pairs.push_back(p);
						}
				sort(pairs.begin(),pairs.end());

				if (pairs.size() == 0)
					continue;

				const RegionalFragments& rf = regional_fragment_sets[j][k];
				FragmentCombo combo;

				if (rf.is_a_strong_frag_type(pairs[0].f_idx1))
				{
					combo.frag_inten_idxs.push_back(pairs[0].f_idx1);
					combo.frag_inten_idxs.push_back(pairs[0].f_idx2);				
				}
				else
				{
					combo.frag_inten_idxs.push_back(pairs[0].f_idx2);
					combo.frag_inten_idxs.push_back(pairs[0].f_idx1);
				}
				config->get_non_const_regional_fragments(charge,j,k).add_combo(combo);

			/*	int i;
				for (i=0; i<pairs.size(); i++)
					cout << pairs[i].count << "  " << config->get_fragment(pairs[i].f_idx1).label
						<< " " << config->get_fragment(pairs[i].f_idx2).label << endl;
				cout << endl;*/
			}
		}
	}
}





struct f_pair {
	bool operator< (const f_pair& other) const
	{
		return (prob>other.prob);
	}
	int idx;
	score_t prob;
};

void explore_fragment_set(FileManager& fm, Config *config)
{
	FileSet fs;
	vector<FragmentType> all_frags = config->get_all_fragments();
	const int num_frags = all_frags.size();
	const mass_t tolerance = 0.0075;
	vector<double> exp_counts, obs_counts, spec_counts;

	exp_counts.resize(num_frags,0);
	obs_counts.resize(num_frags,0);
	spec_counts.resize(num_frags,0);

	
	fs.select_all_files(fm);

	while (1)
	{
		Spectrum s;

		if (! fs.get_next_spectrum(fm,config,&s))
			break;

		vector<mass_t> break_masses;

		s.get_peptide().calc_expected_breakage_masses(config,break_masses);
		const int num_peaks = s.get_num_peaks();
		vector<int> used_peak_ind;
		vector<int> frag_ind;

		used_peak_ind.resize(num_peaks,0);
		frag_ind.resize(num_frags,0);
		const mass_t min_mass = s.get_min_peak_mass() - tolerance;
		const mass_t max_mass = s.get_max_peak_mass() + tolerance;
		const mass_t true_mass_with_19 = s.get_true_mass_with_19();
		// annotate peaks according to their order
		int i;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -