⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fragmentselection.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:
		for (i=0; i<all_frags.size(); i++)
		{
			const FragmentType& frag = all_frags[i];

			int j;

			for (j=0; j<break_masses.size(); j++)
			{
				mass_t exp_mass = frag.calc_expected_mass(break_masses[j],true_mass_with_19);
				if (exp_mass>min_mass && exp_mass<max_mass)
				{
					exp_counts[i]++;
					int idx = s.get_max_inten_peak(exp_mass,tolerance);
					if (idx>=0)
					{
						if (! used_peak_ind[idx])
						{
							obs_counts[i]++;
							used_peak_ind[idx]=1;
							frag_ind[i]=1;
						}
					}

				}
			}
		}

		for (i=0; i<all_frags.size(); i++)
			spec_counts[i]+=frag_ind[i];
	}

	vector<f_pair> pairs;
	int f;
	for (f=0; f<num_frags; f++)
	{
		all_frags[f].prob = obs_counts[f] /exp_counts[f];
		if (all_frags[f].prob<0.001)
			continue;

		f_pair p;
		p.idx=f;
		p.prob = all_frags[f].prob;
		pairs.push_back(p);
	}

	sort(pairs.begin(),pairs.end());

	int i;
	for (i=0; i<pairs.size(); i++)
	{
		int f=pairs[i].idx;

		cout << all_frags[f].label << " & " ;
		cout << all_frags[f].charge<< " & " ;
		string term = "C";
		if (all_frags[f].orientation==PREFIX)
			term = "N";
		cout << term << " & ";
		cout << all_frags[f].offset << " &   & ";
		cout << setprecision(0) << obs_counts[f] << "/" << exp_counts[f] << " & " << spec_counts[f] << " & ";
		cout << setprecision(3) << all_frags[f].prob << " \\\\" << endl;
		
	}
}


void show_occurences(FileManager& fm, Config *config, string label)
{
	FileSet fs;
	vector<FragmentType> all_frags = config->get_all_fragments();
	const vector<string>& aa2label = config->get_aa2label();
	const int num_frags = all_frags.size();
	const mass_t tolerance = 0.0075;
	int f_idx = config->get_frag_idx_from_label(label);

	fs.select_all_files(fm);

	while (1)
	{
		Spectrum s;

		if (! fs.get_next_spectrum(fm,config,&s))
			break;

		vector<mass_t> break_masses;

		s.get_peptide().calc_expected_breakage_masses(config,break_masses);
		const int num_peaks = s.get_num_peaks();
		vector<int> used_peak_ind;
		used_peak_ind.resize(num_peaks,0);

		const mass_t true_mass_with_19 = s.get_true_mass_with_19();
		// annotate peaks according to their order
		int i;
		for (i=0; i<all_frags.size(); i++)
		{
			const FragmentType& frag = all_frags[i];

			int j;

			for (j=0; j<break_masses.size(); j++)
			{
				mass_t exp_mass = frag.calc_expected_mass(break_masses[j],true_mass_with_19);
				
				int idx = s.get_max_inten_peak(exp_mass,tolerance);
				if (idx>=0)
				{
					if (! used_peak_ind[idx])
					{
						used_peak_ind[idx]=1;
						
						if (i == f_idx)
						{
							const vector<int>& amino_acids = s.get_peptide().get_amino_acids();
							int k;
							for (k=0; k<j; k++)
								cout << aa2label[amino_acids[k]];

							cout <<" ";
							for ( ; k<amino_acids.size(); k++)
								cout << aa2label[amino_acids[k]];
							cout << endl;
						}
					}
				}
			}
		}
	}
}


void make_frag_rank_histogram(FileManager& fm, Config *config)
{
	FileSet fs;
	vector<FragmentType> all_frags = config->get_all_fragments();
	const mass_t tolerance = 0.45;
	int i,f;

	vector<int> rank_levels;
	rank_levels.push_back(1);
	rank_levels.push_back(2);
	rank_levels.push_back(3);
	rank_levels.push_back(5);
	rank_levels.push_back(7);
	rank_levels.push_back(10);
	rank_levels.push_back(15);
	rank_levels.push_back(20);
	rank_levels.push_back(30);
	rank_levels.push_back(55);
	rank_levels.push_back(99999);

	const int num_levels = rank_levels.size();

	const int num_frags = 6;
	const string labels[num_frags]={"y","b","y-H2O","b-H2O","s2+10.2","b2"};
	//	"b-H2O","y-H2O","y2","b-NH3","a","y2-H2O",
	//	"y2-H2OH2O","b-H2OH2O","y-NH3","y2-H2ONH3",,"b-NH3H2O","a-NH3","a-H2O",
	//	"b-NH3NH3","b2","y-NH3H2O","y-H2OH2O"};

	vector<int> frag_idxs;
	for (f=0; f<num_frags; f++)
		frag_idxs.push_back(config->get_frag_idx_from_label(labels[f]));
	


	vector<double> level_counts;
	vector< vector<double> > frag_level_counts;

	level_counts.resize(num_levels,0);
	frag_level_counts.resize(num_levels);
	for (i=0; i<num_levels; i++)
		frag_level_counts[i].resize(num_frags,0);

	
	
	fs.select_all_files(fm);

	while (1)
	{
		Spectrum s;

		if (! fs.get_next_spectrum(fm,config,&s))
			break;

		vector<mass_t> break_masses;

		s.get_peptide().calc_expected_breakage_masses(config,break_masses);
		const int num_peaks = s.get_num_peaks();
		vector<int> used_peak_ind;

		used_peak_ind.resize(num_peaks,0);
	
		const mass_t min_mass = s.get_min_peak_mass() - tolerance;
		const mass_t max_mass = s.get_max_peak_mass() + tolerance;
		const mass_t true_mass_with_19 = s.get_true_mass_with_19();
		// annotate peaks according to their order
		int i;
		for (i=0; i<frag_idxs.size(); i++)
		{
			const int frag_idx = frag_idxs[i];
			const FragmentType& frag = all_frags[frag_idx];

			int j;

			for (j=0; j<break_masses.size(); j++)
			{
				mass_t exp_mass = frag.calc_expected_mass(break_masses[j],true_mass_with_19);
				if (exp_mass>min_mass && exp_mass<max_mass)
				{
					int idx = s.get_max_inten_peak(exp_mass,tolerance);
					if (idx>=0)
					{
						if (! used_peak_ind[idx])
						{
							int rank =s.get_peak(idx).rank;
							int l;
							for (l=0; l<num_levels; l++)
								if (rank<=rank_levels[l])
									break;

							frag_level_counts[l][i]++;		
						}
					}

				}
			}
		}

		// add counts for all peaks
		for (i=0; i<s.get_num_peaks(); i++)
		{
			int rank =s.get_peak(i).rank;
			int l;
			for (l=0; l<num_levels; l++)
				if (rank<=rank_levels[l])
					break;
			level_counts[l]++;
		}

	}


	for (i=0; i<num_frags; i++)
	{
		const int frag_idx = frag_idxs[i];
		const FragmentType& frag = all_frags[frag_idx];

		cout << setw(15) << left << frag.label ;
		int l;

		for (l=0; l<num_levels; l++)
		{
	//		cout << " & " << setw(7) << setprecision(3) << frag_level_counts[l][i] / level_counts[l];
			cout << "\t" << setw(7) << setprecision(3) << frag_level_counts[l][i] / level_counts[l];

		}
	
		cout << endl;		
	}

//	cout << setw(15) << left << "Unexplained";
	cout << setw(15) << left << "Other";
	int l;
	for (l=0; l<num_levels; l++)
	{
		int f;
		double exp_sum=0;
		for (f=0; f<num_frags; f++)
			exp_sum += frag_level_counts[l][f];

		double exp_prob = exp_sum/level_counts[l];
		//cout << " & "<< setw(7) << setprecision(3) << 1.0 - exp_prob ;
		cout << "\t"<< setw(7) << setprecision(3) << 1.0 - exp_prob ;
	}
	cout  << endl;
}



// calculates the average random probability according to the offset frequency function
double calc_avg_rand(FileManager& fm, Config *config)
{
	FileSet fs;
	vector<FragmentType> all_frags = config->get_all_fragments();
	const mass_t tolerance = 0.0075;
	int i;
	vector<double> bins;
	double count=0;

	bins.resize(201,0);
	
	fs.select_all_files(fm);

	while (1)
	{
		Spectrum s;

		if (! fs.get_next_spectrum(fm,config,&s))
			break;

		vector<mass_t> break_masses;

		s.get_peptide().calc_expected_breakage_masses(config,break_masses);
		const int num_peaks = s.get_num_peaks();
		vector<int> used_peak_ind;

		used_peak_ind.resize(num_peaks,0);
	
		const mass_t min_mass = s.get_min_peak_mass() - tolerance;
		const mass_t max_mass = s.get_max_peak_mass() + tolerance;
		const mass_t true_mass_with_19 = s.get_true_mass_with_19();
		// annotate peaks according to their order
		int i;
		for (i=0; i<all_frags.size() && i<10; i++)
		{
			const FragmentType& frag = all_frags[i];

			int j;

			for (j=0; j<break_masses.size(); j++)
			{
				mass_t exp_mass = frag.calc_expected_mass(break_masses[j],true_mass_with_19);
				if (exp_mass>min_mass && exp_mass<max_mass)
				{
					int idx = s.get_max_inten_peak(exp_mass,tolerance);
					if (idx>=0)
						used_peak_ind[idx]=1;
				}
			}
		}

		int j;
		for (j=0; j<break_masses.size(); j++)
		{
			mass_t exp_mass = break_masses[j];

			if (exp_mass>min_mass && exp_mass<max_mass)
			{
				int k;
				for (k=0; k<s.get_num_peaks(); k++)
				{
					mass_t p_mass = s.get_peak(i).mass;
					int b_idx = (int)(exp_mass + 100 - p_mass);
					if (b_idx>=0 && b_idx<=200)
						bins[b_idx]++;
				}
				count++;
			}
		}
	}

	double avg = 0;
	for (i=0; i<bins.size(); i++)
		avg+=bins[i];

	avg/=201;
	avg/=count;

	avg*= config->get_tolerance()*2.0;

	cout << "AVG: " << setprecision(5) << avg << endl;

	return avg;
}


void make_bin_histograms(FileManager& fm, Config *config)
{
	FileSet fs;
	vector<FragmentType> all_frags = config->get_all_fragments();
	const mass_t mass_inc = 0.001;
	const mass_t max_off = 20*mass_inc;

	int i;
	vector<double> bins;

	bins.resize(41,0);
	
	fs.select_all_files(fm);

	while (1)
	{
		Spectrum s;

		if (! fs.get_next_spectrum(fm,config,&s))
			break;

		vector<mass_t> break_masses;

		s.get_peptide().calc_expected_breakage_masses(config,break_masses);
	
		const mass_t min_mass = s.get_min_peak_mass() - 0.1;
		const mass_t max_mass = s.get_max_peak_mass() + 0.1;
		const mass_t true_mass_with_19 = s.get_true_mass_with_19();
		// annotate peaks according to their order
		int i;
		for (i=0; i<all_frags.size() && i<2; i++)
		{
			const FragmentType& frag = all_frags[i];

			int j;

			for (j=0; j<break_masses.size(); j++)
			{
				mass_t exp_mass = frag.calc_expected_mass(break_masses[j],true_mass_with_19);
				if (exp_mass>min_mass && exp_mass<max_mass)
				{
					int idx = s.get_max_inten_peak(exp_mass,max_off);
					if (idx>=0)
					{
						int bin = (int)((exp_mass - s.get_peak(idx).mass)/mass_inc + 20);
						if (bin>=0 && bin<=40)
							bins[bin]++;
					}
				}
			}
		}
	}

	double count = 0;
	for (i=0; i<bins.size(); i++)
		count+=bins[i];

	for (i=0; i<bins.size(); i++)
		cout << setprecision(3) << i*mass_inc - max_off << "  " << bins[i]/count << endl;


}



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -