⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prmgraph.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 5 页
字号:
#include "PrmGraph.h"
#include "AnnotatedSpectrum.h"
#include "auxfun.h"


bool comp_SeqPath_sort_key(const SeqPath& a, const SeqPath& b)
{
	return (a.sort_key>b.sort_key);
}

bool comp_SeqPath_path_score (const SeqPath& a, const SeqPath& b)
{
	return (a.path_score>b.path_score);
}



/********************************************************
*********************************************************/
void PrmGraph::create_graph_from_spectrum(Model *_model, Spectrum *spectrum,
							mass_t _pm_with_19, int spec_charge, 
							bool add_all_pepitde_nodes, bool only_basic_score)
{
	if (nodes.size()>0)
		this->clear();

	model  = _model;
	config = model->get_config();
	source_spectrum = spectrum;
	pm_with_19 = _pm_with_19;
	charge = spec_charge;

	has_node_combo_scores=false;

	if (charge==0)
		charge = source_spectrum->get_charge();

	int org_size_idx = spectrum->get_size_idx();
	size_idx = config->calc_size_idx(charge,pm_with_19);

	spectrum->set_size_idx(size_idx);

	digest_node_score = config->get_digest_score();
	model->init_model_for_scoring_spectrum(source_spectrum);

//	config->print_session_aas();

	if (pm_with_19<10)
	{
		cout << "Error: supplied negative/low PM for graph!" << endl;
		exit(1);
	}

	create_nodes();
	add_digest_nodes();
	score_nodes(model);

	set_significant_masses();

	fill_single_multi_edges();
	fill_double_multi_edges();

	int l;
	for (l=3; l<=config->get_max_edge_length(); l++)
		fill_longer_multi_edges(l);

	if (! only_basic_score)
		model->initial_combos_score(this);

//	model->score_all_node_combos(this);
//	source_spectrum->print_expected_by();
//	this->print_with_combo_tables();
//	print();
//	exit(0);

	rank_nodes_according_to_score();

	set_idxs_max_in_out_for_nodes();

	// restore size idx
	spectrum->set_size_idx(org_size_idx);

}

/***********************************************************************
Creates a graph tailored for a peptide.
Contains only nodes and edges that correspond to that peptide.
************************************************************************/
void PrmGraph::create_graph_for_peptide_and_spectrum(Model *_model, Spectrum *spectrum, 
					mass_t _pm_with_19, int spec_charge, const Peptide& peptide)
{
	if (nodes.size()>0)
		this->clear();

	model  = _model;
	config = model->get_config();
	source_spectrum = spectrum;
	pm_with_19 = _pm_with_19;
	charge = spec_charge;

	has_node_combo_scores=false;

	if (charge==0)
		charge = source_spectrum->get_charge();

	int org_size_idx = spectrum->get_size_idx();
	size_idx = config->calc_size_idx(charge,pm_with_19);

	spectrum->set_size_idx(size_idx);

	digest_node_score = config->get_digest_score();
	model->init_model_for_scoring_spectrum(source_spectrum);

	if (pm_with_19<10)
	{
		cout << "Error: supplied negative/low PM for graph!" << endl;
		exit(1);
	}

	create_nodes_for_peptide(peptide);

	const vector<int>& pep_aas = peptide.get_amino_acids();
	mass_t n_digest_mass = NEG_INF;
	mass_t c_digest_mass = NEG_INF;
	
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const vector<int>& n_digest_aas = config->get_n_term_digest_aas();
	if (n_digest_aas.size()>0)
	{
		int j;
		for (j=0; j<n_digest_aas.size(); j++)
			if (pep_aas[0] == n_digest_aas[j])
				break;
		if (j<n_digest_aas.size())
			n_digest_mass = aa2mass[n_digest_aas[j]];
	}

	const vector<int>& c_digest_aas = config->get_c_term_digest_aas();
	if (c_digest_aas.size()>0)
	{
		int j;
		for (j=0; j<c_digest_aas.size(); j++)
			if (pep_aas[pep_aas.size()-1] == c_digest_aas[j])
				break;
		if (j<c_digest_aas.size())
			c_digest_mass = peptide.get_mass() - aa2mass[c_digest_aas[j]];
	}

	if (n_digest_mass>0 || c_digest_mass>0)
		add_digest_nodes(n_digest_mass,c_digest_mass);

	score_nodes(model);

	set_significant_masses();

	fill_single_multi_edges();
	fill_double_multi_edges();

	int l;
	for (l=3; l<=config->get_max_edge_length(); l++)
		fill_longer_multi_edges(l);

	model->score_peptide_node_combos(this,peptide);

	rank_nodes_according_to_score();

	set_idxs_max_in_out_for_nodes();

	// restore size idx
	spectrum->set_size_idx(org_size_idx);

}


/***********************************************************************
Finds the optimal value for pm_with_19
Chooses the one for which there is a maximal score for the top 5-10 nodes
************************************************************************/
mass_t PrmGraph::find_optimal_pm_with_19_for_graph(Model *_model, 
							Spectrum *spectrum, mass_t base_pm_with_19,
							int spec_charge)
{
	const mass_t margin    = 1.5;
	const mass_t increment = 0.1;


	model  = _model;
	config = model->get_config();
	source_spectrum = spectrum;


	charge = spec_charge;

	int org_size_idx = spectrum->get_size_idx();
	size_idx = config->calc_size_idx(charge,base_pm_with_19);
	spectrum->set_size_idx(size_idx);
	

	digest_node_score = config->get_digest_score();
	model->init_model_for_scoring_spectrum(source_spectrum);

	const mass_t node_tol = config->get_tolerance()*1.25;

	mass_t best_mass_with_19=-1;
	score_t best_score=NEG_INF;

	int num_top_nodes = 5;
	num_top_nodes+= (int)((base_pm_with_19 - 800.0)/400.0);
	if (num_top_nodes>8)
		num_top_nodes=8;

	vector<score_t> mass_scores;
	vector<mass_t > masses_with_19;
	mass_scores.clear();
	masses_with_19.clear();

	for (pm_with_19 = base_pm_with_19 - margin; 
		 pm_with_19<= base_pm_with_19+ margin; 
		 pm_with_19 += increment)
	{

		create_nodes();
		add_digest_nodes();
		score_nodes(model);

		const int num_nodes = nodes.size();

		static bool *forbidden_node_map = NULL;
		static int   forbidden_map_size = 0;
		if (forbidden_map_size ==0 || forbidden_map_size<num_nodes)
		{
			forbidden_map_size = 400;
			if (forbidden_map_size<num_nodes)
				forbidden_map_size = num_nodes * 2;

			forbidden_node_map = new bool [ forbidden_map_size];
			if (! forbidden_node_map)
			{
				cout << "Error: couldn't allocate mem for bool node map!" << endl;
				exit(1);
			}
		}

		memset(forbidden_node_map,0,num_nodes*sizeof(bool));

		score_t node_set_score =0;
		int i;
		for (i=0; i<num_top_nodes; i++)
		{
			int top_node_idx = -1;
			score_t top_node_score=NEG_INF;

			const int max_node_idx= num_nodes - 3;
			int j;
			for (j=1; j<max_node_idx; j++)
				if (! forbidden_node_map[j] && nodes[j].score>top_node_score)
				{
					top_node_score = nodes[j].score;
					top_node_idx = j;
				}

			// mark the node and all masses in +- 25 Da range (including the mirror in a
			// +- 10 Da range
			if (top_node_idx>=0)
			{
				const mass_t min_mass = nodes[top_node_idx].mass - 25.0;
				const mass_t max_mass = nodes[top_node_idx].mass + 25.0;

				forbidden_node_map[top_node_idx]=true;

				int k=top_node_idx;
				while (k>=0 && nodes[k].mass>min_mass)
					forbidden_node_map[k--]=true;
				
				k=top_node_idx;
				while (k<num_nodes && nodes[k].mass<max_mass)
					forbidden_node_map[k++]=true;

				const mass_t mirror_node_mass = pm_with_19 - MASS_OHHH - nodes[top_node_idx].mass;
				const mass_t min_mirror_mass = mirror_node_mass - 10.0;
				const mass_t max_mirror_mass = mirror_node_mass + 10.0;

				k=0;
				while (k<num_nodes && nodes[k].mass<min_mirror_mass)
					k++;

				while (k<num_nodes && nodes[k].mass<max_mirror_mass)
					forbidden_node_map[k++]=true;

				// add to score
				node_set_score += nodes[top_node_idx].score;

				// check if this looks like part of a misalligned pair (i.e. there is a
				// very close node that also has a high score and a different source
				// if so give penaly (count only half of the score)

				if (top_node_idx>0 && 
					nodes[top_node_idx-1].mass + node_tol > nodes[top_node_idx].mass &&
					nodes[top_node_idx-1].source_frag_type_idx != nodes[top_node_idx].source_frag_type_idx)
				{
					node_set_score -= nodes[top_node_idx].score * 0.5;
				}

				if (top_node_idx<num_nodes-1 && 
					nodes[top_node_idx+1].mass - node_tol < nodes[top_node_idx].mass &&
					nodes[top_node_idx+1].source_frag_type_idx != nodes[top_node_idx].source_frag_type_idx)
				{
					node_set_score -= nodes[top_node_idx].score * 0.5;
				}

			//	cout << "  " << top_node_idx << " " << nodes[top_node_idx].score << endl;
			}
		}

		mass_scores.push_back(node_set_score);
		masses_with_19.push_back(pm_with_19);

	//	cout << setprecision(3) << pm_with_19 << "\t" << node_set_score << endl;
		if (node_set_score>best_score)
		{
			best_score = node_set_score;
			best_mass_with_19 = pm_with_19;
		}


	}

	// restore size idx
	spectrum->set_size_idx(org_size_idx);

	int i;
	vector<int> good_mass_idxs;
	good_mass_idxs.clear();
	for (i=0; i<mass_scores.size(); i++)
	{
		if (mass_scores[i]==best_score)
			good_mass_idxs.push_back(i);
	}


	if (good_mass_idxs.size()==0) // how would this happen?
		return best_mass_with_19;

	int idx = good_mass_idxs[good_mass_idxs.size()/2];

//	cout << " >>> " << masses_with_19[idx] << endl;

	return masses_with_19[idx];
}



/*********************************************************************
Initializes the index array.
For each rounded off Dalton m, it gives the index of the closest peak i
with mass m_i>= m.
**********************************************************************/
void PrmGraph::init_index_array()
{
	int i,c,size=(int)pm_with_19+2;
	const int max_node_idx = nodes.size()-1;
	
	index_array.clear();
	index_array.resize(size,-1);
	
	i=0;
	int m=(int)nodes[0].mass;
	while (i<m)
		index_array[i++]=0;

	c=0;
	while (c< max_node_idx)
	{
		int curr_m=(int)nodes[c].mass;
		int next_m = curr_m;
		int next_c = c;

		while (next_m == curr_m && next_c<max_node_idx)
			next_m=(int)nodes[++next_c].mass;

		while (i<next_m)
			index_array[i++]=c;
		
		c=next_c;
	}

	while (i<size)
		index_array[i++]=max_node_idx;
}



/***********************************************************
Merges nodes that are close to each other
Gives preference to the prefix fragment, but merge genereally
goes according to the intensities of the source fragments
************************************************************/
void PrmGraph::merge_close_nodes()
{
	int i;
	const vector<FragmentType>& all_fragments = config->get_all_fragments();

	mass_t delta = config->get_tolerance();
	if (delta>0.2)
		delta *= 0.8;

	for (i=0; i<nodes.size()-1; i++)
	{
		if (nodes[i+1].mass - nodes[i].mass < delta &&
			nodes[i+1].source_frag_type_idx != nodes[i].source_frag_type_idx)
		{
			const int frag1_pos = nodes[i].breakage.get_position_of_frag_idx(nodes[i].source_frag_type_idx);
			const int frag2_pos = nodes[i+1].breakage.get_position_of_frag_idx(nodes[i+1].source_frag_type_idx);

			if (frag1_pos <0)
			{
				nodes[i].mass=999999;
				continue;
			}

			if (frag2_pos<0)
			{
				nodes[i+1].mass=9999999;
				continue;
			}
			if (frag1_pos <0 || frag2_pos<0)
			{
				Node& node1 = nodes[i];
				Node& node2 = nodes[i+1];


				cout << i << "\t";
				node1.breakage.print_fragments(config);
				cout << endl;
				cout << i+1 << "\t";
				node2.breakage.print_fragments(config);
				cout << endl;
				cout << "Error: source fragments not found in breakage!: " <<
					config->get_fragment(node1.source_frag_type_idx).label << "  " << 
					config->get_fragment(node2.source_frag_type_idx).label << endl;

				print();
				exit(1);
			}
			mass_t new_node_mass;
			intensity_t inten1 = nodes[i].breakage.fragments[frag1_pos].intensity;
			intensity_t inten2 = nodes[i+1].breakage.fragments[frag2_pos].intensity;
			if (all_fragments[nodes[i].source_frag_type_idx].orientation == PREFIX)
			{
				inten1 *= 2;
			}
			else if (all_fragments[nodes[i+1].source_frag_type_idx].orientation == PREFIX)
			{
				inten2 *= 2;
			}

			mass_t mass_times_inten1 = nodes[i].mass * inten1;
			mass_t mass_times_inten2 = nodes[i+1].mass * inten2;

			new_node_mass = (mass_times_inten1 + mass_times_inten2)/ (inten1 + inten2);

			// create new node, move it to the i+1 position
			nodes[i].mass = 99999999;
			nodes[i+1].mass = new_node_mass;

			// transfer fragments from node i to i+1 if they are not there
			int f;
			for (f=0; f<nodes[i].breakage.fragments.size(); f++)
			{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -