⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prmgraph.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 5 页
字号:
				if (nodes[i+1].breakage.get_position_of_frag_idx(
					 nodes[i].breakage.fragments[f].frag_type_idx) < 0)
				{
//					cout << "Adding fragments! " << endl;
//					nodes[i].breakage.print();
//					nodes[i+1].breakage.print();
//					cout<<endl;

					nodes[i+1].breakage.add_fragment(nodes[i].breakage.fragments[f]);
					
				}
			}
		}
	}
	
	sort(nodes.begin(),nodes.end());
	while (nodes.size()>0 && nodes[nodes.size()-1].mass > 50000)
		nodes.pop_back();

	init_index_array();

}



struct frag_region_list {
	int frag_type_idx;
	vector<int> region_idxs;
};


/*******************************************************
Selectes nodes for PrmGraph.
Selection done in two stages. First every strong peak is
considered, then combos are considered.
********************************************************/
void PrmGraph::create_nodes()
{
	const int num_regions = config->get_num_regions(charge,size_idx);
	const vector<FragmentType>& all_fragments = config->get_all_fragments();
	const vector<Peak>& peaks = source_spectrum->get_peaks();
	const vector<int>& strong_peak_idxs = source_spectrum->get_strong_peak_idxs();
	const mass_t max_mass_to_create_node = pm_with_19 - 55;
	const mass_t mid_mass = pm_with_19 * 0.5;
	const mass_t min_peak_mass = source_spectrum->get_min_peak_mass();
	const mass_t max_peak_mass = source_spectrum->get_max_peak_mass();

	nodes.clear();
	nodes.resize(1);

	vector< vector<int> > peak_usages; // holds for each peak all the interpertations
									   // that were given to it for creating nodes

	peak_usages.resize(peaks.size());
	
	// add N-TERM
	nodes[0].mass=0;
	nodes[0].type = NODE_N_TERM;
	nodes[0].breakage.mass = 0;
	nodes[0].breakage.region_idx=0;
	nodes[0].breakage.parent_charge=charge;
	nodes[0].breakage.parent_size_idx = size_idx;


	// create list for each strong frag type, what regions it can be used as 
	// a basis for creating a node
	vector<frag_region_list> strong_frags_lists;
	int r,i;
	for (r=0; r<num_regions; r++)
	{
		const RegionalFragments& rf = config->get_regional_fragments(charge,size_idx,r);
		const vector<int>& strong_frag_type_idxs = rf.get_strong_frag_type_idxs();
		int f;
		for (f=0; f<strong_frag_type_idxs.size(); f++)
		{
			int j;
			for (j=0; j<strong_frags_lists.size(); j++)
			{
				if (strong_frags_lists[j].frag_type_idx == strong_frag_type_idxs[f])
				{
					strong_frags_lists[j].region_idxs[r]=1;
					break;
				}
			}

			if (j==strong_frags_lists.size())
			{
				frag_region_list frl;
				frl.frag_type_idx= strong_frag_type_idxs[f];
				frl.region_idxs.resize(num_regions,0);
				frl.region_idxs[r]=1;
				strong_frags_lists.push_back(frl);
			}
		}
	}
	


	// create nodes from strong peaks

	for (i=0; i<strong_frags_lists.size(); i++)
	{
		const int strong_frag_idx = strong_frags_lists[i].frag_type_idx;
		const FragmentType& ft = all_fragments[strong_frag_idx];
		const vector<int>& permitted_regions = strong_frags_lists[i].region_idxs;

		int q;
		for (q=0; q<strong_peak_idxs.size(); q++)
		{
			const int p_idx = strong_peak_idxs[q];
			const mass_t exp_break_mass = ft.calc_breakage_mass(peaks[p_idx].mass,pm_with_19);

			if (exp_break_mass<mid_mass && ! config->is_allowed_prefix_mass(exp_break_mass))
			{
			//	cout << "Bad prefix mass: " << exp_break_mass << endl;
				continue;
			}

			if (exp_break_mass>mid_mass && ! config->is_allowed_suffix_mass(pm_with_19,exp_break_mass))
			{
			//	cout << "Bad Suffix mass: " << exp_break_mass << endl;
				continue;
			} 

			if (exp_break_mass < 3 || exp_break_mass > max_mass_to_create_node)
				continue;

			const int region_idx = config->calc_region_idx(exp_break_mass,pm_with_19, 
				charge, min_peak_mass, max_peak_mass);

			const RegionalFragments & rf = config->get_regional_fragments(charge,size_idx,region_idx);
			const vector<FragmentCombo>&  combos = rf.get_frag_type_combos();

			if (! permitted_regions[region_idx])
				continue;

			Node node;
			node.breakage.region_idx= region_idx;
			node.mass = exp_break_mass;
			node.breakage.mass = exp_break_mass;

			node.type = NODE_REG;
			node.source_frag_type_idx = strong_frag_idx;

			// make sure fragment is present
			BreakageFragment brf;
			brf.expected_mass = peaks[p_idx].mass;
			brf.frag_type_idx = strong_frag_idx;
			brf.mass = peaks[p_idx].mass;
			brf.intensity = peaks[p_idx].intensity;
			brf.peak_idx = p_idx;

			node.breakage.add_fragment(brf);

			annotate_breakage(source_spectrum, pm_with_19, size_idx, node.breakage);
			model->score_breakage(source_spectrum,&node.breakage);
			nodes.push_back(node);

			// add peak usage
			peak_usages[p_idx].push_back(strong_frag_idx);
		}
	}

	// Create nodes from combos
	// if peaks were already used for a certain fragment, then don't create a node
	// the first idx in the combo is a strong_frag_type_idx

	// first create a list for the first strong types from the combo
	vector<int> strong_frags_in_combos;
	for (r=0; r<num_regions; r++)
	{
		const vector<FragmentCombo>& combos = config->get_regional_fragments(charge,size_idx,r).get_frag_type_combos();
		int c;
		for (c=0; c<combos.size(); c++)
		{
			int j;
			for (j=0; j<strong_frags_in_combos.size(); j++)
				if (strong_frags_in_combos[j]== combos[c].frag_inten_idxs[0])
					break;
			if (j<strong_frags_in_combos.size())
				continue;
			strong_frags_in_combos.push_back(combos[c].frag_inten_idxs[0]);
		}
	}

	for (i=0; i<peaks.size(); i++)
	{
		if (peak_usages[i].size()>0 || peaks[i].iso_level>0)
			continue;

		int f;
		for (f=0; f<strong_frags_in_combos.size(); f++)
		{
			const int strong_frag_idx = strong_frags_in_combos[f]; 
			const FragmentType& strong_frag = all_fragments[strong_frag_idx];

			const mass_t exp_breakage_mass = strong_frag.calc_breakage_mass(peaks[i].mass,pm_with_19);

			if (exp_breakage_mass<mid_mass && ! config->is_allowed_prefix_mass(exp_breakage_mass))
				continue;
	
			if (exp_breakage_mass>mid_mass && ! config->is_allowed_suffix_mass(pm_with_19,exp_breakage_mass))
				continue;
		
			if (exp_breakage_mass < 3 || exp_breakage_mass > max_mass_to_create_node)
				continue;

			const int region_idx = config->calc_region_idx(exp_breakage_mass, pm_with_19, charge, 
															min_peak_mass, max_peak_mass);
			const vector<FragmentCombo>& combos = config->get_regional_fragments(charge,size_idx,region_idx).get_frag_type_combos();
			int c;
			for (c=0; c<combos.size(); c++)
			{
				const FragmentCombo& combo = combos[c];
				if (combo.frag_inten_idxs[0] == strong_frag_idx)
				{
					Node node;
					node.breakage.region_idx= region_idx;
					node.mass = exp_breakage_mass;
					node.breakage.mass = exp_breakage_mass;

					node.type = NODE_REG;
					node.source_frag_type_idx = strong_frag_idx;

					annotate_breakage(source_spectrum, pm_with_19, size_idx, node.breakage);
					// check that all frags are present
					int j;
					for (j=0; j<combo.frag_inten_idxs.size(); j++)
						if (node.breakage.get_position_of_frag_idx(combo.frag_inten_idxs[j])<0)
							break;
					
					if (j<combo.frag_inten_idxs.size())
						continue;

					model->score_breakage(source_spectrum,&node.breakage);
					if (node.mass<862.29 && node.mass>862.28)
					{
						int qq=1;
					}

					nodes.push_back(node);

					// add peak usage
					peak_usages[i].push_back(strong_frag_idx);
					break;
				}
			}
		}
	}

	if (nodes.size()>5)
	{
		sort(nodes.begin(),nodes.end());
		init_index_array();

		const int num_nodes_to_add = 8 +int(nodes[nodes.size()-1].mass * 0.00125);
		vector<Node> cand_nodes;

		// add nodes for peaks interperted as strong fragment types that have an amino acid before or an amino acid 
		// after them with offset tolerance/2
		const mass_t tolerance = config->get_tolerance();
		const mass_t third_tolerance = (tolerance<0.1 ? tolerance*0.666 : tolerance * 0.333);
		const vector<int>& session_aas = config->get_session_aas();
		const vector<mass_t>& aa2mass = config->get_aa2mass();

		for (i=0; i<peaks.size(); i++)
		{
			if (peaks[i].iso_level>0 || peak_usages[i].size()>0)
				continue;

			const mass_t peak_mass = peaks[i].mass;
			int f;
			for (f=0; f<strong_frags_lists.size(); f++)
			{
				const int strong_frag_idx = strong_frags_lists[f].frag_type_idx;
				const FragmentType& strong_frag = all_fragments[strong_frag_idx];
				const mass_t one_over_frag_charge = 1.0/strong_frag.charge;

				const mass_t exp_breakage_mass = strong_frag.calc_breakage_mass(peaks[i].mass,pm_with_19);

				if (exp_breakage_mass < 3 || exp_breakage_mass > max_mass_to_create_node)
					continue;

				if (exp_breakage_mass<mid_mass && ! config->is_allowed_prefix_mass(exp_breakage_mass))
					continue;
		
				if (exp_breakage_mass>mid_mass && ! config->is_allowed_suffix_mass(pm_with_19,exp_breakage_mass))
					continue;

				if (get_max_score_node(exp_breakage_mass+MASS_NH3*one_over_frag_charge,third_tolerance)>0)
					continue;

				if (get_max_score_node(exp_breakage_mass+MASS_H2O*one_over_frag_charge,third_tolerance)>0)
					continue;
			
				// look for N-side connection
				score_t n_score=NEG_INF;
				if (1)
				{
					int a;
					
					for (a=0; a<session_aas.size(); a++)
					{
						const int idx = this->get_max_score_node(exp_breakage_mass - aa2mass[session_aas[a]],third_tolerance);
						if (idx>=0 && nodes[idx].breakage.score>n_score)
							n_score = nodes[idx].breakage.score;
					}
					if (n_score== NEG_INF)
						continue;
				}
				
				// look for C-side connection with same frag
				score_t c_score=NEG_INF;
				if (1)
				{
					int a;
					
					for (a=0; a<session_aas.size(); a++)
					{
						const int idx = this->get_max_score_node(exp_breakage_mass + aa2mass[session_aas[a]],third_tolerance);
						if (idx>=0 && nodes[idx].breakage.score>c_score)
							c_score = nodes[idx].breakage.score;
					}
					if (c_score== NEG_INF)
						continue;
				}

				const int region_idx = config->calc_region_idx(exp_breakage_mass, pm_with_19, charge, 
																min_peak_mass, max_peak_mass);
			
				Node node;
				node.breakage.region_idx= region_idx;
				node.mass = exp_breakage_mass;
				node.breakage.mass = exp_breakage_mass;

				node.type = NODE_REG;
				node.source_frag_type_idx = strong_frag_idx;

				annotate_breakage(source_spectrum, pm_with_19, size_idx, node.breakage);
				model->score_breakage(source_spectrum,&node.breakage);

				node.tmp_score = n_score + c_score;

				int min_idx=NEG_INF;
				score_t min_score = POS_INF;
				if (cand_nodes.size()<num_nodes_to_add)
				{
					cand_nodes.push_back(node);
				}
				else
				{
					if (min_idx<0)
					{
						int j;
						for (j=0; j<cand_nodes.size(); j++)
							if (cand_nodes[j].tmp_score<min_score)
							{
								min_idx=j;
								min_score = cand_nodes[j].tmp_score;
							}
					}

					if (node.tmp_score>min_score)
					{
						cand_nodes[min_idx]=node;
						score_t min_score = POS_INF;
						int j;
						for (j=0; j<cand_nodes.size(); j++)
							if (cand_nodes[j].tmp_score<min_score)
							{
								min_idx=j;
								min_score = cand_nodes[j].tmp_score;
							}

					}
				}
			}
		}
		// add nodes
		for (i=0; i<cand_nodes.size(); i++)
		{
			nodes.push_back(cand_nodes[i]);
		//	cout << "Created :\t" << cand_nodes[i].mass << "\t" << cand_nodes[i].tmp_score << endl;
		}
	} 

	Node node;
	node.mass = pm_with_19 - MASS_OHHH;
	node.type = NODE_C_TERM;
	node.breakage.mass = node.mass;
	node.breakage.parent_charge=charge;
	node.breakage.parent_size_idx = size_idx;
	node.breakage.region_idx = config->calc_region_idx(node.mass,pm_with_19,charge,
														min_peak_mass,max_peak_mass);

	nodes.push_back(node);

	sort(nodes.begin(),nodes.end());
	
	merge_close_nodes();

	for (i=0; i<nodes.size(); i++)
	{
		nodes[i].breakage.parent_charge = source_spectrum->get_charge();
		nodes[i].breakage.parent_size_idx = source_spectrum->get_size_idx();
	}
	
}

/**********************************************************
Creates and annotates nodes that correspond to a pepitdes breakages
Only nodes that have some strong fragments associated with them
are kept.
***********************************************************/
void PrmGraph::create_nodes_for_peptide(const Peptide& pep)
{
	const vector< vector< vector< RegionalFragments > > >& all_rfs = config->get_regional_fragment_sets();
	const int num_regions = config->get_num_regions(charge,size_idx);
	const vector<FragmentType>& all_fragments = config->get_all_fragments();
	const vector<Peak>& peaks = source_spectrum->get_peaks();
	const vector<int>& strong_peak_idxs = source_spectrum->get_strong_peak_idxs();
	const mass_t max_mass_to_create_node = pm_with_19 - 55;
	const mass_t mid_mass = pm_with_19 * 0.5;
	const mass_t min_peak_mass = source_spectrum->get_min_peak_mass();
	const mass_t max_peak_mass = source_spectrum->get_max_peak_mass();

	nodes.clear();
	nodes.resize(1);
	
	// add N-TERM
	nodes[0].mass=0;
	nodes[0].type = NODE_N_TERM;
	nodes[0].breakage.mass = 0;
	nodes[0].breakage.region_idx=0;
	nodes[0].breakage.parent_charge=charge;
	nodes[0].breakage.parent_size_idx = size_idx;

	vector<mass_t> exp_break_masses;
	
	pep.calc_expected_breakage_masses(config,exp_break_masses);
	
	// create nodes for peptide breakages
	int i;
	for (i=1; i<exp_break_masses.size()-1; i++)
	{
		const mass_t exp_break_mass = exp_break_masses[i];

		const int region_idx = config->calc_region_idx(exp_break_mass,pm_with_19,
												charge, min_peak_mass, max_peak_mass);

	
		Node node;
		node.breakage.region_idx= region_idx;
		node.mass = exp_break_mass;
		node.breakage.mass = exp_break_mass;

		node.type = NODE_REG;
		node.source_frag_type_idx = -1;

		annotate_breakage(source_spectrum, pm_with_19, size_idx, node.breakage);


		if (node.breakage.fragments.size() == 0)
			continue;
		
		const vector<int>& strong_frag_idxs = all_rfs[charge][size_idx][region_idx].get_strong_frag_type_idxs();
		int j;
		for (j=0; j<strong_frag_idxs.size(); j++)
			if (node.breakage.get_position_of_frag_idx(strong_frag_idxs[j])>=0)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -