⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 multipath.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:

			if (nodes[n_idx].type == NODE_N_TERM &&	nodes[c_idx].type == NODE_C_TERM) 
			{
				if (! config->get_need_to_estimate_pm() )
				{
					const vector<mass_t>& aa2mass = config->get_aa2mass();
					vector<int> aas;
					variants[j].get_amino_acids(aas);
					mass_t pep_mass = 0;
					int a;
					for (a=0; a<aas.size(); a++)
						pep_mass += aa2mass[aas[a]];

					pep_mass+=MASS_OHHH;

					if (fabs(pep_mass-source_spectrum->get_org_pm_with_19())>config->get_pm_tolerance())
					{
					//	cout << "Rejected: " << variants[j].seq_str << endl;
					//	cout << "aa_mass: " << fixed << setprecision(3) << pep_mass << " spec_mass: " << source_spectrum->get_org_pm_with_19();
					//	cout << " (" << pep_mass-source_spectrum->get_org_pm_with_19() << ")" << endl;

						continue;
					}

					// add bonus score since the amino acids match the mass
				//	variants[j].path_score += BONUS_FOR_COMPLETE_PEPTIDE;
				//	cout << "Added bonus!: " << fixed << setprecision(3) <<  variants[j].path_score << endl;
				}
			}

			// check that there are no PTMs that are specific to the +1, -1 positions

	
			path_heap.add_path(variants[j]);
		}
	}

	paths = path_heap.get_paths();
	sort(paths.begin(),paths.end(),comp_SeqPath_path_score);

	while (paths.size()>0)
	{
		int idx = paths.size()-1;
		if (paths[idx].path_score<= NEG_INF || paths[idx].get_num_aa() < 1)
		{
			paths.pop_back();
		}
		else
			break;
	}

//	int i;
//	for (i=0; i<paths.size(); i++)
//		cout << i << "\t" << paths[i].path_score << "\t" << paths[i].seq_str << endl;

//	cout << "Considered: " << num_vars << endl;
}



bool MultiPath::check_if_correct(const Peptide& p, Config *config) const
{
	const mass_t tolerance = config->get_tolerance() * 1.25;
	vector<mass_t> break_masses;
	int idx=0;
	int i;

	p.calc_expected_breakage_masses(config,break_masses);

	for (i=0; i<breakages.size(); i++)
	{
		const mass_t& mass = breakages[i]->mass;
		const mass_t max_mass = mass + tolerance;
		const mass_t min_mass = mass - tolerance;

		while (idx < break_masses.size() && break_masses[idx] < min_mass)
			idx++;

		if (break_masses[idx]>max_mass)
			return false;
	}

	return true;
}



int  MultiPath::get_num_correct_aas(const PrmGraph& prm, const Peptide& p, Config *config) const
{
	const mass_t tolerance = config->get_tolerance() * 1.25;
	vector<mass_t> break_masses;
	int idx=0;
	int num_correct=0;
	int i;

	p.calc_expected_breakage_masses(config,break_masses);

	for (i=0; i<breakages.size(); i++)
	{
		const mass_t& mass = breakages[i]->mass;
		const mass_t max_mass = mass + tolerance;
		const mass_t min_mass = mass - tolerance;

		while (idx < break_masses.size() && break_masses[idx] < min_mass)
			idx++;

		if (break_masses[idx]>max_mass)
			continue;
		
		if (idx<breakages.size()-1 && edge_idxs[idx]>=0)
			num_correct += prm.get_multi_edge(edge_idxs[idx]).num_aa;
	}
	return num_correct;
}


int  MultiPath::get_num_aas() const
{
	return (breakages.size()-1);
}


// returns the number of b/y ions
int SeqPath::get_num_frags(const vector<int>& frag_idxs) const
{
	int num_frags=0;
	int i;
	for (i=0; i<positions.size(); i++)
	{
		Breakage *bb = positions[i].breakage;

		if (positions[i].breakage && positions[i].breakage->fragments.size()>0)
		{
			int j;
			for (j=0; j<frag_idxs.size(); j++)
			{
				int k;
				for (k=0; k<positions[i].breakage->fragments.size(); k++)
				{
					if (positions[i].breakage->fragments[k].frag_type_idx == frag_idxs[j])
						num_frags++;
				}
			}
		}
	}
	return num_frags;
}


mass_t SeqPath::calculate_peptide_mass(Config *config) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	mass_t pep_mass=0;

	int i;
	for (i=0; i<positions.size()-1; i++)
	{
		pep_mass+=aa2mass[positions[i].aa];
	}
	return pep_mass;
}

int SeqPath::get_num_correct_aas(const Peptide& pep, Config *config) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const vector<int>& pep_aas = pep.get_amino_acids();

	int num_correct=0;
	int i;

	vector<mass_t> pep_masses;
	vector<int> path_aas;
	
	get_amino_acids(path_aas);

	pep_masses.resize(pep_aas.size(),0);
	for (i=1; i<pep_aas.size(); i++)
		pep_masses[i]=pep_masses[i-1]+aa2mass[pep_aas[i-1]];

	mass_t path_mass = n_term_mass;
	for (i=0; i<path_aas.size(); i++)
	{
		const int path_aa = path_aas[i];
		int j;
		for (j=0; j<pep_aas.size(); j++)
		{
			const int pep_aa = pep_aas[j];
			if (fabs(pep_masses[j]-path_mass)<1.0 && pep_aas[j] == path_aas[i])
			{
				num_correct++;
				break;
			}
		}

		path_mass += aa2mass[path_aas[i]];
	}

	return num_correct;
}





void PrmGraph::parse_seq_path_to_smaller_ones(const SeqPath& org_path, 
										  int min_length, 
										  int max_length, 
										  vector<SeqPath>& new_paths)
{
	const vector<PathPos>& org_positions = org_path.positions;
	new_paths.clear();

	
	const int num_org_positions = org_positions.size();
	const int last_org_position = num_org_positions-1;
	if (num_org_positions<=min_length)
		return;

	const int num_iters = org_positions.size()-min_length;
	new_paths.resize(num_iters);

	int np_idx=0;

	int i;
	for (i=0; i<num_iters; i++)
	{
		if (org_positions[i].aa>=0 && org_positions[i].edge_idx>=0)
		{
			int j=0;
			SeqPath& path = new_paths[np_idx];
			path.positions.clear();

			if (i==0)
				path.n_term_aa = org_path.n_term_aa;

			path.n_term_mass = org_positions[i].mass;
			path.path_score = 0;
			path.multi_path_rank = org_path.multi_path_rank;
			
			const int start_pos=i;
			bool score_this_path = false;
			int pos=i;
			while (pos<num_org_positions )
			{
				path.positions.push_back(org_positions[pos]);
				pos++;
			
				while (pos<num_org_positions  && org_positions[pos].node_idx<0)
					path.positions.push_back(org_positions[pos++]);
				
				const int length = pos-start_pos;

				if (pos<num_org_positions && length>=min_length && length<=max_length)
				{
					path.c_term_mass = org_positions[pos].mass;
					if (pos==last_org_position)
						path.c_term_aa= org_path.c_term_aa;

					path.positions.push_back(org_positions[pos]);
					score_this_path = true;
					break;
				}
			}	


			if (path.positions.size()>max_length+1 || ! score_this_path)
			{
				path.positions.clear();
				continue;
			}

			np_idx++; // stores the path

			// compute combo scores
			vector<PathPos>& new_positions = path.positions;
			const int num_new_positions = new_positions.size();
			const int last_new_position = num_new_positions-1;

			path.path_score=0;

			// special treatment for scores at the begining/end of the parsed tag
			// (need to use the Gap score on the terminal ends)
			if (new_positions[0].node_idx == org_positions[0].node_idx)
			{
				new_positions[0].node_score = org_positions[0].node_score;
				path.path_score += new_positions[0].node_score; 
				path.path_score += new_positions[0].edge_variant_score;
			}
			else
			{
				// find edge variant
				const int aa = new_positions[0].aa;
				const MultiEdge& edge = multi_edges[new_positions[0].edge_idx];
				const int var_idx = edge.get_variant_idx(aa);
				if (var_idx<0)
				{
					cout << "Error: bad aa in var idx! 1" << endl;
					exit(1);
				}
				new_positions[0].node_score = model->get_node_combo_score(this,new_positions[0].node_idx,
														-1,0,new_positions[0].edge_idx,var_idx);

				path.path_score += new_positions[0].node_score;
				path.path_score += new_positions[0].edge_variant_score;
			}

			if (new_positions[last_new_position].node_idx == org_positions[last_org_position].node_idx)
			{
				new_positions[last_new_position].node_score = org_positions[last_org_position].node_score;
				path.path_score += new_positions[last_new_position].node_score;
				path.path_score += new_positions[last_new_position].edge_variant_score;
			}
			else
			{
				int k=last_new_position-1;
				while (k>0 && new_positions[k].edge_idx<0)
					k--;

				if (k==0)
				{
					cout << "Error: bad parse of tag!" << endl;
					exit(1);
				}

				const int aa = new_positions[k].aa;
				const int edge_idx = new_positions[k].edge_idx;
				const MultiEdge& edge = multi_edges[edge_idx];
				const int var_idx = edge.get_variant_idx(aa);
				if (var_idx<0)
				{
					cout << "Error: bad aa in var idx! 2" << endl;
					exit(1);
				}

				new_positions[last_new_position].node_score = model->get_node_combo_score(this,
					new_positions[last_new_position].node_idx,edge_idx,var_idx,-1,0);

				path.path_score += new_positions[last_new_position].node_score;
				////	no edge score for last position!
				//path.path_score += new_positions[last_new_position].edge_variant_score;
				new_positions[last_new_position].edge_variant_score =0;

			}

			for (j=1; j<last_new_position; j++)
			{
				path.positions[j].node_score = org_positions[i+j].node_score;
				path.path_score += new_positions[j].node_score; 
				path.path_score += new_positions[j].edge_variant_score;
			}
			
			path.make_seq_str(config);
			path.charge = charge;
			path.pm_with_19 = pm_with_19;
			path.prm_ptr = this;
			path.sort_key = path.path_score;
		}
	}

	while(new_paths.size()>0 && new_paths[new_paths.size()-1].positions.size()==0)
		new_paths.pop_back();
}



bool SeqPath::check_if_correct(const string& str, Config *config) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const char *path_str = seq_str.c_str();
	const char *corr_str = str.c_str();

	int len_path_str = strlen(path_str);
	int len_corr_str = strlen(corr_str);

	if (len_path_str>len_corr_str)
		return false;

	
	int i;
	for (i=0; i<=len_corr_str-len_path_str; i++)
	{
		int j;
		bool correct_seq = true;
		for (j=0; j<len_path_str; j++)
			if (! (path_str[j] == corr_str[i+j] ||
				  (path_str[j] == 'I' && corr_str[i+j]== 'L') ||
				  (path_str[j] == 'L' && corr_str[i+j]== 'I') ||
				  (path_str[j] == 'Q' && corr_str[i+j]== 'K') ||
				  (path_str[j] == 'K' && corr_str[i+j]== 'Q') ) )
			{
				correct_seq = false;
				break;
			}



		if (correct_seq)
		{
			// check prefix mass
			Peptide pep;
			pep.parse_from_string(config,corr_str);
			const vector<int>& aas= pep.get_amino_acids();
			mass_t mass=0;
			int j;

			if (n_term_mass == 0 && i==0)
				return true;

			for (j=0; j<aas.size(); j++)
			{
				mass+=aa2mass[aas[j]];
				if (fabs(mass-this->n_term_mass)<6)
					return true;

				if (mass>n_term_mass)
					break;
			}
		}
	}

	
	
	return false;
}



bool SeqPath::check_if_cut_correct(const vector<mass_t>& exp_cut_masses, mass_t tolerance) const
{
	int c_idx=0;
	int i;

	for (i=0; i<positions.size()-1; i++)
	{
		if (positions[i].node_idx<0)
			continue;

		const mass_t pos_mass = positions[i].mass;
		while (c_idx<exp_cut_masses.size() && exp_cut_masses[c_idx]< pos_mass - tolerance)
			c_idx++;

		if (c_idx == exp_cut_masses.size() || exp_cut_masses[c_idx]-tolerance > pos_mass)
			return false;

	}
	return true;
}





void SeqPath::make_seq_str(Config *config)
{
	const vector<string>& aa2label = config->get_aa2label();
	int i;

	seq_str = "";

	if (n_term_aa>N_TERM)
		seq_str += aa2label[n_term_aa] ;
	
	if (positions.size()>0)
		for (i=0; i<positions.size()-1; i++)
			seq_str += aa2label[positions[i].aa];

	if (c_term_aa>C_TERM)
		seq_str +=  aa2label[c_term_aa];

}

void MultiPath::print(Config *config, ostream& os) const
{
	os << "MultiPath: " << n_term_mass << " - " << c_term_mass << 
		   " score: " << path_score << "  ";
	int i;

	cout << " Nodes:";
	for (i=0; i<node_idxs.size(); i++)
		cout << " " << node_idxs[i];
	cout << "   Edges:";
	for (i=0; i<edge_idxs.size(); i++)
		cout << " " << edge_idxs[i];
	cout << endl;
}


void SeqPath::print(ostream& os) const
{
	os << setprecision(5);
	os << n_term_mass << " " << seq_str << " " << c_term_mass << " (s: " << 
		this->path_score << ")" << endl;
}


void SeqPath::print_with_probs(ostream& os) const
{
	os << setprecision(5);
	os << n_term_mass << " " << seq_str << " (s: " << 
		this->path_score << ")";

	int i;
	os << setprecision(2);
	for (i=0; i<this->positions.size(); i++)
	{
		os << " X " ;
	}
	os << endl;
}

void SeqPath::print_full(Config *config, ostream &os) const
{
	const vector<string>& aa2label = config->get_aa2label();
	os << n_term_mass << " " << seq_str << " " << c_term_mass << " (s: " << 
		this->path_score << ")  NF: " << this->num_forbidden_nodes << endl;
	int i;

	if (positions.size()==0)
		return;

	for (i=0; i<positions.size()-1; i++)
	{
		cout << left << setw(3) << i;
		cout << setw(5) << left << aa2label[positions[i].aa] << "\t" <<
			positions[i].node_idx << "\t" << positions[i].edge_idx<< "\t" << 
			fixed << setprecision(2) << positions[i].mass << "\t" << 
			positions[i].node_score << "\t" << positions[i].edge_variant_score << endl;
	}
	cout << "Exact score: " << path_score << "\t" << "Approx score: " << setprecision(4) << multi_path_score << endl;
		
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -