⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prmgraph.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 5 页
字号:
		const vector<int>& in_idxs = nodes[i].in_edge_idxs;
		for (e=0; e<in_idxs.size(); e++)
		{
			const int prev_node_idx = multi_edges[in_idxs[e]].n_idx;
			if (nodes[prev_node_idx].score>max_in_score)
			{
				nodes[i].idx_max_in_score_node = prev_node_idx;
				max_in_score = nodes[prev_node_idx].score; 
			}
		}

		score_t max_out_score = NEG_INF;
		nodes[i].idx_max_out_score_node=-1;

		const vector<int>& out_idxs = nodes[i].out_edge_idxs;
		for (e=0; e<out_idxs.size(); e++)
		{
			const int next_node_idx = multi_edges[out_idxs[e]].c_idx;
			if (nodes[next_node_idx].score>max_out_score)
			{
				nodes[i].idx_max_out_score_node = next_node_idx;
				max_out_score = nodes[next_node_idx].score; 
			}
		}		
	}
}



// this function performs all the scoring operations on edges 
// (amino acid scores, missing cleavage scores etc.)
// *** Most of the scoring is now done through EdgeModel !!!!!
//
score_t PrmGraph::calc_edge_variant_score(const MultiEdge& edge, int num_aa, int *aa) const
{
	const Node& n_node = nodes[edge.n_idx];
	const Node& c_node = nodes[edge.c_idx];


	// give digest score only if there aren't other digest nodes with intensity
	if (num_aa == 1 && nodes[edge.n_idx].type == NODE_DIGEST && nodes[edge.c_idx].type == NODE_C_TERM)
	{
		//cout << config->get_aa2label()[aa[0]] << " ";
		// check for digest edge
		const vector<int>& c_term_digest_aas = config->get_c_term_digest_aas();
		if (c_term_digest_aas.size()>0)
		{
			int i;
			for (i=0; i<c_term_digest_aas.size(); i++)
				if (aa[0] == c_term_digest_aas[i])
					break;

			if (i == c_term_digest_aas.size())
			{
				return digest_node_score*-0.5;
			}

			bool other_digest_is_good=false;
			for (i=0; i<this->c_digest_node_idxs.size(); i++)
			{
				if (c_digest_node_idxs[i] == edge.n_idx)
					continue;
				if (nodes[c_digest_node_idxs[i]].breakage.fragments.size()>0)
					other_digest_is_good=true;
			}
			if (other_digest_is_good)
			{
				return digest_node_score*0.5;
			}
			return digest_node_score;
		}
	}

	if (num_aa == 1 && nodes[edge.c_idx].type == NODE_DIGEST && nodes[edge.n_idx].type == NODE_N_TERM)
	{
		const vector<int>& n_term_digest_aas = config->get_n_term_digest_aas();
		if (n_term_digest_aas.size()>0)
		{
			int i;
			for (i=0; i<n_term_digest_aas.size(); i++)
				if (aa[0] == n_term_digest_aas[i])
					break;

			if (i == n_term_digest_aas.size())
				return digest_node_score*-0.5;
			
			bool other_digest_is_good=false;
			for (i=0; i<this->n_digest_node_idxs.size(); i++)
			{
				if (n_digest_node_idxs[i] == edge.c_idx)
					continue;
				if (nodes[n_digest_node_idxs[i]].score>0)
					other_digest_is_good=true;
			}
			if (other_digest_is_good)
				return 0;
			return digest_node_score;
		}
	}

	if (num_aa >1)
	{
	//	return ((num_aa-1)*model->get_missing_breakage_score(charge,size_idx,edge.c_break->region_idx));
	//	return ((num_aa-1)*-6);
	}

	return 0;

}













// removes all edges to and from nodes with the active flag set to 0
void PrmGraph::remove_edges_from_inactive_nodes()
{
	int i;

	for (i=0; i<nodes.size(); i++)
	{
		if (nodes[i].active)
			continue;

		int j;
		Node& node = nodes[i];

		for (j=0; j<node.in_edge_idxs.size(); j++)
		{
			// remove edge from other node's list
			const int e_idx = node.in_edge_idxs[j];
			Node& other_node = nodes[multi_edges[e_idx].n_idx];
			int k;

			for (k=0; k<other_node.out_edge_idxs.size(); k++)
				if (other_node.out_edge_idxs[k] == e_idx)
					break;

			if (k== other_node.out_edge_idxs.size())
			{
				cout << "Error: missing out edge idx!" << endl;
				exit(1);
			}

			other_node.out_edge_idxs[k] = other_node.out_edge_idxs[other_node.out_edge_idxs.size()-1];
			other_node.out_edge_idxs.pop_back();			
		}

		for (j=0; j<node.out_edge_idxs.size(); j++)
		{
			// remove edge from other node's list
			const int e_idx = node.out_edge_idxs[j];
			Node& other_node = nodes[multi_edges[e_idx].c_idx];
			int k;

			for (k=0; k<other_node.in_edge_idxs.size(); k++)
				if (other_node.in_edge_idxs[k] == e_idx)
					break;

			if (k== other_node.in_edge_idxs.size())
			{
				cout << "Error: missing in edge idx!" << endl;
				exit(1);
			}

			other_node.in_edge_idxs[k] = other_node.in_edge_idxs[other_node.in_edge_idxs.size()-1];
			other_node.in_edge_idxs.pop_back();			
		}

		node.in_edge_idxs.clear();
		node.out_edge_idxs.clear();
	}
}






struct idx_score {
	idx_score() : edge_idx(-1), score(NEG_INF) {}
	bool operator< (const idx_score& other) const
	{
		return score>other.score;
	}
	int edge_idx;
	score_t score;
};



// sorts edges according to the value to which they can possibly lead
// uses the max_gains table which state for each node i and length n, what is the maximal
// score attainable by using i + n amino acids in the graph
void PrmGraph::sort_outgoing_edges_according_to_max_gains(const vector< vector< score_t > >& max_gains)
{
	int i;
	const int last_size_idx = max_gains[0].size()-1;
	for (i=0; i<nodes.size(); i++)
	{
		if (nodes[i].out_edge_idxs.size()==0)
			continue;
		
		int j;
		vector<idx_score> pairs;
		pairs.resize(nodes[i].out_edge_idxs.size());
		for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
		{
			const int edge_idx = nodes[i].out_edge_idxs[j];
			const MultiEdge& edge = multi_edges[edge_idx];
			pairs[j].edge_idx= edge_idx;
			pairs[j].score = max_gains[edge.c_idx][last_size_idx] +
							nodes[edge.c_idx].score + edge.max_variant_score;
		}
		sort(pairs.begin(),pairs.end());

		for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
			nodes[i].out_edge_idxs[j]=pairs[j].edge_idx;
	}
}

/***********************************************************************
// returns the optimal ordering of nodes for the search
************************************************************************/
void PrmGraph::get_node_ordering_according_to_max_gains(
	 vector< vector< score_t > >& max_gains_for_length,
	 vector<int>& node_order) const
{
	vector<idx_score> pairs;
	pairs.resize(nodes.size());
	int i;
	const int last_size_idx = max_gains_for_length[0].size()-1;
	for (i=0; i<nodes.size(); i++)
	{
		pairs[i].edge_idx=i;
		pairs[i].score = nodes[i].score + max_gains_for_length[i][last_size_idx];
	}

	sort(pairs.begin(),pairs.end());

	node_order.resize(nodes.size());
	
	for (i=0; i<pairs.size(); i++)
		node_order[i]=pairs[i].edge_idx;
}


// sorts edges according to the value to which they lead
void PrmGraph::sort_outgoing_edges()
{
	int i;
	for (i=0; i<nodes.size(); i++)
	{
		if (nodes[i].out_edge_idxs.size()==0)
			continue;
		
		int j;
		vector<idx_score> pairs;
		pairs.resize(nodes[i].out_edge_idxs.size());
		for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
		{
			pairs[j].edge_idx=nodes[i].out_edge_idxs[j];
			pairs[j].score = multi_edges[nodes[i].out_edge_idxs[j]].max_variant_score + 
							 nodes[multi_edges[nodes[i].out_edge_idxs[j]].c_idx].score;
		}
		sort(pairs.begin(),pairs.end());

		for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
			nodes[i].out_edge_idxs[j]=pairs[j].edge_idx;
	}
}





struct edge_idx_pair {
	bool operator< (const edge_idx_pair& other) const
	{
		return n_idx<other.n_idx;
	}

	int edge_idx;
	int n_idx;
};


/*******************************************************************
// creates a path object from a collection of edges that are assumed
// to correspond to a path in the graph
********************************************************************/
void PrmGraph::create_path_from_edges(vector<int>& edge_idxs, MultiPath& path) const
{
	int i;
	vector<edge_idx_pair> pairs;

	if (edge_idxs.size()==0)
	{
		path.path_score = 0;
		return;
	}
	
	for (i=0; i<edge_idxs.size(); i++)
	{
		edge_idx_pair p;
		p.edge_idx = edge_idxs[i];
		p.n_idx = multi_edges[edge_idxs[i]].n_idx;

		pairs.push_back(p);
	}
	sort(pairs.begin(),pairs.end());

	path.edge_idxs.resize(pairs.size());
	for (i=0; i<pairs.size(); i++)
		path.edge_idxs[i]=pairs[i].edge_idx;

	path.n_term_mass= nodes[multi_edges[edge_idxs[0]].n_idx].mass;
	path.c_term_mass= nodes[multi_edges[edge_idxs[edge_idxs.size()-1]].c_idx].mass;

	for (i=1; i<path.edge_idxs.size(); i++)
	{
		if (multi_edges[path.edge_idxs[i]].n_idx != multi_edges[path.edge_idxs[i-1]].c_idx)
		{
			cout << "Error: inconsistent edges when creating path!" << endl;
			exit(1);
		}
	}

	// collect breakage info and edges

	path.edge_idxs = edge_idxs;
	path.breakages.clear();
	path.node_idxs.clear();
	for (i=0; i<edge_idxs.size(); i++)
	{
		path.breakages.push_back(multi_edges[edge_idxs[i]].n_break);
		path.node_idxs.push_back(multi_edges[edge_idxs[i]].n_idx);
	}
	path.breakages.push_back(multi_edges[edge_idxs[i-1]].c_break);
	path.node_idxs.push_back(multi_edges[edge_idxs[i-1]].c_idx);
}



// finds the highest scoring continuous subpath for a given peptide in the graph
SeqPath PrmGraph::get_highest_scoring_subpath(const Peptide& peptide, mass_t start_mass) const
{
	SeqPath ret_path;
	const vector<int>& path_aas = peptide.get_amino_acids();
	mass_t pre_mass = pre_mass = start_mass;
	mass_t double_tolerance = config->get_tolerance() * 3.0;
	vector<bool> use_as_start_pos;
	int i;
	
	use_as_start_pos.resize(nodes.size(),true);
	ret_path.path_score = 0;

	// give start idx double tolerance
	for (i=0; i<path_aas.size(); i++)
	{
		int j;
		PeakRange nr = this->get_nodes_in_range(pre_mass - double_tolerance, pre_mass + double_tolerance);
		for (j=0; j<nr.num_peaks; j++)
		{
			int node_idx = nr.low_idx+j;
			if (! use_as_start_pos[node_idx])
				continue;

			// find max correct subpath from this node
			SeqPath path;

			path.n_term_mass = nodes[node_idx].mass;
			path.c_term_mass = nodes[node_idx].mass;
			path.path_score = 0;
			path.positions.clear();

			// loop until end is reached
			int k=i;
			
			int lass_good_edge_idx = -1;
			while (k<path_aas.size())
			{
				const Node& node = nodes[node_idx];
				int e;
				for (e=0; e<node.out_edge_idxs.size(); e++)
				{
					const int& e_idx = node.out_edge_idxs[e];
					const MultiEdge& edge = multi_edges[e_idx];

					int var_idx = edge.get_variant_idx(1,(int *)&path_aas[k]);
					if (var_idx<0 && i<path_aas.size()-1)
						var_idx = edge.get_variant_idx(2,(int *)&path_aas[k]);

					if (var_idx>=0)
					{
						path.add_edge_variant(edge,e_idx,var_idx);
						k+=edge.num_aa;
						lass_good_edge_idx = e_idx;
						break;
					}
				}
				if (e == node.out_edge_idxs.size())
					break;
			}

			// add last position
			if (lass_good_edge_idx>=0)
			{
				const MultiEdge& last_edge = multi_edges[lass_good_edge_idx];
				PathPos last_pos;

				last_pos.breakage = last_edge.c_break;
				last_pos.edge_idx =-1;
				last_pos.mass = last_edge.c_break->mass;
				last_pos.node_idx = last_edge.c_idx;
				last_pos.node_score = last_edge.c_break->score;
				path.positions.push_back(last_pos);
				
				path.path_score += last_pos.node_score;
				path.c_term_mass = last_pos.mass;
			}

			// check if this path is better
			if (path.path_score>ret_path.path_score)
				ret_path = path;
		}	

		pre_mass += config->get_aa2mass()[path_aas[i]];
	}
	
	return ret_path;
}





// finds the longest continuous subpath for a given peptide in the graph
SeqPath PrmGraph::get_longest_subpath(const Peptide& peptide, mass_t start_mass, bool verbose)
{
	const int max_edge_length     = config->get_max_edge_length();
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	vector<mass_t> prefix_masses;
	vector<int> path_aas = peptide.get_amino_acids();
	vector<int> path_edges;
	vector<int> path_aa_count;
	int a_start=-1;
	mass_t pre_mass = 0;
	mass_t double_tolerance = config->get_tolerance() *  5.0;
	int i;

	path_edges.clear();

	for (i=0; i<path_aas.size(); i++)
		if (path_aas[i]==Ile)
			path_aas[i]=Leu;

	prefix_masses.push_back(start_mass);
	for (i=0; i<path_aas.size(); i++)
		prefix_masses.push_back(p

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -