⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 homeomorphic.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "Homeomorphic.h"
#include "AnnotatedSpectrum.h"
#include "DeNovoDP.h"
#include "auxfun.h"





void hgraph::create_graph(Config *_config, Peptide& p)
{
	config = _config;
	org_pep = p;
	int i,j;

	const int num_aa = p.get_num_aas();
	const vector<int>& amino_acids = p.get_amino_acids();
	const vector<int>& session_aas = config->get_session_aas();
	const vector<string>& aa2label = config->get_aa2label();
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const mass_t peptide_mass = p.get_mass();
	const mass_t p18_mass = peptide_mass + MASS_H2O;
	const mass_t tolerance = config->get_tolerance();


	vector<mass_t> break_masses;
	p.calc_expected_breakage_masses(config,break_masses);

	nodes.resize(2*num_aa+2);
	nodes[0].mass=0;
	nodes[0].type = 0;
	nodes[0].idx =0;

	nodes[1].mass = p18_mass;
	nodes[1].type = 1;
	nodes[1].idx = 0;

	for (i=1; i<=num_aa; i++)
	{
		int n_idx=2*i;

		nodes[n_idx].mass = nodes[n_idx-2].mass + aa2mass[amino_acids[i-1]];
		nodes[n_idx].type = 0;
		nodes[n_idx].idx = i;

		n_idx++;

		nodes[n_idx].mass = p18_mass - nodes[n_idx-1].mass;
		nodes[n_idx].type = 1;
		nodes[n_idx].idx = i;
	}
	sort(nodes.begin(),nodes.end());

	// connect edges (single only)

	i=0;
	j=1;

	while (i<nodes.size()-2)
	{
		int k=i+1;
		while (nodes[k].type != 0)
			k++;

		hedge e;
		e.n1=i;
		e.n2=k;
	
		nodes[i].add_edge(e);
		i=k;
	}

	while (j<nodes.size()-2)
	{
		int k=j+1;
		while (nodes[k].type != 1)
			k++;

		hedge e;
		e.n1=j;
		e.n2=k;
	
		nodes[j].add_edge(e);
		j=k;	
	}



	// connect crossover edges
	for (i=0; i<nodes.size()-2; i++)
	{
		int j;
		for (j=0; j<session_aas.size(); j++)
		{
			const int aa= session_aas[j];
			if (aa==Ile)
				continue;

			int k;
			for (k=i+1; k<nodes.size(); k++)
			{
				if (nodes[i].type != nodes[k].type)
				{
					hedge e;
					e.n1=i;
					e.n2=k;
					e.num_aa=1;
					e.score=1;
					e.is_crossover = (nodes[i].type != nodes[k].type);
					e.aa[0] = aa;

					nodes[i].add_edge(e);
					break;
				}
			}
		}
	}
}


bool hgraph::has_two_paths()
{
	const int num_aa = org_pep.get_num_aas();
	const int goal_node_idx = nodes.size()-2;
	vector<int> out_idxs, prev_idxs;

	bool print = true;

	// init different vectors
	out_idxs.clear();
	out_idxs.resize(nodes.size(),0);
	prev_idxs.clear();
	prev_idxs.resize(nodes.size(),-1);

	// do a DFS
	int p=0;
	int count =0;
    
	while (1)
	{
		while (out_idxs[p]<nodes[p].edges.size())
		{

			// take this edge
			const hedge& e = nodes[p].edges[out_idxs[p]];
			const int node_idx = nodes[e.n2].idx ;
			const hnode& node = nodes[e.n2];

			int old_p=p;
			p=  e.n2;

			prev_idxs[p]=old_p;;
			out_idxs[old_p]++;

			if (p == goal_node_idx )
			{
				// check for forbidden
				count++;

				if (count>1)
					return true;
				
				break;
			}

		}

		// back track
		
		out_idxs[p]=0;
		p=prev_idxs[p];

		if (p<0)
			break;
	}

	return false;
}


void hgraph::get_delta_path_nums(vector<int>& counts, int max_print)
{
	const int num_aa = org_pep.get_num_aas();
	const int goal_node_idx = nodes.size()-2;
	vector<bool> used_idxs;
	vector<int> out_idxs, prev_idxs;

	// init different vectors
	counts.clear();
	counts.resize(num_aa*2,0);
	used_idxs.resize(num_aa+1,false);
	used_idxs[0]=true;
	out_idxs.clear();
	out_idxs.resize(nodes.size(),0);
	prev_idxs.clear();
	prev_idxs.resize(nodes.size(),-1);

	// do a DFS
	int score=0;
	int p=0;

//	int n;
//	for (n=0; n<nodes.size(); n++)
//		cout << n << " " << nodes[n].idx << (nodes[n].type==0? "  " : "' ") << 
//		        nodes[n].edges.size() << endl;

	while (1)
	{
		while (out_idxs[p]<nodes[p].edges.size())
		{

			// take this edge
			const hedge& e = nodes[p].edges[out_idxs[p]];
			const int node_idx = nodes[e.n2].idx ;
			const hnode& node = nodes[e.n2];

			if (used_idxs[node_idx]) // forbidden pair check
			{
				out_idxs[p]++;
				continue;
			}

			int old_p=p;
			p=  e.n2;
			score+= e.score;
			prev_idxs[p]=old_p;
			used_idxs[node_idx]=true;
			out_idxs[old_p]++;

			if (p == goal_node_idx )
			{
				// check for forbidden
				int bin = num_aa-score;
				counts[bin]++;
				if (bin<=max_print)
				{
					int q=p;
					vector<int> ns;
					while (q>=0)
					{
						ns.push_back(q);
						q=prev_idxs[q];
					}

					sort(ns.begin(),ns.end());

					// count alternating
					int type=0;
					int alt_count =0;
					int i;
					for (i=0; i<ns.size(); i++)
					{
						if (nodes[ns[i]].type != type)
						{
							alt_count++;
							type = nodes[ns[i]].type;
						}
					}

					if (alt_count>4 && bin == 0)
					{
						cout << alt_count << "  PEP: ";
						for (q=0; q<ns.size(); q++)
						{
							cout << nodes[ns[q]].idx;
							if (nodes[ns[q]].type == 1)
								cout << "'";
							cout << " ";
						}
						cout << endl;
					}
				}
				break;
			}

		}

		// back track
		
		out_idxs[p]=0;
		used_idxs[nodes[p].idx]=false;
		p=prev_idxs[p];

		if (p<0)
			break;

		score -= nodes[p].edges[out_idxs[p]-1].score;
	}


}




void hgraph::print() const
{
	int i;
	int type;
	for (type =0; type<=1; type++)
	{
		for (i=0; i<nodes.size(); i++)
		{
			if (nodes[i].type == type)
			{
				const vector<hedge>& edges = nodes[i].edges;
				int j;
				for (j=0; j<edges.size(); j++)
					if (edges[j].is_crossover)
					{
						if (type == 0)
						{
							cout << nodes[edges[j].n1].idx << " -> " << nodes[edges[j].n2].idx << "' *" << endl; 
						}
						else
							cout << nodes[edges[j].n1].idx << "' ->" << nodes[edges[j].n2].idx << " *" <<endl;
					}
				/*	else
					{
						if (nodes[edges[j].n1].type == 0)
						{
							cout << nodes[edges[j].n1].idx << " -> " << nodes[edges[j].n2].idx << endl;
						}
						else
						{
							cout << nodes[edges[j].n1].idx << "' -> " << nodes[edges[j].n2].idx << "'" << endl;
						}
					}*/
			}
			
		}
	}
}


void hexp1(Config *config)
{
	int i,size;
	int total = 5000;

	for (size =5; size<=20; size++)
	{
		int count =0;
		for (i=0; i<total; i++)
		{
			Peptide p;
			vector<int> counts;
			p.generate_random_peptide(config,size);

			hgraph hg;
			hg.create_graph(config,p);
		
			if (hg.has_two_paths())
			{
				count++;
				hg.print();
				cout << endl;
			}

		
		}

		cout << size << " " << count << "/" << total << "=" << (double)count/(double)total << endl;
	}
}





//-------------------------------------------


struct cross_jump {
	cross_jump() : idx1(-1), idx2(-1), aa(-1), aa2(-1) {}
	int idx1,idx2;
	int aa,aa2;
};

struct node_mass {
	bool operator< ( const node_mass& other) const
	{
		return mass<other.mass;
	}
		
	int source;
	mass_t mass;
	int idx;
};

int get_num_paths(Config *config, Peptide& peptide, 
						 mass_t tolerance, bool check_forbidden, bool allow_double,
						 bool print)
{
	const vector<int>& session_aas = config->get_session_aas();
	const vector<string>& aa2label = config->get_aa2label();
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const mass_t peptide_mass = peptide.get_mass();

	vector<mass_t> break_masses;
	vector<mass_t> other_masses;
	peptide.calc_expected_breakage_masses(config,break_masses);

	int i;
	for (i=0; i<break_masses.size(); i++)
	{
		other_masses.push_back(peptide_mass - break_masses[i]+18);
		if (print)
			cout << i<< " " << setw(8) << left <<break_masses[i] << " " << setw(8) << left << other_masses[i] << endl;
	}

	vector<cross_jump> over,back;
	over.clear();
	back.clear();
	int length = break_masses.size();

	for (i=0; i<break_masses.size(); i++)
	{
		int j;
		for (j=0; j<other_masses.size(); j++)
		{
			int a;
			for (a=0; a<session_aas.size(); a++)
			{
				const int& aa = session_aas[a];
				if (aa == Ile || aa == Lys)
					continue;

				if (fabs(break_masses[i] + aa2mass[aa] - other_masses[j])<=tolerance)
				{
					cross_jump cj;
					cj.aa = aa;
					cj.idx1 = i;
					cj.idx2 = j;

					over.push_back(cj);					
				}
				else
				{
					if (allow_double)
					{
						int b;
						for (b=0; b<session_aas.size(); b++)
						{
							const int& bb = session_aas[b];
							if (bb == Ile || bb == Lys)
								continue;

							if (! config->is_allowed_double_edge(aa,bb))
								continue;

							if (fabs(break_masses[i] + aa2mass[aa] + aa2mass[bb]  - other_masses[j])<=tolerance)
							{
								cross_jump cj;
								cj.aa = aa;
								cj.aa2 = bb;
								cj.idx1 = i;
								cj.idx2 = j;
								over.push_back(cj);
							}				
						}
					}
				}
			}
		}
	}


	for (i=0; i<break_masses.size(); i++)
	{
		int j;
		for (j=other_masses.size()-1; j>=0; j--)
		{
			int a;
			for (a=0; a<session_aas.size(); a++)
			{
				const int& aa = session_aas[a];
				if (aa == Ile || aa == Lys)
					continue;

				if (fabs(break_masses[i] - aa2mass[aa] - other_masses[j])<=tolerance)
				{
					cross_jump cj;
					cj.aa = aa;
					cj.idx1 = i;
					cj.idx2 = j;

					back.push_back(cj);
				}
				else
				{
					if (allow_double)
					{
						int b;
						for (b=0; b<session_aas.size(); b++)
						{
							const int& bb = session_aas[b];
							if (bb == Ile || bb == Lys)
								continue;

							if (fabs(break_masses[i] - aa2mass[aa] - aa2mass[bb]  - other_masses[j])<=tolerance)
							{
								cross_jump cj;
								cj.aa = aa;
								cj.aa2 = bb;
								cj.idx1 = i;
								cj.idx2 = j;
								back.push_back(cj);
							}				
						}
					}
				}
			}
		}
	}

//	if (over.size()==0 || back.size()==0)
//		return;

	if (print && over.size()>0 && back.size() >0)
	{
		int i;
		for (i=0; i<over.size(); i++)
		{
			string aa_label = aa2label[over[i].aa] ;
			mass_t aa_mass = aa2mass[over[i].aa];
			if (over[i].aa2>=0)
			{
				aa_label += aa2label[over[i].aa2];
				aa_mass += aa2mass[over[i].aa2];
			}
			cout << break_masses[over[i].idx1] << " -> " << aa_label << " (" << aa_mass
						 << ") " << " -> " << other_masses[over[i].idx2] << endl;
		}

		for (i=0; i<back.size(); i++)
		{
			string aa_label = aa2label[back[i].aa] ;
			mass_t aa_mass = aa2mass[back[i].aa];
			if (back[i].aa2>=0)
			{
				aa_label += aa2label[back[i].aa2];
				aa_mass += aa2mass[back[i].aa2];
			}
			cout << break_masses[back[i].idx1] << " <- " << aa_label << " (" <<
						aa_mass << ") " << " <- " << other_masses[back[i].idx2] << endl;
		}
	}

	// create graph
	vector<node_mass> nodes;
	vector< vector<bool> > edges;
	vector< int > break_idxs, other_idxs;

	// fill in nodes

	
	for (i=0; i<break_masses.size(); i++)
	{
		node_mass nm;
		nm.idx=i;
		nm.mass = break_masses[i];
		nm.source=0;
		nodes.push_back(nm);
	}

	for (i=0; i<other_masses.size(); i++)
	{
		node_mass nm;
		nm.idx=i;
		nm.mass = other_masses[i];
		nm.source=1;
		nodes.push_back(nm);
	}

	sort(nodes.begin(),nodes.end());
	int num_nodes = nodes.size();

	// fill in edges
	edges.resize(nodes.size());
	for (i=0; i<nodes.size(); i++)
		edges[i].resize(nodes.size(),false);

	// make translation vectors
	break_idxs.resize(length,-1);
	other_idxs.resize(length,-1);
	for (i=0; i<nodes.size(); i++)
		if (nodes[i].source == 0)
		{
			break_idxs[nodes[i].idx]=i;
		}
		else
			other_idxs[nodes[i].idx]=i;

	
	// fill in regular edges
	for (i=0; i<length-1; i++)
	{
		edges[break_idxs[i]][break_idxs[i+1]]=true;
		edges[other_idxs[i+1]][other_idxs[i]]=true;
	}

	// fill in crossover edges
	for (i=0; i<over.size(); i++)
		edges[break_idxs[over[i].idx1]][other_idxs[over[i].idx2]]=true;

	for (i=0; i<back.size(); i++)
		edges[other_idxs[back[i].idx2]][break_idxs[back[i].idx1]]=true;

	// fill forbidden pairs
	vector<int> forbidden_pairs;
	forbidden_pairs.resize(nodes.size(),-1);
	for (i=0; i<break_idxs.size(); i++)
	{
		int n1=break_idxs[i];
		int n2=other_idxs[i];
		forbidden_pairs[n1]=n2;
		forbidden_pairs[n2]=n1;
	}


	// print graph

	if (print)
	{
		for (i=0; i<nodes.size(); i++)
			cout << i << " " << nodes[i].mass << " " << nodes[i].idx <<" (" << nodes[i].source << ")" << endl;

		cout << endl;
		cout << "   ";
		for (i=0; i<nodes.size(); i++)
			cout << i % 10;
		cout << endl;

		for (i=0; i<nodes.size(); i++)
		{
			int j;
			cout << setw(2) << left << i << " ";
			for (j=0; j<nodes.size(); j++)
				if (edges[i][j])
				{
					cout << "+";
				}
				else
					cout << " ";
			cout << endl;
		}
		cout << endl;
	}

	// find number of paths
	int num_paths_found=0;
	vector<bool> used;
	vector<int> out_idxs, prev_idxs;
	out_idxs.resize(num_nodes);
	for (i=0; i<num_nodes; i++)
		out_idxs[i]=i;

	used.resize(num_nodes,false);
	prev_idxs.resize(num_nodes,-1);

	int p=0;
	used[0]=true;
	while (1)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -