⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 homeomorphic.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	{
		while (out_idxs[p]<num_nodes)
		{
			if (p == num_nodes -2 )
			{
				// check for forbidden
				
				int i=-1;
				if (check_forbidden)
					for (i=0; i<num_nodes; i++)
						if (used[i] && used[forbidden_pairs[i]])
							break;

				if (!check_forbidden || i == num_nodes)
				{
					if (print)
					{
						int j;
						for (j=0; j<num_nodes; j++)
							if (used[j])
								cout << j << " ";
						cout << endl;
					}

					num_paths_found++;

				//	if (num_paths_found>1)
				//		return num_paths_found;
				}

				break;
			}

			if (! edges[p][out_idxs[p]])
			{
				out_idxs[p]++;
				continue;
			}


			// take this edge
			int old_p=p;
			p=out_idxs[p];
			prev_idxs[p]=old_p;
			used[p]=true;
			out_idxs[old_p]++;
		}

		// back track
		out_idxs[p]=p;
		used[p]=false;
		p=prev_idxs[p];

		if (p<0)
			break;
	}
	
	if (print)
		cout << "NUM PATHS = " << num_paths_found << endl;

	return num_paths_found;
}


void random_check_homemorphic(Config *config, int num_peptides, int peptide_length)
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();

	int i,l;

	for (l=5; l<=peptide_length; l++)
	{
		int sum=0;

		for (i=0; i<num_peptides; i++)
		{
			Peptide p;
			p.generate_random_peptide(config,l);
			
		//	cout << p.as_string(config) << endl;
			
		
			if (get_num_paths(config,p,config->get_tolerance(),false,false,false)>1)
				sum++;
		//	cout << endl;
		}

		cout << l << " " << setprecision(4) << (double) sum/(double)num_peptides << endl;
	}
}


void homeomorphic_exp1(Config *config, int num_peptides)
{
	int i,peptide_length = 10;
	mass_t tol;
	vector<mass_t> tolerances;
	for (tol=0; tol<=0.101; tol += 0.01)
		tolerances.push_back(tol);
	tolerances.push_back(0.25);
	tolerances.push_back(0.5);
	tolerances.push_back(0.75);

	int t;
	for (t=0; t<tolerances.size(); t++)
	{
		tol = tolerances[t];
		double sum_tf=0, sum_tt=0, sum_ff=0, sum_ft=0;
		double num_tf=0, num_tt=0, num_ff=0, num_ft=0;
		for (i=0; i<num_peptides; i++)
		{
			Peptide p;
			p.generate_random_peptide(config,peptide_length);
		
			double tf = get_num_paths(config,p,tol,true,false,false);
			double tt = get_num_paths(config,p,tol,true,true,false);
		//	double ff = get_num_paths(config,p,tol,false,false,false);
		//	double ft = get_num_paths(config,p,tol,false,true,false);

			if (tf>1)
			{
				num_tf++;
				sum_tf+=tf;
			}

			if (tt>1)
			{
				num_tt++;
				sum_tt+=tt;
			}

		//	if (ff>1)
		//	{
		//		num_ff++;
		//		sum_ff+=ff;
		//	}

		//	if (ft>1)
		//	{
		//		num_ft++;
		//		sum_ft+=ft;
		//	}
		}
		sum_tf /= num_tf;
		sum_tt /= num_tt;
	//	sum_ff /= num_ff;
	//	sum_ft /= num_ft;

		num_tf /= num_peptides;
		num_tt /= num_peptides;
	//	num_ff /= num_peptides;
	//	num_ft /= num_peptides;

	//	cout << setw(5) << left << setprecision(4) << tol << " " << 
	//		 setw(5) << left << num_tf << " " << setw(5) << left << sum_tf <<
	//		" " << setw(5) << left << num_ff << " " << setw(5) << left << sum_ff << " | ";

		cout << setw(5) << left << setprecision(4) << tol << " " << 
			 setw(5) << left << num_tf << " " << setw(5) << left << sum_tf <<
			" " << setw(5) << left << num_tt << " " << setw(5) << left << sum_tt << endl;
	}
}



void homeomorphic_exp2(Config *config, int num_peptides)
{
	int i,a;
	mass_t tol=0.5;
	vector<int> peptide_lengths;
	for (i=5; i<=32; i+=3)
		peptide_lengths.push_back(i);

//	peptide_lengths.push_back(8);
//	peptide_lengths.push_back(17);
//	peptide_lengths.push_back(26);
	
	tol = 0.5;
	for (a=0; a<peptide_lengths.size(); a++)
	{
		double sum_tf=0, sum_tt=0, sum_ff=0, sum_ft=0;
		double num_tf=0, num_tt=0, num_ff=0, num_ft=0;
		for (i=0; i<num_peptides; i++)
		{
			Peptide p;
			p.generate_random_peptide(config,peptide_lengths[a]);
		
		//	double tf=0;
		//	double ff=0;
			double tf = get_num_paths(config,p,tol,true,false,false);
			double tt = get_num_paths(config,p,tol,true,true,false);
		//	double ff = get_num_paths(config,p,tol,false,false,false);
		//	double ft = get_num_paths(config,p,tol,false,true,false);

		
			if (tf>1)
			{
				num_tf++;
				sum_tf+=tf;
			}

			if (tt>1)
			{
				num_tt++;
				sum_tt+=tt;
			}

		//	if (ff>1)
		//	{
		//		num_ff++;
		//		sum_ff+=ff;
		//	}

		//	if (ft>1)
		//	{
		//		num_ft++;
		//		sum_ft+=ft;
		//	}
		}
		sum_tf /= num_tf;
		sum_tt /= num_tt;
	//	sum_ff /= num_ff;
	//	sum_ft /= num_ft;

		num_tf /= num_peptides;
		num_tt /= num_peptides;
	//	num_ff /= num_peptides;
	//	num_ft /= num_peptides;

	//	cout << setw(5) << left << peptide_lengths[a] << " " << 
	//		 setw(5) << left << num_tf << " " << setw(5) << left << sum_tf <<
	//		" " << setw(5) << left << num_ff << " " << setw(5) << left << sum_ff << " | ";

		cout << setw(5) << left << peptide_lengths[a] << " " << 
			 setw(5) << left << num_tf << " " << setw(5) << left << sum_tf <<
			" " << setw(5) << left << num_tt << " " << setw(5) << left << sum_tt << endl;

	//	break;
	}
}



void homeomorphic_exp3(Config *config, int num_peptides)
{
	int i,peptide_length = 10;
	mass_t tol=0.1;
	
	for (i=0; i<num_peptides; i++)
	{
		Peptide p;
		p.generate_random_peptide(config,6);
		

		if (get_num_paths(config,p,tol,true,false,false)>1)
		{
			cout << p.as_string(config) << endl;
			get_num_paths(config,p,tol,true,false,true);
			cout << endl;
		}
		
	}
}




void read_fasta(char *file_name, int **int_array, int *total_size, Config *con)
{
	char buff[1024];
	int file_size;
	int seq_p=0;
	int *arr;

	const vector<int>& char2aa = con->get_char2aa();


	ifstream file (file_name, ios::in|ios::ate);
	if (file.is_open())
	{
		file_size = file.tellg();
		file.seekg (0, ios::beg);
	}
	else
	{
		cout << "Error: reading!"<< file_name << endl;
		exit(1);
	}



	// allocate sequence memory
	arr = new int[file_size];
	if (int_array == NULL)
	{
		printf("Couldn't allocate memory for sequences!\n");
		exit(1);
	}

	// add sequence terminatng symbol -1
	// before first sequence

	file.getline(buff,1024);
	while(1)
	{
		if (buff[0] != '>')
		{
			file.getline(buff,1024);
			if (! file.good())
				break;

			continue;
		}

		// push the protein name
	

		// read protein sequence
		while ( file.getline(buff,1024) )
		{
			if (buff[0] == '>' || ! file.good())
				break;

			int i,len=strlen(buff);

			for (i=0; i<len; i++)
			{
				if (buff[i]>= 'A' && buff[i]<'Z')
					arr[seq_p]=char2aa[buff[i]];
				if (arr[seq_p]==Gln)
					arr[seq_p]=Lys;
				if (arr[seq_p]==Ile)
					arr[seq_p]=Leu;

				if (arr[seq_p]<Ala || arr[seq_p]>Val)
					seq_p--;

				seq_p++;
			}
		}
	}

	*total_size = seq_p;
	*int_array = arr;

//	int i;
//	for (i=0; i<100; i++)
//		cout << arr[i] << " ";
//	cout << endl;
	cout << "Read fasta file:   " << file_name << endl;
	cout << "Total amino acids: " << seq_p <<endl;
}



struct hpeptide {

	void calc_pep_masses_with_missing_cleavages(int *_addr, int _len, 
								Config *config, int num_miss);

	void calc_pep_masses(int *_addr, int _len, Config *config)
	{
		const vector<mass_t>& aa2mass = config->get_aa2mass();
		addr=_addr;
		len=_len;
		
		const int max_len = (len*2)+2;
		masses.resize(max_len);
		
		int i;

		masses[0]=0;
		for (i=1; i<=len; i++)
			masses[i]=masses[i-1]+aa2mass[*_addr++];
		
		masses[i++]=MASS_H2O;
		for ( ; i< max_len; i++)
			masses[i]=masses[i-1]+aa2mass[*(--_addr)];

		sort(masses.begin(),masses.end());

	/*	cout << len << " : " ;
		for (i=0; i<masses.size(); i++)
			cout << masses[i] << " ";
		cout<<endl;*/
	}


	int find_delta(const hpeptide& other, mass_t tolerance) const
	{
		int count =0;
		int i,j=0;
	
		for (i=0; i<masses.size(); i++)
		{
			mass_t min_mass = masses[i]-tolerance;
			mass_t max_mass = masses[i]+tolerance;

			while (j<other.masses.size() && other.masses[j]<min_mass)
				j++;

			if (j<other.masses.size() && other.masses[j] <= max_mass)
				count++;
		}

		return (masses.size() - count)/2;
	}

	void print(Config *config)
	{
		const vector<string>& aa2label = config->get_aa2label();
		int i;
		for (i=0; i<len; i++)
			cout << aa2label[addr[i]];

		cout << "  " << masses.size() << ": " ;
		for (i=0; i<masses.size(); i++)
			cout << " " << masses[i];
		cout << endl;
	}

	int *addr;
	int len;
	vector<mass_t> masses;
};


// removes a certain number of cleavages at random
void hpeptide::calc_pep_masses_with_missing_cleavages(int *_addr, int _len, 
								Config *config, int num_miss)
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	addr=_addr;
	len=_len;
		
	const int max_len = (len*2)+2;
	masses.clear();
	masses.reserve(max_len-2*num_miss);

	if (len-num_miss<2)
	{
		calc_pep_masses_with_missing_cleavages(_addr,_len,config,num_miss-1);
		return;
	}

	vector<int> skip_flags; // mask which cleavages should be skipped
	skip_flags.resize(len+1,0);

	int i;

	if (num_miss>0)
	{
	//	cout << "miss ";
		for (i=0; i<num_miss; i++)
		{
			int idx=(int)(my_random()*(len-1))+1;

			if (!skip_flags[idx])
			{
				skip_flags[idx]=1;
		//		cout << " " << idx ;
			}
			else
				idx--;
		}
	//	cout << endl;
	}

	masses.push_back(0);
	mass_t previous=0;

	for (i=1; i<=len; i++)
	{
		previous+=aa2mass[*_addr++];
		if (skip_flags[i])
			continue;
		masses.push_back(previous);
	}
		
	previous+=MASS_H2O;

	const int size=masses.size();
	for (i=0; i<size; i++)
		masses.push_back(previous-masses[i]);

	sort(masses.begin(),masses.end());
}



int homeomorphic_levels(Config *config, int *int_array, int max_length, mass_t min_mass,
						 mass_t max_mass, int num_missing_cleavages,  char *res_file, vector<int>& total_counts,
						 vector< vector<int> >& md_counts)
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	vector<hpeptide> peps;
	int max_idx = max_length - 40;
	int i;

//	ofstream fs(res_file,ios::out|ios::app);

	peps.reserve(300000);
	peps.clear();

	total_counts.resize(100,0);
	md_counts.resize(100);
	for (i=0; i<md_counts.size(); i++)
		md_counts[i].resize(100,0);

	// simple fill all peptides

	for (i=0; i<max_idx; i++)
	{
		int *org_addr = int_array+i;
		int *addr= org_addr;
		mass_t m=0;

		while (m<min_mass)
			m+=aa2mass[*addr++];

		if (m>max_mass)
			continue;

		hpeptide pep;
		pep.calc_pep_masses(org_addr,addr-org_addr,config);
		peps.push_back(pep);
	}

	if (peps.size()<2)
		return 0;

	for (i=0; i<10000; i++)
	{
		int min_dis=999999;
		int j;

		int p_idx= (int)(my_random()*peps.size());

		hpeptide p = peps[p_idx];
		p.calc_pep_masses_with_missing_cleavages(p.addr,p.len,config,num_missing_cleavages);

	//	p.print(config);
	//	peps[p_idx].print(config);

		vector<int> counts;
		counts.resize(peps[p_idx].len,0);


		for (j=0; j<peps.size(); j++)
		{
			if (p_idx==j)
				continue;

			int dis = p.find_delta(peps[j],config->get_tolerance());
			counts[dis]++;

			if (dis < min_dis)
			{
				// check if this is the same peptide
				if (dis == 0)
				{
					if (peps[p_idx].len == peps[j].len)
					{
						int k;
						for (k=0; k<peps[p_idx].len; k++)
							if (peps[p_idx].addr[k] != peps[j].addr[k])
								break;

						if (k==peps[p_idx].len)
						{
							counts[0]--;
							continue;
						}
					}
				}

				min_dis = dis;

				if (dis<1)
				{

				//	if (peps[j].find_delta(peps[p_idx],0.5)<1)
				//	{
						cout << "d("<<p_idx<<","<<j<<")= "<<dis << endl;
						p.print(config);
						peps[j].print(config);
						cout << endl;
				//	}
				}
			}

			total_counts[peps[p_idx].len]++;
			md_counts[peps[p_idx].len][min_dis]++;
		}

	//	fs << peps[p_idx].len << " " << peps.size() << " ";
	//	for (j=0; j<counts.size(); j++)
	//		fs << " " << counts[j];
	//	fs << endl;


	//	cout << endl;
	}

//	fs.close();
	return 1;

}


void full_exp(Config *config, int num_missing, int *int_array, int max_length, char *res_file)
{
	mass_t mass = 700;
	const int max_aa = 50;
	int i;

	vector<int> total_counts; // aa
	vector< vector<int> > min_dis_counts; // aa, dis

	total_counts.resize(max_aa,0);
	min_dis_counts.resize(max_aa);
	for (i=0; i<min_dis_counts.size(); i++)
		min_dis_counts[i].resize(max_aa,0);

	mass_t tol = config->get_tolerance() * 2;
	while (mass<2200)
	{
		vector<int> counts;
		vector< vector<int> > md_counts;
		
		mass += (my_random()*99);
		if (! homeomorphic_levels(config, int_array, max_length , mass-tol, mass+tol, num_missing, res_file,
							counts, md_counts) )
			continue;

		int j;
		for (i=0; i<max_aa; i++)
		{
			total_counts[i]+=counts[i];
			for (j=0; j<max_aa; j++)
				min_dis_counts[i][j]+=md_counts[i][j];
		}
	}

	// print results
	ofstream fs(res_file,ios::out|ios::app);
	for (i=4; i<30; i++)
	{
		if (i != 7 && i != 14 && i != 21)
			continue;

		if (i> 21 && total_counts[i]==0)
			break;

		fs << i << " ";
		int j;
		for (j=0; j<=i; j++)
			fs << setw(5) << (double)min_dis_counts[i][j]/(double)total_counts[i] << "\t";
		fs << endl;
	}

	fs.close();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -