⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:
				if (aa2 == Ile)
					continue;

				const mass_t mass2 = mass1 + aa2mass[aa2];
				int k;

				for (k=j; k<num_aas; k++)
				{
					const int aa3 = session_aas[k];
					if (aa3 == Ile)
						continue;

					const mass_t mass3 = mass2 + aa2mass[aa3];
					int l;

					for (l=k; l<num_aas; l++)
					{
						const int aa4 = session_aas[l];
						if (aa4 == Ile)
							continue;

						AA_combo ac;
						ac.amino_acids[0]=aa1;
						ac.amino_acids[1]=aa2;
						ac.amino_acids[2]=aa3;
						ac.amino_acids[3]=aa4;
						ac.num_aa=4;
						ac.total_mass+=mass3 + aa2mass[aa4];
						
						aa_combo_by_length[4].push_back(ac);
					}
				}
			}
		}
		sort(aa_combo_by_length[4].begin(),aa_combo_by_length[4].end());
	}

	if (max_edge_length>=5)
	{
		aa_combo_by_length[5].clear();
		for (i=0; i<num_aas; i++)
		{
			int j;
			const int aa1 = session_aas[i];
			const mass_t mass1 = aa2mass[aa1];
			if (aa1 == Ile)
				continue;

			for (j=i; j<num_aas; j++)
			{
				const int aa2 = session_aas[j];
				if (aa2 == Ile)
					continue;

				const mass_t mass2 = mass1 + aa2mass[aa2];
				int k;

				for (k=j; k<num_aas; k++)
				{
					const int aa3 = session_aas[k];
					if (aa3 == Ile)
						continue;

					const mass_t mass3 = mass2 + aa2mass[aa3];
					int l;

					for (l=k; l<num_aas; l++)
					{
						const int aa4 = session_aas[l];
						if (aa4 == Ile)
							continue;

						const mass_t mass4 = mass3 + aa2mass[aa4];
						int p;

						for (p=l; p<num_aas; p++)
						{
							const int aa5 = session_aas[p];
							if (aa5 == Ile)
								continue;

							AA_combo ac;
							ac.amino_acids[0]=aa1;
							ac.amino_acids[1]=aa2;
							ac.amino_acids[2]=aa3;
							ac.amino_acids[3]=aa4;
							ac.amino_acids[4]=aa5;
							ac.num_aa=5;
							ac.total_mass+=mass4 + aa2mass[aa5];
						
							aa_combo_by_length[5].push_back(ac);
						}
					}
				}
			}
		}
		sort(aa_combo_by_length[5].begin(),aa_combo_by_length[5].end());
	}
	
	max_combo_mass = aa_combo_by_length[max_edge_length][aa_combo_by_length[max_edge_length].size()-1].total_mass + 1.0;

	// fill in aa_edge_combos
	aa_edge_combos.clear();
	aa_edge_combos.reserve((int)(1.5 * aa_combo_by_length[max_edge_length].size()));
	for (i=1; i<aa_combo_by_length.size(); i++)
	{
		int j;
		for (j=0; j<aa_combo_by_length[i].size(); j++)
			aa_edge_combos.push_back(aa_combo_by_length[i][j]);	
	}

	sort(aa_edge_combos.begin(),aa_edge_combos.end());

	// make edge variant vector and fill in the variant pointers
	// and make combo_idxs_by_length vectors
	combo_idxs_by_length.resize(max_edge_length+1);
	for (i=0; i<=max_edge_length; i++)
		combo_idxs_by_length[i].clear();

	variant_vector.clear();
	variant_vector.reserve(aa_edge_combos.size()*5);

	for (i=0; i<aa_edge_combos.size(); i++)
	{
		AA_combo& combo = aa_edge_combos[i];
		combo_idxs_by_length[combo.num_aa].push_back(i);

		if (combo.num_aa == 1)
		{
			combo.num_variants = 1;
			combo.variant_start_idx = variant_vector.size();

			variant_vector.push_back(1);
			variant_vector.push_back(combo.amino_acids[0]);
			continue;
		}


		if (combo.num_aa == 2)
		{
			if (combo.amino_acids[0]==combo.amino_acids[1])
			{
				combo.num_variants = 1;
				combo.variant_start_idx = variant_vector.size();

				variant_vector.push_back(2);
				variant_vector.push_back(combo.amino_acids[0]);
				variant_vector.push_back(combo.amino_acids[1]);
				continue;
			}
			else
			{
				combo.num_variants = 2;
				combo.variant_start_idx = variant_vector.size();

				variant_vector.push_back(2);
				variant_vector.push_back(combo.amino_acids[0]);
				variant_vector.push_back(combo.amino_acids[1]);
				variant_vector.push_back(2);
				variant_vector.push_back(combo.amino_acids[1]);
				variant_vector.push_back(combo.amino_acids[0]);
				continue;
			}
		}

		// generate variant using permutation generating function
		vector<int> org_perm;
		vector< vector<int> > all_perms;

		int i;
		org_perm.clear();
		for (i=0; i<combo.num_aa; i++)
			org_perm.push_back(combo.amino_acids[i]);

		generate_all_permutations(org_perm,all_perms);
		combo.num_variants = all_perms.size();
		combo.variant_start_idx = variant_vector.size();

		for (i=0; i<all_perms.size(); i++)
		{
			int j;
			variant_vector.push_back(combo.num_aa);
			for (j=0; j<combo.num_aa; j++)
				variant_vector.push_back(all_perms[i][j]);
		}
		continue;
	}

	// fill the combo_start_idxs
	int last_combo_idx = aa_edge_combos.size()-1;
	mass_t largest_combo_mass = aa_edge_combos[last_combo_idx].total_mass;
	int last_mass_idx = (int)(largest_combo_mass + 1.0);

	combo_start_idxs.resize(last_mass_idx+5,-1);

	for (i=0; i<aa_edge_combos.size(); i++)
	{
		int mass_idx = (int)(aa_edge_combos[i].total_mass);
		if (combo_start_idxs[mass_idx]<0)
			combo_start_idxs[mass_idx]=i;
	}

	// fill in combo idxs for entries with -1
	int previous=-1;
	for (i=0; i<combo_start_idxs.size(); i++)
	{
		if (combo_start_idxs[i]<0)
		{
			combo_start_idxs[i]=previous;
		}
		else
			previous = combo_start_idxs[i];
	}

	
}


const int * Config::get_first_variant_ptr(int combo_idx) const
{ 
	return &variant_vector[aa_edge_combos[combo_idx].variant_start_idx]; 
}


// returns a pointer and number of variants which have masses in the given mass ranges
int Config::get_ptrs_for_combos_in_mass_range(mass_t min_mass, mass_t max_mass, 
												   int& num_combos) const
{
	num_combos = 0;
	int min_idx = (int)(min_mass);

	if (min_idx>=combo_start_idxs.size())
		return -1;

	int combo_idx = combo_start_idxs[min_idx];
	while (combo_idx<aa_edge_combos.size())
	{
		if (aa_edge_combos[combo_idx].total_mass>max_mass)
			return -1;

		if (aa_edge_combos[combo_idx].total_mass>=min_mass)
			break;

		combo_idx++;
	}

	if (combo_idx == aa_edge_combos.size())
		return -1;

	int combos_start_idx = combo_idx;
	while (combo_idx<aa_edge_combos.size())
	{
		if (aa_edge_combos[combo_idx].total_mass>max_mass)
			break;

		combo_idx++;
	}

	num_combos = combo_idx-combos_start_idx;
	
	return combos_start_idx;
}

// returns true if there is a combo that contains the ordered variant of the given aas
bool Config::combos_have_variant(const vector<int>& combos, int num_aa, int *var_aas) const
{
	int c;
	for (c=0; c<combos.size(); c++)
	{
		const AA_combo& combo =  aa_edge_combos[combos[c]];
		int pos = combo.variant_start_idx;
		int v;

		for (v=0; v<combo.num_variants; v++)
			if (variant_vector[pos++]==num_aa)
			{
				int a;
				for (a=0; a<num_aa; a++)
					if (variant_vector[pos+a] != var_aas[a])
						break;
				
				if (a==num_aa)
					return true;

				pos += num_aa;
			}
	}
	return false;
}






/**********************************************************
	calculates the aa_variants vectors (for terminalss and
	amino acids Ala-Val
***********************************************************/
void Config::set_aa_variants()
{
	const vector<int>& org_aas = this->get_org_aa();
	int a;
	aa_variants.clear();
	aa_variants.resize(Val+1);

	aa_variants[N_TERM].push_back(N_TERM);
	aa_variants[C_TERM].push_back(C_TERM);
	aa_variants[Xle].push_back(Xle);

	for (a=0; a<session_aas.size(); a++)
	{
		int aa = session_aas[a];
		int org_aa=org_aas[aa];
		aa_variants[org_aa].push_back(aa);
	}
}


/**********************************************************
Fills the array of amino acid combinations that can be
double edeges. Based on Yingying Huang et al  2003
***********************************************************/
void Config::fill_allowed_double_edges(bool allow_all)
{
	int i;
	int max_aa = Val;
	for (i=0; i<session_aas.size(); i++)
		if (session_aas[i]>max_aa)
			max_aa= session_aas[i];

	int num_aa = max_aa+1;

	allowed_double_edge.clear();
	double_edge_with_same_mass_as_single.clear();

	allowed_double_edge.resize(num_aa);
	double_edge_with_same_mass_as_single.resize(num_aa);

	for (i=0; i<num_aa; i++)
	{
		allowed_double_edge[i].resize(num_aa,allow_all);
		double_edge_with_same_mass_as_single[i].resize(num_aa,false);
	}

	if (tolerance<0.05)
		allow_all=true;


	if (! allow_all)
	{
		for (i=Ala; i<=Val; i++)
		{
			allowed_double_edge[Pro][i]=true;
			allowed_double_edge[Gly][i]=true;
			allowed_double_edge[Ser][i]=true;
		}

	//	allowed_double_edge[Gly][Ala]=false;
		allowed_double_edge[Ser][Ser]=false;
		allowed_double_edge[Ser][Tyr]=false;
		allowed_double_edge[Ser][Pro]=false;
		allowed_double_edge[Ser][His]=false;
		allowed_double_edge[Ser][Gly]=false;
		allowed_double_edge[Ser][Val]=false;



		allowed_double_edge[Thr][Val]=true;
		allowed_double_edge[Thr][Gln]=true;
		allowed_double_edge[Thr][His]=true;
		allowed_double_edge[Thr][Glu]=true;
		allowed_double_edge[Thr][Asp]=true;

		allowed_double_edge[Asn][Asp]=true;
		allowed_double_edge[Asn][Glu]=true;
		allowed_double_edge[Asn][His]=true;
		allowed_double_edge[Asn][Ile]=true;
		allowed_double_edge[Asn][Leu]=true;
		allowed_double_edge[Asn][Met]=true;
		allowed_double_edge[Asn][Asn]=true;
		allowed_double_edge[Asn][Gln]=true;
		allowed_double_edge[Asn][Val]=true;
		allowed_double_edge[Asn][Tyr]=true;

		allowed_double_edge[Tyr][Asp]=true;
		allowed_double_edge[Tyr][Glu]=true;
		allowed_double_edge[Tyr][Thr]=true;

		for (i=Ala; i<=Val; i++)
		{
			allowed_double_edge[i][Lys]=true;
			allowed_double_edge[i][Arg]=true;
		}

		allowed_double_edge[Phe][Glu]=true;

		allowed_double_edge[Ala][Leu]=true;
		allowed_double_edge[Leu][Ala]=true;
		allowed_double_edge[Ala][Ala]=true;
		allowed_double_edge[Trp][Glu]=true;

		// add these double edges, give penalty if they are used as a double edge
		allowed_double_edge[Ala][Gly]=true;
		allowed_double_edge[Gly][Ala]=true;
		allowed_double_edge[Gly][Gly]=true;
		allowed_double_edge[Ala][Asp]=true;
		allowed_double_edge[Asp][Ala]=true;
		allowed_double_edge[Val][Ser]=true;
		allowed_double_edge[Ser][Val]=true;
		allowed_double_edge[Gly][Glu]=true;
		allowed_double_edge[Glu][Gly]=true;

	}	

	double_edge_with_same_mass_as_single[Ala][Gly]=true;
	double_edge_with_same_mass_as_single[Gly][Ala]=true;
	double_edge_with_same_mass_as_single[Gly][Gly]=true;
	double_edge_with_same_mass_as_single[Ala][Asp]=true;
	double_edge_with_same_mass_as_single[Asp][Ala]=true;
	double_edge_with_same_mass_as_single[Val][Ser]=true;
	double_edge_with_same_mass_as_single[Ser][Val]=true;
	double_edge_with_same_mass_as_single[Gly][Glu]=true;
	double_edge_with_same_mass_as_single[Glu][Gly]=true;

	// add fix for PTMs
	const vector<int>& org_aas = session_tables.get_org_aa();
	for (i=0; i<session_aas.size(); i++)
	{
		int j;
		for (j=0; j<session_aas.size(); j++)
		{
			int aa1 = session_aas[i];
			int aa2 = session_aas[j];

			if (org_aas[aa1]>=Ala && org_aas[aa2]>=Ala &&
				allowed_double_edge[org_aas[aa1]][org_aas[aa2]])
				allowed_double_edge[aa1][aa2]=true;
		}
	}
}


/************************************************************
Add new fragments if they do not appear in list
*************************************************************/
void Config::add_fragment_types(const FragmentTypeSet& fts)
{
	int i;
	for (i=0; i<fts.get_fragments().size(); i++)
	{
		all_fragments.add_fragment_type(fts.get_fragment(i));
	}
}



void Config::print_config_parameters(ostream& os) const
{
	int i;

	if (fragments_file.length()>0)
		os << "#CONF FRAGMENTS_FILE " << fragments_file << endl;

	if (regional_fragment_sets_file.length()>0)
		os << "#CONF REGIONAL_FRAGMENT_SETS_FILE " << regional_fragment_sets_file << endl;

	if (aa_combo_file.length()>0)
		os << "#CONF AA_COMBO_FILE " << aa_combo_file << endl;

//	if (resource_dir.length()>0)
//		os << "#CONF RESOURCE_DIR " << resource_dir << endl;


	os << "#CONF MASS_SPEC_TYPE " << mass_spec_type << "(currently only option 1) " << endl;

	os << "#CONF DIGEST_TYPE " << digest_type << " (" << NON_SPECIFIC_DIGEST << " - non specific, " <<
		 TRYPSIN_DIGEST << " - trypsin)" << endl;

	os << "#CONF MAX_NUMBER_OF_PEAKS_PER_LOCAL_WINDOW " << 
		max_number_peaks_per_local_window << endl;

	os << "#CONF NUMBER_OF_STRONG_PEAKS_PER_LOCAL_WINDOW " <<
		number_of_strong_peaks_per_local_window << endl;


	os << "#CONF LOCAL_WINDOW_SIZE " << setprecision(4) << local_window_size << endl;

	os << "#CONF TOLERANCE " << setprecision(4) << tolerance << endl;

	os << "#CONF PM_TOLERANCE " << setprecision(4) << pm_tolerance << endl;

	os << "#CONF MAX_EDGE_LENGTH " << max_edge_length << endl;

	os << "#CONF TERMINAL_SCORE " << terminal_score << endl;

	os << "#CONF DIGEST_SCORE " << digest_score << endl;

	os << "#CONF FORBIDDEN_PAIR_PENALTY " << forbidden_pair_penalty << endl;

	os << "#CONF NEED_TO_CORRECT_PM " << need_to_estimate_pm << " (0 - no, 1 - yes)" << endl;

	os << "#CONF SIZE_THRESHOLDS " << size_thresholds.size() << " ";
	for (i=0; i<size_thresholds.size(); i++)
	{
		int j;
		os << size_thresholds[i].size() << " ";
		for (j=0; j<size_thresholds[i].size(); j++)
			os << fixed << setprecision(4) << size_thresholds[i][j] << " ";
	}
	os << endl;

	os << "#CONF REGION_THRESHOLDS " << region_thresholds.size();
	for (i=0; i<region_thresholds.size(); i++)
		os << " " << setprecision(4) << region_thresholds[i];

	os << endl;

	for (i=0; i<min_ranges.size(); i++)
	{
		os << "#CONF MASS_EXCLUDE_RANGE " << setprecision(3) << min_ranges[i] << " " << max_ranges[i] << endl;
	}
}


/************************************************************
outputs the selected aas for the different regions
*************************************************************/
void Config::print_table_aas(const ConversionTables& table, 
							 const vector<int>& aas) const
{
	int i;
	cout << aas.size() << " amino acids:" << endl;

	for (i=0; i<aas.size(); i++)
		cout << setw(6) << left << aas[i] << setprecision(4) << right << fixed << setw(8) 
			  << table.get_aa2mass(aas[i]) << "   " << left << 
			  table.get_aa2label(aas[i]) << endl;
}

void Config::print_session_aas() const
{
	cout << endl << "AMINO ACIDS" << endl;
	print_table_aas(session_tables,session_aas);
	cout << "N_TERM " << session_tables.get_aa2mass(N_TERM) << endl;
	cout << "C_TERM " << session_tables.get_aa2mass(C_TERM) << endl;
}


void Config::print_aa_variants() const
{
	int i;
	const vector<string>& aa2label = get_aa2label();

	for (i=0; i<aa_edge_combos.size(); i++)
	{
		cout << left << i << " " << aa_edge_combos[i].num_variants << " , " <<
			aa_edge_combos[i].variant_start_idx << "  ";
		int j;

		int pos = aa_edge_combos[i].variant_start_idx;
		cout << "(" << pos << ") ";
		for (j=0; j<aa_edge_combos[i].num_variants; j++)
		{
			int k;
			int num_aa = variant_vector[pos++];
			cout << " ";
			for (k=0; k<num_aa; k++)
				cout << aa2label[variant_vector[pos++]];
		}
		cout << endl;
	}
}



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -