⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:
#include "Config.h"
#include "auxfun.h"



void Config::init_with_defaults()
{
	int i;

	mass_spec_type=ESI_MASS_SPEC; // default type
	config_file = "";
	model_name = "";

	ind_read_PTM_file = false;

	max_n_term_mod = 0;
	max_c_term_mod = 0;
	min_n_term_mod = 0;
	min_c_term_mod = 0; 

	tolerance = 0.5;
	pm_tolerance = 2.5;

	local_window_size=200;
	max_number_peaks_per_local_window=15;
	number_of_strong_peaks_per_local_window=10;
//	random_prob = 0.1;

	max_combo_mass = 0;

	max_edge_length = 2;

	max_charge_for_size=10;       // the size  of size_thresholds

	set_digest_type(0);

	need_to_estimate_pm = 1;

	use_spectrum_charge = 0;

	use_spectrum_mz = 0;

	filter_flag = 1;

	need_to_normalize = 1;

	itraq_mode = 0;

	terminal_score = 10;

	digest_score = 10;

	forbidden_pair_penalty = 25;
	strong_type1_idx =-1;
	strong_type2_idx =-1;

	
	min_ranges.clear();
	max_ranges.clear();
	min_exclude_range=999999;
	max_exclude_range=NEG_INF;

	init_standard_aas();

	session_aas = standard_aas;

	init_model_size_and_region_thresholds();

	int c;
	for (c=max_charge_for_size; c>=0; c--)
		init_regional_fragment_set_defaults(0,c);

	// these all operate on the original aas
	session_tables.init_for_standard_aas();

	// insert labels of original aas
	label2aa.clear();
	const vector<string>& aa2label = get_aa2label();
	for (i=0; i<aa2label.size(); i++)
		label2aa.insert(STRING2INT_MAP::value_type(aa2label[i],i));

	calc_aa_combo_masses();
	set_aa_variants();
	fill_allowed_double_edges();

	init_allowed_node_masses();

	char PTM_file[256];
	strcpy(PTM_file,get_resource_dir().c_str());
	strcat(PTM_file,"/");
	strcat(PTM_file,"PepNovo_PTMs.txt"); 

	read_PTM_file(PTM_file);

//	all_fragments.print();
}





void Config::init_standard_aas()
{
	int i;
	standard_aas.clear();
	for (i=Ala; i<=Val; i++)
		standard_aas.push_back(i);
}


void Config::init_model_size_and_region_thresholds()
{


	size_thresholds.clear();
	size_thresholds.resize(max_charge_for_size+1);

	// region thresholds
	region_thresholds.resize(2);
	region_thresholds[0]=0.225;
	region_thresholds[1]=0.775;
}



void Config::add_exclude_range(mass_t min_range, mass_t max_range)
{
	if (min_range>=max_range)
		return;

	this->min_ranges.push_back(min_range);
	this->max_ranges.push_back(max_range);
	if (this->min_exclude_range>min_range)
		min_exclude_range = min_range;

	if (this->max_exclude_range<max_range)
		max_exclude_range = max_range;
}


/********************************************************
Sets the diegest type.
Assumes that each digest type has a set of N or C terminal
preferred amino acids
*********************************************************/
void Config::set_digest_type(int type)
{
	digest_type = type;
	n_term_digest_aas.clear();
	c_term_digest_aas.clear();

	if (digest_type == NON_SPECIFIC_DIGEST)
		return;

	if (digest_type == TRYPSIN_DIGEST)
	{
		c_term_digest_aas.push_back(Lys);
		c_term_digest_aas.push_back(Arg);
		return;
	}

	cout << "Error: digest type not supported: " << type << endl;
	exit(1);
}

/*********************************************************
Inits the regional fragment sets (resizes according to the
charges, size_idxs, and region_idxs). If set_type = 0, 
each region is given all fragments that are relevant.
If set type = 0, uses all
**********************************************************/
void Config::init_regional_fragment_set_defaults(int set_type, int charge)
{
	const int num_regions = region_thresholds.size()+1;

	if (max_charge_for_size <= charge)
	{
		max_charge_for_size = charge;
		regional_fragment_sets.resize(max_charge_for_size+1);
	}

	regional_fragment_sets[charge].resize(size_thresholds[charge].size());
	int j;
	for (j=0; j<size_thresholds[charge].size(); j++)
	{
		int k;
		regional_fragment_sets[charge][j].resize(num_regions);
		for (k=0; k<num_regions; k++)
		{
			if (set_type == 0)
			{
				regional_fragment_sets[charge][j][k].init_with_all_types(charge,this);
			}
			else
			{
				cout << "Error: bad option for init!"<< endl;
				exit(1);
			}
		}
	}
}



// selects the fragments to be used, uses a cutoff that is X times random prob)
void Config::select_fragments_in_sets(score_t min_prob_coef, int max_num_frags)
{
	int c;
	for (c=1; c<regional_fragment_sets.size(); c++)
	{
		int s;
		for (s=0; s<regional_fragment_sets[c].size(); s++)
		{
			int r;
			
			for (r=0; r<regional_fragment_sets[c][s].size(); r++)
			{
				float min_prob = min_prob_coef * get_regional_random_probability(c,s,r);
				regional_fragment_sets[c][s][r].select_fragments_with_minimum_prob(min_prob,
					max_num_frags);
			}
		}
	}
}





// For each regional fragments selects all fragments that have a minimal probability
// to be strong.
// also look for the two strongest fragment types and store them in the
// strong_type1_idx and strong_type2_idx variables
void Config::select_strong_fragments(int charge,
						score_t min_prob, int max_num_strong, bool verbose)
{
	const vector<FragmentType>& all_fragment_types = get_all_fragments();
	
	int s;
	for (s=0; s<regional_fragment_sets[charge].size(); s++)
	{
		int r;
		for (r=0; r<regional_fragment_sets[charge][s].size(); r++)
		{
			regional_fragment_sets[charge][s][r].select_strong_fragments(this,min_prob,max_num_strong);
			const vector<int>& strong_type_idxs = regional_fragment_sets[charge][s][r].get_strong_frag_type_idxs();

			// see if these frags should be put in the strong two fragments
			int i;
			for (i=0; i<strong_type_idxs.size(); i++)
			{
				int frag_idx = strong_type_idxs[i];

				if (strong_type1_idx == -1)
				{
					strong_type1_idx = frag_idx;
					continue;
				}

				if (strong_type2_idx == -1 &&
					frag_idx != strong_type1_idx)
				{
					strong_type2_idx = frag_idx;
					continue;
				}

				// make sure that type2 has the lower probability
				if (all_fragment_types[strong_type1_idx].prob<
					all_fragment_types[strong_type2_idx].prob)
				{
					int tmp;
					tmp=strong_type1_idx;
					strong_type1_idx=strong_type1_idx;
					strong_type1_idx=tmp;
				}
				if (all_fragment_types[frag_idx].prob>
					all_fragment_types[strong_type2_idx].prob)
				{
					strong_type2_idx = frag_idx;
				}
			}
		}
	}	
}

// returns the region idx for a (breakge) mass
int	 Config::calc_region_idx(mass_t break_mass, mass_t pm_with_19, int charge,
					 mass_t min_peak_mass, mass_t max_peak_mass) const
{
//	return 0;

	const int n_threshes = region_thresholds.size();
	int i;
	vector<mass_t> threshes;
	threshes.resize(region_thresholds.size());
	for (i=0; i<region_thresholds.size(); i++)
		threshes[i] = pm_with_19 * region_thresholds[i];

	if (charge<=2)
	{
		mass_t alt_first_thresh = (min_peak_mass>0) ? min_peak_mass + 150 : threshes[0];
		mass_t alt_last_thresh =  (max_peak_mass>0) ? max_peak_mass - 150 : threshes[n_threshes-1];
		
		if (break_mass<threshes[0] || break_mass<alt_first_thresh)
			return 0;

		if (break_mass>threshes[n_threshes-1] || break_mass>alt_last_thresh)
			return n_threshes;


		for (i=1; i<n_threshes; i++)
			if (break_mass<threshes[i])
				break;
		return i;
	}
	else
	{
		if (break_mass<threshes[0])
			return 0;

		if (break_mass>threshes[n_threshes-1])
			return n_threshes;


		for (i=1; i<n_threshes; i++)
			if (break_mass<threshes[i])
				break;
		return i;
	}
}


void Config::set_size_thresholds_according_to_set_of_masses(int charge,
											vector<mass_t>& spectra_masses)
{
	const int num_spectra = spectra_masses.size();

	if (size_thresholds.size()<=charge)
		size_thresholds.resize(charge+1);

	size_thresholds[charge].clear();

	if (num_spectra<1500)
		return;

	sort(spectra_masses.begin(),spectra_masses.end());

	if (num_spectra<3000)
	{
		int idx = num_spectra/2;
		size_thresholds[charge].push_back(spectra_masses[idx]);
		return;
	}

	if (num_spectra<10000)
	{
		// use 3 sizes
		
		int idx1 = num_spectra/3;
		int idx2 = idx1+idx1;

		size_thresholds[charge].push_back(spectra_masses[idx1]);
		size_thresholds[charge].push_back(spectra_masses[idx2]);
	}

	// for large datasets of spectra, use the following rule
	const mass_t min_pm_diff = 200.0;
	const int    min_num_spectra_per_model = 25000;
	const int	 num_models = spectra_masses.size()/min_num_spectra_per_model;

	if (num_models <=1)
	{
		size_thresholds[charge].clear();
		size_thresholds[charge].push_back(POS_INF);
		return;
	}

	int i=0;
	int prev_i=0;
	int next_idx = min_num_spectra_per_model;
	mass_t last_mass =0;

	size_thresholds[charge].clear();

	while (i<spectra_masses.size())
	{
		if (spectra_masses[i]-last_mass>min_pm_diff && 
			i>= next_idx)
		{
			size_thresholds[charge].push_back(spectra_masses[i]);
			cout << "charge " << charge << "\t size " << size_thresholds[charge].size() << " \tmasss " << 
				setprecision(1) << fixed << size_thresholds[charge][size_thresholds[charge].size()-1] << 
				"\t  (spectra " << i-prev_i << ")" << endl;

			last_mass = spectra_masses[i];
			prev_i = i;

			int md_count = size_thresholds[charge].size();
			if (md_count == num_models -1 || spectra_masses.size() - i < 1.5*min_num_spectra_per_model)
				break;
			
			next_idx = i + (spectra_masses.size() - i)/(num_models-md_count);
	
		}
		i++;
	}

	size_thresholds[charge].push_back(POS_INF);
	
	cout << "charge " << charge << "\t size " << size_thresholds[charge].size() << 
		" \tmasss " << setprecision(1) << fixed << size_thresholds[charge][size_thresholds[charge].size()-1] <<
		"\t  (spectra " << spectra_masses.size()-prev_i << ")" << endl;
	
}

void Config::print_size_thresholds() const
{
	int charge;
	for (charge=0; charge<size_thresholds.size(); charge++)
	{
		cout << charge << "\t" << size_thresholds[charge].size();
		int t;
		for (t=0; t<size_thresholds[charge].size(); t++)
			cout << "\t" << size_thresholds[charge][t];
		cout << endl;
	}
}


int Config::determine_charge(Peptide& pep, mass_t m_over_z) const
{
	int c;
	pep.calc_mass(this);
	mass_t pep_mass_with_19 = pep.get_mass()+MASS_OHHH;

	for (c=1; c<10; c++)
	{
		mass_t calc_mass_with_19 = m_over_z * c - (1.0023 * (c-1));
		if (fabs(calc_mass_with_19- pep_mass_with_19)<10)
			return c;
	}
	return -1;
}


int	 Config::get_max_session_aa_idx() const
{
	const vector<int>& aas = get_session_aas();
	int max_aa=-1;
	int i;
	for (i=0; i<aas.size(); i++)
		if (aas[i]>max_aa)
			max_aa=aas[i];

	return max_aa;
}



/***********************************************************
// returns the idx of an aa from its label
// -1 if label is not found
***********************************************************/
int Config::get_aa_from_label(const string& label) const
{
	STRING2INT_MAP::const_iterator iter = label2aa.find(label);

	if (iter == label2aa.end())
		return -1;

	return (*iter).second;
}

int Config::get_aa_with_position_from_label(const string& label, int position) const
{
	STRING2INT_MAP::const_iterator iter;
	for (iter = label2aa.begin();iter != label2aa.end(); iter++)
	{
		int aa_idx = (*iter).second;
		if (session_tables.get_aa_position(aa_idx) != position ||
			label[0] != (*iter).first[0])
			continue;

		if (label == session_tables.get_aa2label(aa_idx))
			return aa_idx;
	}


	return -1;
}


void Config::print_fragments(ostream &os) const
{
	int i;
	os << "#ALL FRAGMENTS " << all_fragments.fragments.size() << endl;
	for (i=0; i<all_fragments.fragments.size(); i++)
		all_fragments.fragments[i].write_fragment(os);
}


void Config::print_regional_fragment_sets(ostream &os) const
{
	os << "#FRAGMENT SETS" << endl;

	int c;
	for (c=regional_fragment_sets.size()-1; c>=0; c--)
	{
		int s;
		for (s=regional_fragment_sets[c].size()-1; s>=0; s--)
		{
			int r;
			for (r=regional_fragment_sets[c][s].size()-1; r>=0; r--)
			{
				int f;
				const vector<int>& frag_idxs = regional_fragment_sets[c][s][r].get_frag_type_idxs();

				if (frag_idxs.size()>0)
				{
					os << c << " " << s << " " << r << " " << frag_idxs.size() << " " <<
						setprecision(5) << regional_fragment_sets[c][s][r].get_rand_prob() << endl;
					int i;

					// write fragments
					for (f=0; f<frag_idxs.size(); f++)
					{
						os << left << setw(9) << all_fragments.get_fragment(frag_idxs[f]).label 
							<< " " << setprecision(4) << regional_fragment_sets[c][s][r].get_frag_prob(f)<< "  " << endl;
						//	setprecision(5) << all_fragments.get_fragment(frag_idxs[f]).offset << endl;
					}

					// write strong
					const vector<int>& strong = regional_fragment_sets[c][s][r].get_strong_frag_type_idxs();
					os << "strong " << strong.size();
					for (i=0; i<strong.size(); i++)
						os << " " << get_fragment(strong[i]).label;
					os << endl;

					// write combo
					const vector<FragmentCombo>& combos = regional_fragment_sets[c][s][r].get_frag_type_combos();
					os << "combos " << combos.size() << endl;
					for (i=0; i<combos.size(); i++)
						combos[i].print_combo(this,os);

					os << endl;
				}
			}
		}
	}
}





/************************************************************
Reads the fragments from the stream
*************************************************************/
void Config::read_fragments(istream& is)
{
	int i,num_frags=-1;
	char buff[128];
	is.getline(buff,128);

	if (sscanf(buff,"#ALL FRAGMENTS %d",&num_frags) != 1)
	{
		cout << "Error: bad line in fragments file: " << buff << endl;

		if (strlen(buff)<4)
		{
			cout << "This error can possibly be due to Wnix/Windows issues. Try running dos2unix on all \".txt\" files in the Models directory." << endl;
		}
		exit(1);
	}

	all_fragments.clear_set();
	all_strong_fragment_type_idxs.clear();
	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -