⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 peakrankmodel.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 4 页
字号:
#include "PeakRankModel.h"
#include "RegularRankModel.h"
#include "auxfun.h"

const char* mobility_labels[]={"","Mobile","Partially Mobile","Non-Mobile"};

extern const int num_RKH_combos;
extern const int num_RKH_pairs;
extern const string combos[];






/**********************************************************************
Categorizes the peptides as mobile,partially mobile, and non-mobile
according to Wysocki 2000, and Kapp 2003.
***********************************************************************/
int get_proton_mobility(const Peptide& pep, int charge)
{
	const vector<int>& amino_acids = pep.get_amino_acids();
	size_t i;

	int num_arg=0;
	int num_his=0;
	int num_lys=0;
	for (i=0; i<amino_acids.size(); i++)
	{
		if (amino_acids[i]==Arg)
			num_arg++;
		if (amino_acids[i]==Lys)
			num_lys++;
		if (amino_acids[i]==His)
			num_his++;
	}
	if (num_arg+num_his+num_lys<charge)
		return MOBILE;
	if (num_arg>=charge)
		return NONMOBILE;
	return PARTIALLYMOBILE;
}


/**************************************************************
***************************************************************/
int PeptideSolution::calc_mobility() const
{
	const vector<int>& amino_acids = pep.get_amino_acids();
	int numH=0,numK=0,numR=0;
	int i;
	for (i=0; i<amino_acids.size(); i++)
	{
		if (amino_acids[i] == His)
			numH++;

		if (amino_acids[i] == Lys)
			numK++;

		if (amino_acids[i] == Arg)
			numR++;
	}
	if (this->most_basic_aa_removed_from_n == His)
		numH++;
	if (this->most_basic_aa_removed_from_n == Lys)
		numK++;
	if (this->most_basic_aa_removed_from_n == Arg)
		numR++;
	if (this->most_basic_aa_removed_from_c == His)
		numH++;
	if (this->most_basic_aa_removed_from_c == Lys)
		numK++;
	if (this->most_basic_aa_removed_from_c == Arg)
		numR++;

	return get_proton_mobility(this->charge,numR,numH,numK);
	
}

int get_proton_mobility(int charge, int num_arg, int num_his, int num_lys)
{
	if (num_arg+num_his+num_lys<charge)
		return MOBILE;
	if (num_arg>=charge)
		return NONMOBILE;
	return PARTIALLYMOBILE;
}


struct inten_pair {
	inten_pair() : idx(int(NEG_INF)), inten(int(NEG_INF)) {};
	inten_pair(int _i, float _n) : idx(_i), inten(_n) {};
	bool operator< (const inten_pair& other) const
	{
		return inten>other.inten;
	}
	int idx;
	float inten;
};

/***********************************************************************
calculates the relative rank of the cuts. If intensity is 0, the rank 
999 is given (rank[0] also is 999)
************************************************************************/
void calc_cut_ranks(const vector<float>& intens, vector<int>& cut_ranks)
{
	vector<inten_pair> pairs;
	int i;
	for (i=1; i<intens.size(); i++)
		if (intens[i]>0)
			pairs.push_back(inten_pair(i,intens[i]));
	sort(pairs.begin(),pairs.end());
	
	cut_ranks.clear();
	cut_ranks.resize(intens.size(),999);
	for (i=0; i<pairs.size(); i++)
		cut_ranks[pairs[i].idx]=i;
}

void convert_seq_path_to_peptide_soluition(Config *config, const SeqPath& seq_path, PeptideSolution& sol)
{
	vector<int> aas;
	seq_path.get_amino_acids(aas);

	sol.pep.set_peptide_aas(aas);
	sol.pep.set_n_gap(seq_path.n_term_mass);
	sol.pep.calc_mass(config);

	sol.pm_with_19 = seq_path.pm_with_19;
	sol.charge = seq_path.charge;
	if (sol.charge==0)
	{
		cout << "Error: must supply seq path with charge other than 0!" << endl;
		exit(1);
	}
	sol.reaches_n_terminal = (sol.pep.get_n_gap()<1.0);
	sol.reaches_c_terminal = (sol.pep.get_mass_with_19() + 15.0 > sol.pm_with_19);

	if (sol.reaches_n_terminal && sol.reaches_c_terminal)
		sol.pm_with_19 = sol.pep.get_mass_with_19();
}

void convert_seq_path_to_peptide_soluition_and_fill_in_aas(Config *config, 
														   const Peptide& correct_pep,
														   const SeqPath& seq_path, 
														   PeptideSolution& sol)
{
	const mass_t true_mass_with_19 = correct_pep.get_mass_with_19();
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const vector<int>& correct_aas = correct_pep.get_amino_acids();
	vector<int> full_set_of_aas;
	vector<int> aas;
	seq_path.get_amino_acids(aas);

	full_set_of_aas.clear();
	if (seq_path.n_term_mass>1.0)
	{
		mass_t n_mass=0;
		int i;
		for (i=0; i<correct_aas.size(); i++)
		{
			if (fabs(n_mass-seq_path.n_term_mass)<3)
				break;
			n_mass+=aa2mass[correct_aas[i]];
			full_set_of_aas.push_back(correct_aas[i]);
		}
		if (i==correct_aas.size())
		{
			cout << "Error: seq_path doesn't have correct starting node!" << endl;
			cout << correct_pep.as_string(config) << endl;
			seq_path.print_full(config);
			exit(1);
		}
	}

	int i;
	for (i=0; i<aas.size(); i++)
		full_set_of_aas.push_back(aas[i]);

	if (fabs(seq_path.c_term_mass+19-true_mass_with_19)>5)
	{
		mass_t n_mass=0;
		int i;
		for (i=0; i<correct_aas.size(); i++)
		{
			if (fabs(n_mass-seq_path.c_term_mass)<5)
				break;
			n_mass+=aa2mass[correct_aas[i]];
		}
		if (i==correct_aas.size())
		{
			cout << "Error: seq_path doesn't have correct ending node!" << endl;
			exit(1);
		}

		for ( ; i<correct_aas.size(); i++)
			full_set_of_aas.push_back(correct_aas[i]);
	}
	

	sol.pep.set_peptide_aas(full_set_of_aas);
	sol.pep.set_n_gap(0);
	sol.pep.calc_mass(config);


	sol.charge = seq_path.charge;
	if (sol.charge==0)
	{
		cout << "Error: must supply seq path with charge other than 0!" << endl;
		exit(1);
	}
	sol.reaches_n_terminal = true;
	sol.reaches_c_terminal = true;
	sol.pm_with_19 = sol.pep.get_mass_with_19();
}


void  PeakRankModel::init_peak_rank_model_with_defaults(Config *_config, char *name,
														int feature_type)
{
	feature_set_type = feature_type;

	config = _config;

	set_peak_rank_model_name((name ? string(name) : "Rank"));

	set_size_thresholds();

	set_mass_detection_defaults();

	init_aa_defaults();

	convert_session_aas_to_model_aas();

	if (feature_set_type == 0)
	{
	//	set_binary_feature_names();

	//	set_real_feature_names();
		binary_feature_names.clear();
		set_simple_feature_names();
	}
	else if (feature_set_type == 1)
	{
		binary_feature_names.clear();
		this->set_advanced_feature_names();
	}
	else if (feature_set_type == 2)
	{
		binary_feature_names.clear();
		set_partial_denovo_feature_names();
	}
	else if (feature_set_type == 3)
	{
		binary_feature_names.clear();
		real_feature_names.clear();
	}
	else if (feature_set_type == 4)
	{
		binary_feature_names.clear();
		real_feature_names.clear();
	}
	else if (feature_set_type == 5)
	{
		binary_feature_names.clear();
		real_feature_names.clear();
	}
	else
	{
		cout << "Error: feture set type " << feature_set_type << " not supported!" << endl;
		exit(1);
	}

}

/******************************************************************************
*******************************************************************************/
void PeakRankModel::calc_peptide_predicted_scores(
							  const PeptideSolution& sol,
							  PeptidePeakPrediction& ppp,
							  int specific_size,
							  const vector<int>* ptr_frag_type_idxs) const
{
	const Peptide& pep = sol.pep;
	const mass_t pm_with_19 = sol.pm_with_19;
	const int spec_charge = sol.charge;
	const int mobility = get_proton_mobility(pep,spec_charge);
	const int size_idx = (specific_size>=0 ? specific_size : get_size_group(spec_charge,pm_with_19));
	
	if (! partition_models[spec_charge][size_idx][mobility])
	{
		cout << "Error: no rank partition model for " <<
			spec_charge << " " << size_idx << " " << mobility << endl;
		exit(1);
	}

	const mass_t min_detected_mass = calc_min_detected_mass(pm_with_19, spec_charge);
	const mass_t max_detected_mass = get_max_detected_mass();

	partition_models[spec_charge][size_idx][mobility]->calc_peptides_peaks_rank_scores(this, sol, 
		min_detected_mass, max_detected_mass, ppp, feature_set_type, ptr_frag_type_idxs);
}




bool PeakRankModel::has_intialized_models(int charge, int size_idx, int frag_idx) const
{
	int i;
	for (i=MOBILE; i<=NONMOBILE; i++)
	{
		if (! partition_models[charge][size_idx][i] ||
			! partition_models[charge][size_idx][i]->frag_models[frag_idx].get_ind_was_initialized())
			return false;
	}
	return true;
}



void PeptidePeakPrediction::make_rank_tables(const vector< vector<float> >& intens,
		vector< vector<int> >& observed_ranks, vector< vector<int> >& predicted_ranks) const
{
	const int max_num = rank_scores.size();
	predicted_ranks.resize(max_num);
	observed_ranks.resize(max_num);
	int f;
	for (f=0; f<max_num; f++)
	{
		if (rank_scores[f].size()>0)
		{
			find_ranks(rank_scores[f],predicted_ranks[f]);
			find_ranks(intens[f],observed_ranks[f]);
		}
		else
		{
			predicted_ranks[f].clear();
			observed_ranks[f].clear();
		}
	}
}



void PeptidePeakPrediction::print_ranks_vs_intens(const vector< vector<float> >& intens) const
{
	cout << setprecision(1) << fixed;
	
	cout << "Observed intensities:" << endl;
	int f,i;
	cout <<" ";
	for (i=1; i<intens[0].size(); i++)
		cout << "\t" << i;
	cout << endl;
	for (f=0; f<intens.size(); f++)
	{
		if (intens[f].size()==0)
			continue;
		cout << f << "\t";
		int i;
		for (i=1; i<intens[f].size(); i++)
		{
			if (intens[f][i]>0)
				cout << intens[f][i];
			cout << "\t";
		}
		cout << endl;
	}
	cout << endl;

	cout << "Predicted scores:" << endl;
	cout <<" ";
	for (i=1; i<rank_scores[0].size(); i++)
		cout << "\t" << i;
	cout << endl;
	for (f=0; f<rank_scores.size(); f++)
	{
		if (rank_scores[f].size()==0)
			continue;
		cout << f << "\t";
		int i;
		for (i=1; i<rank_scores[f].size(); i++)
		{
			if (rank_scores[f][i]>NEG_INF)
				cout << rank_scores[f][i];
			cout << "\t";
		}
		cout << endl;
	}
	cout << endl;


	vector< vector<int> > obs_table, pred_table;
	make_rank_tables(intens,obs_table,pred_table);

	cout << "Rank comparison:" << endl;
	cout <<" ";
	for (i=1; i<intens[0].size()-3; i++)
		cout << "\t" << i;
	cout << endl;
	for (f=0; f<pred_table.size(); f++)
	{
		if (intens[f].size()==0)
			continue;

		cout << f << "\t";
		int i;
		for (i=1; i<pred_table[f].size(); i++)
		{
			if (pred_table[f][i]>=0)
			{
				cout << obs_table[f][i] << ":" << pred_table[f][i] << "\t";
			}
			else
				cout << "\t";
		}
		cout << endl;
	}
	cout << endl;
	
}


/********************************************************************
Sets the default aa labels that are used in the model.
The aas that are included are the typical 20 amino acids minus Ile.
There are also M+16 and Q-17 that are included.
This list can be overriden if read from the model file.
*********************************************************************/
void PeakRankModel::init_aa_defaults()
{
	int aa;
	const vector<string>& aa2label =  config->get_aa2label();
	
	model_aa_labels.clear();

	for (aa=Ala; aa<=Val; aa++)
	{
		if (aa==Ile)
			continue;

		model_aa_labels.push_back(aa2label[aa]);
	}
	model_aa_labels.push_back("M+16");
	model_aa_labels.push_back("Q-17");
}



/********************************************************************
Creates a conversion table for converting session aas to the model aa
idxs. If there is a session aa that is not present it gets the org_aa
*********************************************************************/
void PeakRankModel::convert_session_aas_to_model_aas()
{
	const vector<string>& aa2label = config->get_aa2label();
	const vector<int>&    org_aas  = config->get_org_aa();
	const int max_session_aa_idx   = config->get_max_session_aa_idx();
	const vector<int>& session_aas = config->get_session_aas();

	session_aas_to_model_aas.clear();
	session_aas_to_model_aas.resize(max_session_aa_idx+1,-999);

	
	int i;
	for (i=0; i<session_aas.size(); i++)
	{
		const int aa = session_aas[i];
		const string label = aa2label[aa];

		// look for match
		int j;
		for (j=0; j<model_aa_labels.size(); j++)
			if (model_aa_labels[j] == label)
				break;

		if (j<model_aa_labels.size())
		{
			session_aas_to_model_aas[aa]=j;
			continue;
		}

		// try the org aa for this one
		const int org_aa = org_aas[aa];
		const string org_label = aa2label[org_aa];
		
		for (j=0; j<model_aa_labels.size(); j++)
			if (model_aa_labels[j] == org_label)
				break;

		if (j<model_aa_labels.size())
		{
			session_aas_to_model_aas[aa]=j;
			continue;
		}

		// check if we can use I/L
		if (org_label == "I")
		{
			const string l_label = "L";
			for (j=0; j<model_aa_labels.size(); j++)
			if (model_aa_labels[j] == l_label)
				break;

			if (j<model_aa_labels.size())
			{
				session_aas_to_model_aas[aa]=j;
				continue;
			}
		}

		// Error
		cout << "Error: couldn't find aa for " << label << " (" << aa << ")" << endl;
		exit(1);
	}


/*	cout << "++++++++++++++++++++++++++++++++++++++++++" << endl;
	cout << "AA conversion table:" << endl;
	for (i=0; i<session_aas_to_model_aas.size(); i++)
	{
		cout << i << " " << aa2label[i] << " --> " << session_aas_to_model_aas[i];
		if (session_aas_to_model_aas[i]>=0)
			cout << "  " << model_aa_labels[session_aas_to_model_aas[i]];
		cout << endl;
	}
	cout << "-----------------------------------------" << endl;*/
}


/***********************************************************************
Changes the original sequence aas (with their int encoding) to the model's
own amino acid int codes. This enables the model to be applied to peptides
with PTMs that were not in the original training set and also takes care
of discrepancies that could result due to changes in amino acid encoding.
************************************************************************/
void PeakRankModel::convert_aas_to_model_aas(const vector<int>& org_aas, vector<int>& model_aas) const
{
	const int max_session_aa = config->get_max_session_aa_idx();
	int i;
	model_aas.resize(org_aas.size());
	for (i=0; i<org_aas.size(); i++)
	{
		if (org_aas[i]>max_session_aa)
		{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -