⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 denovorankscore.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 5 页
字号:
#include "DeNovoRankScore.h"
#include "BasicDataStructs.h"
#include "FragmentSelection.h"
#include "DeNovoSolutions.h"
#include "auxfun.h"


// Definitions of the static data members of DeNovoRankScorer. All pointers start out
// as NULL.

// Scoring model; in this file it supplies the config, scores PRM graph edges and
// provides the pointer passed to the PMC/SQS feature routine.
AdvancedScoreModel	*DeNovoRankScorer::model=NULL;

// Peak rank models; the functions in this file index this array with model_type.
// NOTE(review): the array has 8 slots while the original remark mentions 4 types -
// confirm which slots are actually populated.
PeakRankModel		*DeNovoRankScorer::peak_prediction_models[8]={NULL}; // for the 4 types of DeNovoRankScorer

// Passed to fill_composition_features by the feature-filling functions in this file.
PeptideCompAssigner *DeNovoRankScorer::comp_assigner=NULL;


/*****************************************************************************************
The main function for rank scoring a complete peptide (co).

Annotates the spectrum with the solution's peptide and fills rbs with every feature
group that is enabled in the partition model selected by the solution's charge and the
size index. The peptide that was attached to the spectrum on entry is restored before
returning.

  sol        - the complete peptide solution whose features are extracted.
  peaks,
  num_peaks  - not used by this function.
  as         - spectrum that gets annotated; its peptide is temporarily replaced by
               sol.pep.
  pmcsqs_res - forwarded to fill_pmcsqs_features.
  rbs        - output sample; cleared before any feature is written.
  size_idx   - size group index; when negative it is obtained from the peak model using
               the charge and sol.pm_with_19.

Should only be used with model types 0 or 2. Prints an error and calls exit(1) if the
partition model is missing or was not initialized, or if the model type is different.
******************************************************************************************/
void DeNovoRankScorer::fill_complete_peptide_rbs(
							   const PeptideSolution& sol,
							   QCPeak* peaks, 
							   int num_peaks, 
							   AnnotatedSpectrum& as,
							   const vector<PmcSqsChargeRes>& pmcsqs_res,
							   RankBoostSample& rbs,
							   int size_idx) const
{
	vector< vector<intensity_t> > ann_intens;
	vector< vector<mass_t> >	  ann_masses;

	// annotate the spectrum with the candidate peptide; the peptide that was attached
	// on entry is kept so it can be put back before returning
	Peptide org_peptide = as.get_peptide();
	as.set_peptide(sol.pep);
	as.annotate_spectrum(sol.pm_with_19, true);
	as.extract_annotated_intens_and_masses(ann_intens,ann_masses);

	rbs.clear();
	const int charge=sol.charge;
	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	if (size_idx<0)
		size_idx=peak_model->get_size_group(charge,sol.pm_with_19);

	DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];

	if (! part_model || ! part_model->ind_was_initialized)
	{
		cout << "Error: de novo partition model was not initialized, charge " <<
			charge << " size " << size_idx << endl;
		exit(1);
	}

//	cout << "PEP: " << sol.pep.as_string(model->get_config()) << endl;

	// The PRM graph and the peptide's path through it are only built when the PRM
	// features are enabled.
	// NOTE(review): otherwise sol_seq_path stays default-constructed, yet it is still
	// passed to the tryp-terminal, intensity-balance and composition routines below -
	// confirm that those routines cope with such a path.
	PrmGraph prm;
	SeqPath  sol_seq_path;
	if (part_model->use_prm_features)
	{
		prm.create_graph_for_peptide_and_spectrum(model,&as,sol.pep.get_mass_with_19(),sol.charge,sol.pep);
		model->score_graph_edges(prm);
		sol_seq_path = prm.get_path_from_peptide_prm_graph(sol.pep);
	}

	if (model_type != 0 && model_type != 2)
	{
		cout << "Error: fill_complete_peptide_rbs should only be used with model types 0 or 2, not " <<
			model_type << endl;
		exit(1);
	}

	if (part_model->use_PTM_peak_features)
		part_model->fill_PTM_peak_features(model->get_config(),sol,ann_masses,ann_intens,as,rbs);

	if (part_model->use_tryp_terminal_features)
		part_model->fill_tryp_terminal_features(sol,sol_seq_path,rbs);

	if (part_model->use_ann_peak_features)
		part_model->fill_ann_peak_features(sol,ann_masses,ann_intens,as,rbs);

	// The config is taken from the model, as the other fill_*_rbs functions in this
	// file do, rather than from prm: the graph is only built when use_prm_features is
	// set, so its config cannot be relied upon here.
	if (part_model->use_inten_balance_features)
		part_model->fill_inten_balance_features(model->get_config(),sol,sol_seq_path,rbs);

	if (part_model->use_peak_offset_features)
		part_model->fill_peak_offset_features(as.get_config(),sol,ann_masses,ann_intens,rbs);

	if (part_model->use_comp_features)
		part_model->fill_composition_features(sol,as.get_config(),comp_assigner, sol_seq_path, rbs);

	if (part_model->use_pmc_features)
		part_model->fill_pmcsqs_features(sol,pmcsqs_res,model->get_pmcsqs_ptr(),rbs);

	// the plain and the combined peak prediction features are mutually exclusive:
	// which one is filled depends on the feature set type of the peak model
	if (part_model->use_ppp_features && peak_model->get_feature_set_type()<=2)
		part_model->fill_peak_prediction_features(sol, ann_intens,peak_model,rbs,size_idx);

	if (part_model->use_prm_features)
		part_model->fill_prm_features(sol, sol_seq_path, model_type, rbs);

	if (part_model->use_combined_ppp_features && peak_model->get_feature_set_type()>2)
		part_model->fill_combined_peak_prediction_features(sol, ann_intens,peak_model,rbs,size_idx);

	// put back the peptide the spectrum had on entry
	as.set_peptide(org_peptide);
}



/*****************************************************************************************
The main function for rank scoring a partial peptide.

Annotates the spectrum with the solution's peptide and fills main_rbs with every feature
group that is enabled in the partition model selected by the solution's charge and the
size index. The spectrum is left annotated with sol.pep (the peptide it had on entry is
not restored).

  sol        - the (possibly partial) de novo solution whose features are extracted.
  path       - the sequence path of the solution; forwarded to the feature routines.
  peaks,
  num_peaks  - not used by this function.
  as         - spectrum that gets annotated with sol.pep.
  pmcsqs_res - forwarded to fill_pmcsqs_features; only used when the solution reaches
               both the N- and the C-terminal.
  main_rbs   - output sample; cleared before any feature is written.
  size_idx   - size group index; when negative it is obtained from the peak model using
               the charge and sol.pm_with_19.

Should only be used with model type 1. Prints an error and calls exit(1) if the
partition model is missing or was not initialized, if the model type is different, or if
combined peak prediction features are enabled while the peak model's feature set type
is not 4.
******************************************************************************************/
void DeNovoRankScorer::fill_denovo_peptide_rbs(
							   PeptideSolution& sol,
							   const SeqPath& path,
							   QCPeak* peaks, 
							   int num_peaks, 
							   AnnotatedSpectrum& as,
							   const vector<PmcSqsChargeRes>& pmcsqs_res,
							   RankBoostSample& main_rbs,
							   int size_idx) const
{
	vector< vector<intensity_t> > ann_intens;
	vector< vector<mass_t> >	  ann_masses;

	as.set_peptide(sol.pep);
	as.annotate_spectrum(sol.pm_with_19, true);
	as.extract_annotated_intens_and_masses(ann_intens,ann_masses);

	main_rbs.clear();
	const int charge=sol.charge;

	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	if (size_idx<0)
		size_idx=peak_model->get_size_group(charge,sol.pm_with_19);

	DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];

	if (! part_model || ! part_model->ind_was_initialized)
	{
		cout << "Error: de novo partition model was not initialized, charge " <<
			charge << " size " << size_idx << endl;
		exit(1);
	}

	if (model_type != 1)
	{
		cout << "Error: fill_denovo_peptide_rbs should only be used with model type 1, not " <<
			model_type << endl;
		exit(1);
	}

	if (part_model->use_PTM_peak_features)
		part_model->fill_PTM_peak_features(model->get_config(),sol,ann_masses,ann_intens,as,main_rbs);

	if (part_model->use_tryp_terminal_features)
		part_model->fill_tryp_terminal_features(sol,path,main_rbs);

	if (part_model->use_ann_peak_features)
		part_model->fill_ann_peak_features(sol,ann_masses,ann_intens,as,main_rbs);

	if (part_model->use_inten_balance_features)
		part_model->fill_inten_balance_features(model->get_config(),sol,path,main_rbs);

	if (part_model->use_peak_offset_features)
		part_model->fill_peak_offset_features(as.get_config(),sol,ann_masses,ann_intens,main_rbs);

	if (part_model->use_comp_features)
		part_model->fill_composition_features(sol,as.get_config(),comp_assigner, path, main_rbs);

	// the PMC/SQS features are only filled for solutions that span the whole peptide
	if (part_model->use_pmc_features && sol.reaches_n_terminal && sol.reaches_c_terminal)
		part_model->fill_pmcsqs_features(sol,pmcsqs_res,model->get_pmcsqs_ptr(),main_rbs);

	if (part_model->use_prm_features)
		part_model->fill_prm_features(sol, path, model_type, main_rbs);

	if (part_model->use_combined_ppp_features) 
	{
		if (peak_model->get_feature_set_type() != 4)
		{
			cout << "Error: feature type for filling peak predictions should be 4 with this function,";
			cout << " not " << peak_model->get_feature_set_type() << endl;
			exit(1);
		}

		// the solution is used as it is: the combined peak prediction features are
		// written straight into main_rbs
		part_model->fill_combined_peak_prediction_features(sol, ann_intens,peak_model,main_rbs,size_idx);
	}
}




/*****************************************************************************************
The main function for rank scoring a partial de novo peptide with terminal amino acid
combos.
This function assumes that in case of partial de novo sequences, we don't know the most 
basic amino acids on the N- and C-terminal sides of the predicted (partial) peptide so
we store features for the different possibilities. The scoring function will choose
the combo that had the highest score. The main set of features which is good for all
combos (these features don't depend on the aa combo), are stored separately in main_rbs.

  sol                 - the (possibly partial) de novo solution. Its fields
                        most_basic_aa_removed_from_n / most_basic_aa_removed_from_c are
                        overwritten for every combo and keep the values of the last
                        combo on return.
  path                - the sequence path of the solution; forwarded to the feature
                        routines.
  peaks,
  num_peaks           - not used by this function.
  as                  - spectrum that gets annotated with sol.pep (the peptide it had on
                        entry is not restored).
  pmcsqs_res          - forwarded to fill_pmcsqs_features; only used when the solution
                        reaches both the N- and the C-terminal.
  main_rbs            - output sample for the combo independent features; cleared
                        before any feature is written.
  peak_prediction_rbs - output, one sample per combo. A terminal that is reached
                        contributes the single option 0, a terminal that is not reached
                        contributes the options {0,Arg,Lys}; the samples are ordered
                        with the N-terminal option as the outer index and the
                        C-terminal option as the inner index.
  size_idx            - size group index; when negative it is obtained from the peak
                        model using the charge and sol.pm_with_19.

NOTE(review): when the combined peak prediction features are disabled,
peak_prediction_rbs is left exactly as the caller passed it - confirm that callers do not
expect it to be reset in that case.

Should only be used with model type 1. Prints an error and calls exit(1) if the
partition model is missing or was not initialized, if the model type is different, or if
combined peak prediction features are enabled while the peak model's feature set type
is not 4.
******************************************************************************************/
void DeNovoRankScorer::fill_denovo_peptide_rbs_with_combos(
							   PeptideSolution& sol,
							   const SeqPath& path,
							   QCPeak* peaks, 
							   int num_peaks, 
							   AnnotatedSpectrum& as,
							   const vector<PmcSqsChargeRes>& pmcsqs_res,
							   RankBoostSample& main_rbs,
							   vector<RankBoostSample>& peak_prediction_rbs,
							   int size_idx) const
{
	// options tried for the unknown most basic amino acid of a terminal that the
	// solution does not reach
	const int missing_aas[]={0,Arg,Lys};
	const int num_missing_aas = sizeof(missing_aas)/sizeof(int);

	vector< vector<intensity_t> > ann_intens;
	vector< vector<mass_t> >	  ann_masses;

	as.set_peptide(sol.pep);
	as.annotate_spectrum(sol.pm_with_19, true);
	as.extract_annotated_intens_and_masses(ann_intens,ann_masses);

	main_rbs.clear();
	const int charge=sol.charge;

	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	if (size_idx<0)
		size_idx=peak_model->get_size_group(charge,sol.pm_with_19);

	DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];

	if (! part_model || ! part_model->ind_was_initialized)
	{
		cout << "Error: de novo partition model was not initialized, charge " <<
			charge << " size " << size_idx << endl;
		exit(1);
	}

	if (model_type != 1)
	{
		cout << "Error: fill_denovo_peptide_rbs with combos should only be used with model type 1, not " <<
			model_type << endl;
		exit(1);
	}

	if (part_model->use_PTM_peak_features)
		part_model->fill_PTM_peak_features(model->get_config(),sol,ann_masses,ann_intens,as,main_rbs);

	if (part_model->use_tryp_terminal_features)
		part_model->fill_tryp_terminal_features(sol,path,main_rbs);

	if (part_model->use_ann_peak_features)
		part_model->fill_ann_peak_features(sol,ann_masses,ann_intens,as,main_rbs);

	if (part_model->use_inten_balance_features)
		part_model->fill_inten_balance_features(model->get_config(),sol,path,main_rbs);

	if (part_model->use_peak_offset_features)
		part_model->fill_peak_offset_features(as.get_config(),sol,ann_masses,ann_intens,main_rbs);

	if (part_model->use_comp_features)
		part_model->fill_composition_features(sol,as.get_config(),comp_assigner, path, main_rbs);

	// the PMC/SQS features are only filled for solutions that span the whole peptide
	if (part_model->use_pmc_features && sol.reaches_n_terminal && sol.reaches_c_terminal)
		part_model->fill_pmcsqs_features(sol,pmcsqs_res,model->get_pmcsqs_ptr(),main_rbs);

	if (part_model->use_prm_features)
		part_model->fill_prm_features(sol, path, model_type, main_rbs);

	if (! part_model->use_combined_ppp_features)
		return;

	if (peak_model->get_feature_set_type() != 4)
	{
		cout << "Error: feature type for filling peak predictions should be 4 with this function,";
		cout << " not " << peak_model->get_feature_set_type() << endl;
		exit(1);
	}

	// depending on whether the peptide reaches all the way to the N- and C-terminals
	// we will examine different combos of the missing n and c basic amino acids:
	// a terminal that is reached has the single option 0, otherwise all the options
	// of missing_aas are examined for that side
	vector<int> n_options, c_options;
	if (sol.reaches_n_terminal)
		n_options.push_back(0);
	else
		n_options.assign(missing_aas, missing_aas+num_missing_aas);

	if (sol.reaches_c_terminal)
		c_options.push_back(0);
	else
		c_options.assign(missing_aas, missing_aas+num_missing_aas);

	// one sample per (n,c) combo, all of them start out empty
	peak_prediction_rbs.resize(n_options.size()*c_options.size());
	size_t i;
	for (i=0; i<peak_prediction_rbs.size(); i++)
		peak_prediction_rbs[i].clear();

	// n option is the outer index and c option the inner one, so the sample of combo
	// (n_idx,c_idx) is stored at n_idx*c_options.size()+c_idx
	size_t n_idx;
	for (n_idx=0; n_idx<n_options.size(); n_idx++)
	{
		size_t c_idx;
		for (c_idx=0; c_idx<c_options.size(); c_idx++)
		{
			RankBoostSample& combo_rbs = peak_prediction_rbs[n_idx*c_options.size()+c_idx];

			sol.most_basic_aa_removed_from_n=n_options[n_idx];
			sol.most_basic_aa_removed_from_c=c_options[c_idx];
			part_model->fill_combined_peak_prediction_features(sol, ann_intens,peak_model,combo_rbs,size_idx);
		}
	}
}



/*****************************************************************************************
Fills the rank boost sample of a tag.

Annotates the spectrum with the solution's peptide and fills rbs with every feature
group that is enabled in the partition model selected by the solution's charge and the
size index. The spectrum is left annotated with sol.pep (the peptide it had on entry is
not restored).

  sol        - the tag solution. When combined peak prediction features are filled and
               the solution does not reach the C-terminal, its field
               most_basic_aa_removed_from_c is set to Lys.
  path       - the sequence path of the tag; forwarded to the feature routines.
  peaks,
  num_peaks  - not used by this function.
  as         - spectrum that gets annotated with sol.pep.
  rbs        - output sample; cleared before any feature is written.
  size_idx   - size group index; when negative it is obtained from the peak model using
               the charge and sol.pm_with_19.

Should only be used with model type 3. Prints an error and calls exit(1) if the
partition model is missing or was not initialized, if the model type is different, or if
combined peak prediction features are enabled while the peak model's feature set type
is not 4.
******************************************************************************************/
void DeNovoRankScorer::fill_tag_rbs(PeptideSolution& sol,
					  const SeqPath& path,
					  QCPeak* peaks, 
					  int num_peaks, 
					  AnnotatedSpectrum& as,
					  RankBoostSample&	rbs,
					  int size_idx) const
{
	vector< vector<intensity_t> > ann_intens;
	vector< vector<mass_t> >	  ann_masses;

	as.set_peptide(sol.pep);
	as.annotate_spectrum(sol.pm_with_19, true);
	as.extract_annotated_intens_and_masses(ann_intens,ann_masses);

	rbs.clear();
	const int charge=sol.charge;

	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	if (size_idx<0)
		size_idx=peak_model->get_size_group(charge,sol.pm_with_19);

	DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];

	if (! part_model || ! part_model->ind_was_initialized)
	{
		cout << "Error: de novo partition model was not initialized, charge " <<
			charge << " size " << size_idx << endl;
		exit(1);
	}

	if (model_type != 3)
	{
		cout << "Error: fill_tag_rbs should only be used with model type 3, not " <<
			model_type << endl;
		exit(1);
	}

	if (part_model->use_PTM_peak_features)
		part_model->fill_PTM_peak_features(model->get_config(),sol,ann_masses,ann_intens,as,rbs);

	if (part_model->use_tryp_terminal_features)
		part_model->fill_tryp_terminal_features(sol,path,rbs);

	if (part_model->use_ann_peak_features)
		part_model->fill_ann_peak_features(sol,ann_masses,ann_intens,as,rbs);

	if (part_model->use_peak_offset_features)
		part_model->fill_peak_offset_features(as.get_config(),sol,ann_masses,ann_intens,rbs);

	if (part_model->use_comp_features)
		part_model->fill_composition_features(sol,as.get_config(),comp_assigner, path, rbs);

	if (part_model->use_prm_features)
		part_model->fill_prm_features(sol, path, model_type, rbs);

	if (part_model->use_combined_ppp_features) 
	{
		if (peak_model->get_feature_set_type() != 4)
		{
			cout << "Error: feature type for filling peak predictions should be 4 with this function,";
			cout << " not " << peak_model->get_feature_set_type() << endl;
			exit(1);
		}

		// a tag that stops short of the C-terminal is scored with Lys as the most
		// basic amino acid missing on that side
		if (! sol.reaches_c_terminal)
			sol.most_basic_aa_removed_from_c=Lys;

		part_model->fill_combined_peak_prediction_features(sol, ann_intens,peak_model,rbs,size_idx);
	}
}


void DeNovoRankScorer::score_complete_sequences(
								const vector<PeptideSolution>& peptide_sols,
								SingleSpectrumFile *ssf,
								QCPeak* peaks, 
								int num_peaks,
								vector<score_pair>& score_pairs,
								int forced_size_idx) const
{
	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	AnnotatedSpectrum as;
	vector<PmcSqsChargeRes> pmc_sqs_res;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -