⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 denovorankscore.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	score_pairs.clear();
	if (peptide_sols.size()==0)
		return;

	int charge1=0,charge2=0;
	mass_t mz1=0,mz2=0;
	float prob1=0,prob2=0;

	BasicSpectrum bs;
	bs.peaks = peaks;
	bs.num_peaks = num_peaks;
	bs.ssf = ssf;

	Config *config = model->get_config();
	score_pairs.clear();

	model->get_best_mz_charge(config,bs, &mz1, &charge1, &prob1, 
							  &mz2, &charge2, &prob2, &pmc_sqs_res);

	as.init_from_QCPeaks(config,peaks,num_peaks,ssf);

	
	vector<ScalingFactor> same_scaling_factors;
	same_scaling_factors.resize(10);

	int i;
	for (i=0; i<peptide_sols.size(); i++)
	{
		const PeptideSolution& sol = peptide_sols[i];
		const Peptide& pep = sol.pep;
		const mass_t mass_with_19 = pep.get_mass_with_19();
		const int charge = sol.charge;
		const int size_idx = (forced_size_idx>=0 ? forced_size_idx : 
							  peak_model->get_size_group(charge,mass_with_19) );
		
		
		DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
		if (! part_model|| ! part_model->ind_was_initialized)
		{
			cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
			exit(1);
		}
		if (same_scaling_factors[charge].score_scale == 1.0)  // makes sure the same scale is used for all sequences with this charge
			same_scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);	
	
		RankBoostSample rbs;
		as.set_peptide(pep);
		as.annotate_spectrum(mass_with_19);
		fill_complete_peptide_rbs(sol, peaks, num_peaks, as, pmc_sqs_res, rbs, size_idx);

		float score=part_model->boost_model.calc_rank_score(rbs);
		score += same_scaling_factors[charge].score_shift;
		score *= same_scaling_factors[charge].score_scale;


		if (pep.get_num_aas()<8)
		{
			score -=1;
			if (pep.get_num_aas()<7)
				score -=1;
			if (pep.get_num_aas()<6)
				score -=5;
		}
		score_pairs.push_back(score_pair(i,score));
	}
}



void DeNovoRankScorer::score_denovo_sequences(
								const vector<SeqPath>& seq_paths,
								SingleSpectrumFile *ssf,
								QCPeak* peaks, 
								int num_peaks,
								vector<score_pair>& score_pairs,
								int forced_size_idx,
								int max_idx_for_ranking) const
{
	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	AnnotatedSpectrum as;
	vector<PmcSqsChargeRes> pmc_sqs_res;

	int charge1=0,charge2=0;
	mass_t mz1=0,mz2=0;
	float prob1=0,prob2=0;

	BasicSpectrum bs;
	bs.peaks = peaks;
	bs.num_peaks = num_peaks;
	bs.ssf = ssf;

	Config *config = model->get_config();
	score_pairs.clear();

	model->get_best_mz_charge(config,bs, &mz1, &charge1, &prob1, 
							  &mz2, &charge2, &prob2, &pmc_sqs_res);

	const mass_t corr_mass_with_19 = mz1*charge1 - (charge1-1.0);
	as.init_from_QCPeaks(config,peaks,num_peaks,ssf);
	as.set_corrected_pm_with_19(corr_mass_with_19);

	vector<ScalingFactor> scaling_factors;
	scaling_factors.resize(10);

	int i;
	for (i=0; i<seq_paths.size(); i++)
	{
		if (max_idx_for_ranking>0 && i>=max_idx_for_ranking)
		{
			score_pairs.push_back(score_pair(i,-(float)i));
			continue;
		}

		const SeqPath& path = seq_paths[i];
		// create a peptide solution to represent the path
		PeptideSolution sol;

		vector<int> amino_acids;
		path.get_amino_acids(amino_acids);

		sol.charge = path.charge;
		sol.pm_with_19 = path.pm_with_19;
		sol.pep.set_peptide_aas(amino_acids);
		sol.pep.set_n_gap(path.n_term_mass);
		sol.reaches_n_terminal = (sol.pep.get_n_gap()<1.0);
		sol.pep.calc_mass(config);
		sol.reaches_c_terminal = (sol.pep.get_mass_with_19()>sol.pm_with_19 - 7);
		sol.type = -1;
		
		// choose pm according to if peptide reaches both ends

		const mass_t mass_with_19 =( (sol.reaches_n_terminal && sol.reaches_c_terminal) ?
			sol.pep.get_mass_with_19() : corr_mass_with_19);
	
		const int charge   = path.charge;
		const int size_idx = (forced_size_idx>=0 ? forced_size_idx : 
							  peak_model->get_size_group(charge,mass_with_19) );

		RankBoostSample		    main_rbs;			// holds the features that are common to all variants of the SeqPath
		vector<RankBoostSample> combo_variant_rbs; // holds features that might depend on the identity of the
												  // missing amino acids on the n and c-terminal sides

		as.set_peptide(sol.pep);
		as.annotate_spectrum(mass_with_19);
		fill_denovo_peptide_rbs_with_combos(sol, path, peaks, num_peaks, as, pmc_sqs_res, main_rbs, combo_variant_rbs, size_idx);

		DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
		if (! part_model || ! part_model->ind_was_initialized)
		{
			cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
			exit(1);
		}
		if (scaling_factors[charge].score_scale == 1.0)  // makes sure the same scale is used for all sequences with this charge
			scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);	

		float max_score = NEG_INF;
		int v_idx;
		for (v_idx = 0; v_idx<combo_variant_rbs.size(); v_idx++)
		{
			RankBoostSample  rbs = main_rbs;
			RankBoostSample& var_rbs = combo_variant_rbs[v_idx];
			int j;
			for (j=0; j<var_rbs.real_active_idxs.size(); j++)
				rbs.add_real_feature(var_rbs.real_active_idxs[j],var_rbs.real_active_values[j]);

			const float score=part_model->boost_model.calc_rank_score(rbs);
			if (score>max_score)
				max_score=score;
		}

		max_score += scaling_factors[charge].score_shift;
		max_score *= scaling_factors[charge].score_scale;
		score_pairs.push_back(score_pair(i,max_score));
	}
}



void DeNovoRankScorer::list_feature_differences(const vector<SeqPath>& seq_paths,
								  SingleSpectrumFile *ssf,
								  QCPeak* peaks, 
								  int num_peaks) const
{
	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	AnnotatedSpectrum as;
	vector<PmcSqsChargeRes> pmc_sqs_res;

	int charge1=0,charge2=0;
	mass_t mz1=0,mz2=0;
	float prob1=0,prob2=0;

	BasicSpectrum bs;
	bs.peaks = peaks;
	bs.num_peaks = num_peaks;
	bs.ssf = ssf;

	Config *config = model->get_config();

	model->get_best_mz_charge(config,bs, &mz1, &charge1, &prob1, 
							  &mz2, &charge2, &prob2, &pmc_sqs_res);

	const mass_t corr_mass_with_19 = mz1*charge1 - (charge1-1.0);
	as.init_from_QCPeaks(config,peaks,num_peaks,ssf);
	as.set_corrected_pm_with_19(corr_mass_with_19);

	vector<ScalingFactor> scaling_factors;
	scaling_factors.resize(10);

	const mass_t mass_with_19 = seq_paths[0].pm_with_19;
	const int charge   = seq_paths[0].charge;
	const int size_idx = peak_model->get_size_group(charge,mass_with_19);
	

	DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
	if (! part_model || ! part_model->ind_was_initialized)
	{
		cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
		exit(1);
	}
	if (scaling_factors[charge].score_scale == 1.0)  // makes sure the same scale is used for all sequences with this charge
		scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);	

	vector<RankBoostSample> boost_samples;
	int i;
	for (i=0; i<seq_paths.size(); i++)
	{
	

		const SeqPath& path = seq_paths[i];
		// create a peptide solution to represent the path
		PeptideSolution sol;

		vector<int> amino_acids;
		path.get_amino_acids(amino_acids);

		sol.charge = path.charge;
		sol.pm_with_19 = path.pm_with_19;
		sol.pep.set_peptide_aas(amino_acids);
		sol.pep.set_n_gap(path.n_term_mass);
		sol.reaches_n_terminal = (sol.pep.get_n_gap()<1.0);
		sol.pep.calc_mass(config);
		sol.reaches_c_terminal = (sol.pep.get_mass_with_19()>sol.pm_with_19 - 7);
		sol.type = -1;
		
		// choose pm according to if peptide reaches both ends

		RankBoostSample		    main_rbs;			// holds the features that are common to all variants of the SeqPath
		vector<RankBoostSample> combo_variant_rbs; // holds features that might depend on the identity of the
												  // missing amino acids on the n and c-terminal sides
		as.set_peptide(sol.pep);
		as.annotate_spectrum(mass_with_19);
		fill_denovo_peptide_rbs_with_combos(sol, path, peaks, num_peaks, as, pmc_sqs_res, main_rbs, combo_variant_rbs, size_idx);

		float max_score = NEG_INF;
		int v_idx;
		for (v_idx = 0; v_idx<combo_variant_rbs.size(); v_idx++)
		{
			RankBoostSample  rbs = main_rbs;
			RankBoostSample& var_rbs = combo_variant_rbs[v_idx];
			int j;
			for (j=0; j<var_rbs.real_active_idxs.size(); j++)
				rbs.add_real_feature(var_rbs.real_active_idxs[j],var_rbs.real_active_values[j]);

			boost_samples.push_back(rbs);
		}
	}
	part_model->boost_model.list_feature_vals_according_to_score(boost_samples);
}


void DeNovoRankScorer::score_tag_sequences(
								const vector<SeqPath>& seq_paths,
								SingleSpectrumFile *ssf,
								QCPeak* peaks, 
								int num_peaks,
								vector<score_pair>& score_pairs,
								int forced_size_idx) const
{
	PeakRankModel *&peak_model = peak_prediction_models[model_type];
	AnnotatedSpectrum as;
	vector<PmcSqsChargeRes> pmc_sqs_res;

	int charge1=0,charge2=0;
	mass_t mz1=0,mz2=0;
	float prob1=0,prob2=0;

	BasicSpectrum bs;
	bs.peaks = peaks;
	bs.num_peaks = num_peaks;
	bs.ssf = ssf;

	Config *config = model->get_config();
	

	as.init_from_QCPeaks(config,peaks,num_peaks,ssf);
	
	vector<ScalingFactor> scaling_factors;
	scaling_factors.resize(10);
	score_pairs.clear();

	int i;
	for (i=0; i<seq_paths.size(); i++)
	{
		const SeqPath& path = seq_paths[i];
		// create a peptide solution to represent the path
		PeptideSolution sol;

		vector<int> amino_acids;
		path.get_amino_acids(amino_acids);

		sol.charge = path.charge;
		sol.pm_with_19 = path.pm_with_19;
		sol.pep.set_peptide_aas(amino_acids);
		sol.pep.set_n_gap(path.n_term_mass);
		sol.reaches_n_terminal = (sol.pep.get_n_gap()<1.0);
		sol.pep.calc_mass(config);
		sol.reaches_c_terminal = (sol.pep.get_mass_with_19()>sol.pm_with_19 - 7);
		sol.type = -1;
		
		// choose pm according to if peptide reaches both ends

		const mass_t mass_with_19 =( (sol.reaches_n_terminal && sol.reaches_c_terminal) ?
			sol.pep.get_mass_with_19() : path.pm_with_19);
	
		const int charge   = path.charge;
		const int size_idx = (forced_size_idx>=0 ? forced_size_idx : 
							  peak_model->get_size_group(charge,mass_with_19) );

		RankBoostSample		    rbs;			// holds the features that are common to all variants of the SeqPath
	
		as.set_peptide(sol.pep);
		as.annotate_spectrum(mass_with_19);
		fill_tag_rbs(sol,path,peaks,num_peaks,as,rbs,size_idx);

		DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
		if (! part_model || ! part_model->ind_was_initialized)
		{
			cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
			exit(1);
		}
		if (scaling_factors[charge].score_scale == 1.0)  // makes sure the same scale is used for all sequences with this charge
			scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);	

		float score = part_model->boost_model.calc_rank_score(rbs);

		score += scaling_factors[charge].score_shift;
		score *= scaling_factors[charge].score_scale;
		score_pairs.push_back(score_pair(i,score));
	}
}



void DeNovoRankScorer::read_denovo_rank_scorer_model(const char *path, string type_string, bool silent_ind)
{
	ifstream ifs(path);
	if (! ifs.good() || ! ifs.is_open())
	{
		cout << "Error: couldn't open file for reading: " << path << endl;
		exit(1);
	}

	ifs >> dnv_model_name >> model_type;
	if (model_type<0 || model_type>10)
	{
		cout << "Error: bad model type specified!" << endl;
		exit(1);
	}


	string model_str, peak_model_str, comp_assign_str;
	int max_charge;
	ifs >> max_charge >> model_str >> peak_model_str >> comp_assign_str;

	if (! model)
	{
		cout << "Error: must set score model first!" << endl;
		exit(1);
	}


	if (! peak_prediction_models[model_type])
	{
		peak_prediction_models[model_type] = new PeakRankModel;
		if (! peak_prediction_models[model_type]->read_peak_rank_model(model->get_config(),peak_model_str.c_str(),true))
		{
			cout << "Error: couldn't read peak model " << peak_model_str << endl;
			exit(1);
		}
	}

	if (! comp_assigner)
	{
		comp_assigner = new PeptideCompAssigner;
		comp_assigner->read_and_init_from_tables(model->get_config(),comp_assign_str.c_str());
	}

	string score_model_name = model->get_model_name();

	int num_models_read=0;
	init_tables(silent_ind);
	int c=0;
	for (c=0; c<dnv_part_models.size(); c++)
	{
		int size_idx;
		for (size_idx=0; size_idx<dnv_part_models[c].size(); size_idx++)
		{
			ostringstream oss;
			oss << model->get_config()->get_resource_dir() << "/" <<score_model_name << "_" << 
				type_string << "/" << dnv_model_name << "_" << c << "_" << size_idx << "_model.txt";

		//	cout << oss.str() << endl;

			ifstream ifs(oss.str().c_str());
			if (! ifs.is_open())
				continue;
			ifs.close();
			dnv_part_models[c][size_idx]=new DeNovoPartitionModel;
			if (dnv_part_models[c][size_idx]->read_denovo_part_model(oss.str().c_str(),model->get_config()))
			{
				if (! silent_ind)
					cout << "Read de novo rank model " << c << " " << size_idx << endl;
				num_models_read++;
			}
		}
	}

	if (! silent_ind)
		cout << "Read " << num_models_read << " de novo rank models..." << endl;
}



void DeNovoRankScorer::write_denovo_rank_scorer_model(char *name)
{
	if (! model || ! model->get_ind_pmcsqs_was_intialized())
	{
		cout << "Error: model not initialized!" << endl;
		exit(1);
	}

	if (! peak_prediction_models[model_type] )

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -