⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 peakrankmodel.h

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 H
📖 第 1 页 / 共 2 页
字号:
#ifndef __PEAKRANKMODEL_H__
#define __PEAKRANKMODEL_H__

#include "includes.h"
#include "AnnotatedSpectrum.h"
#include "BasicDataStructs.h"
#include "RankBoost.h"


// Integer codes for three mobility classes.
// NOTE(review): presumably these are the values returned by get_proton_mobility()
// and accepted as the `mobility` / `_mobility` arguments of the partition models
// declared below -- confirm against the .cpp before relying on it.
#define MOBILE 1
#define PARTIALLYMOBILE 2
#define NONMOBILE 3


// Fills `sol` (output) from `seq_path` (read-only input), judging by the name and
// the const-input / non-const-output signature.
// NOTE(review): "soluition" is a misspelling of "solution" inside the identifier.
// It is part of the public name, so it has to stay unless the definition and
// every caller are renamed together.
void convert_seq_path_to_peptide_soluition(Config *config, const SeqPath& seq_path, PeptideSolution& sol);

// Variant of the conversion above that additionally receives the correct peptide
// (`correct_pep`, read-only). NOTE(review): presumably it uses `correct_pep` to
// fill in amino acids that `seq_path` does not provide -- confirm in the .cpp.
void convert_seq_path_to_peptide_soluition_and_fill_in_aas(Config *config, 
														   const Peptide& correct_pep,
														   const SeqPath& seq_path, 
														   PeptideSolution& sol);

// Appends entries to `real_feature_names` (the only mutable argument);
// `prefix_label` is taken by value. NOTE(review): presumably one name per
// R/H/K pair, each prefixed by `prefix_label` -- confirm.
void push_back_all_RHK_pairs(vector<string>& real_feature_names,
							 string prefix_label);

// Returns an int mobility code for a peptide at the given charge.
// NOTE(review): presumably one of MOBILE / PARTIALLYMOBILE / NONMOBILE -- confirm.
int get_proton_mobility(const Peptide& pep, int charge);

// Overload that takes the charge and three residue counts (num_arg, num_his,
// num_lys) instead of a Peptide object.
int get_proton_mobility(int charge, int num_arg, int num_his, int num_lys);

// Maps three integers (r, k, h) to a single index.
// NOTE(review): the parameters are presumably R/K/H residue counts -- confirm.
int calc_RKH_combo_idx (int r, int k, int h);

// Reads `intens` (const) and writes the result into `ranks` (output).
// NOTE(review): rank direction and tie handling are not visible from this header.
void find_ranks(const vector<intensity_t>& intens, vector<int>& ranks);

// Modifies `intens` in place (non-const reference, no return value).
// NOTE(review): the normalization scheme is not visible from this header.
void normalize_intens(vector<intensity_t>& intens);

// Reads a table of intensities (vector of vectors, const) and writes a table of
// integer ranks into `peak_ranks` (output).
void calc_combined_peak_ranks(const vector< vector<float> >& intens, 
							  vector< vector<int> >& peak_ranks);

// A rank score attached to one peak, identified by a cut index and a fragment
// index. Every field starts out as the NEG_INF sentinel.
struct PeakScore {
	int   cut_idx;
	int   frag_idx;
	float rank_score;

	PeakScore()
		: cut_idx(NEG_INF),
		  frag_idx(NEG_INF),
		  rank_score(NEG_INF)
	{}
};

// Holds the rank scores predicted for one peptide, together with helpers that
// compare them against observed intensities.
struct PeptidePeakPrediction {

	// Starts with zero fragments and both "most basic missing" counters at 0;
	// all vectors start empty.
	PeptidePeakPrediction()
		: num_frags(0),
		  most_basic_missing_on_n(0),
		  most_basic_missing_on_c(0)
	{}

	// Prints ranks against the supplied intensities (const; does not modify this object).
	void print_ranks_vs_intens(const vector< vector<float> >& intens) const;

	// Writes two rank tables, observed and predicted, for the given intensities.
	void make_rank_tables(
		const vector< vector<float> >& intens,
		vector< vector<int> >& observed_ranks,
		vector< vector<int> >& predicted_ranks) const;

	// Same outputs as make_rank_tables, for the combined peak predictions.
	void make_rank_tables_for_combined_peak_predictions(
		const vector< vector<float> >& intens,
		vector< vector<int> >& observed_ranks,
		vector< vector<int> >& predicted_ranks) const;

	int num_frags;
	int most_basic_missing_on_n;
	int most_basic_missing_on_c;
	vector<int> amino_acids;
	vector<int> frag_idxs;
	vector< vector<float> > rank_scores;   // NOTE(review): index order (fragment vs. cut) not visible here
	vector< PeakScore> combined_peak_scores; // used for the combined peak models (feature_type_set >= 3)
};




// Three integers describing a run of peaks: a peptide-sample index, the index
// of the first peak, and the number of peaks. Both indices default to -1 and
// the count defaults to 0.
struct PeakStart {
	int peptide_sample_idx;
	int peak_start_idx;
	int num_peaks;

	PeakStart()
		: peptide_sample_idx(-1),
		  peak_start_idx(-1),
		  num_peaks(0)
	{}
};


// Forward declarations: PartitionModel below only names these types in
// pointer / reference / template-argument positions in its declarations.
struct TrainingPeptide;
class  PeakRankModel;


/***************************************************************************
 Model for one specific charge / size_idx / mobility partition.
 Holds the models for a set of fragment types: one RankBoostModel per
 fragment (frag_models) plus a single combined model
 (combined_frag_boost_model).
****************************************************************************/
class PartitionModel {
	friend class PeakRankModel;
public:

	// Creates an "empty" partition: the name is "empty", every numeric
	// identifier is the NEG_INF sentinel and num_features_per_frag is 0.
	PartitionModel() : partition_name("empty"), feature_set_type(NEG_INF), 
		charge(NEG_INF), size_idx(NEG_INF), mobility(NEG_INF), max_frag_idx(NEG_INF),
		num_features_per_frag(0) {};

	// Read-only access to the fragment type indexes held by this partition.
	const vector<int>& get_fragment_type_idxs() const { return fragment_type_idxs; }

	
	// works for all models (both separate frags and the combined model)
	// Writes its result into `ppp`. `feature_set_type` defaults to 1 and
	// `ptr_frag_type_idxs` defaults to NULL.
	// NOTE(review): presumably NULL means "use this partition's own
	// fragment_type_idxs" -- confirm in the .cpp.
	void calc_peptides_peaks_rank_scores(const PeakRankModel *prank,
						  const PeptideSolution& sol,
						  mass_t min_detected_mass, 
						  mass_t max_detected_mass,
						  PeptidePeakPrediction& ppp,
						  int   feature_set_type = 1,
						  const vector<int>* ptr_frag_type_idxs = NULL) const;

	
	// Reads the combined model for the given charge / size_idx / mobility.
	// NOTE(review): the meaning of the int return value is not visible here.
	int read_combined_partition_model(const string& path, Config *config,
							 int _charge, int _size_idx, int _mobility);
	

	// Writes the combined model under `path`.
	// NOTE(review): not declared const; it cannot be made const here without
	// also changing the out-of-line definition. Return value meaning not visible.
	int write_combined_partition_model(const string& path);



	// Trains the combined model for one charge / size_idx / mobility partition
	// from `sample_file_path`. Defaults: num_frags = 3, max_rounds = -1,
	// test_peptide_length = -1, max_weight_ratio = 5.0, and NULL for
	// report_dir, test_set and stop_signal_file.
	// NOTE(review): -1 / NULL presumably mean "no limit" / "not used" -- confirm.
	void train_combined_partition_model(
								const PeakRankModel *prank, 
								char *sample_file_path,
								int	_charge,
								int  _size_idx,
								int  _mobility,
								int  num_frags = 3, 
								char *report_dir = NULL,
								int  max_rounds = -1,
								char *test_set = NULL,
								int	  test_peptide_length=-1,
								char *stop_signal_file = NULL,
								weight_t max_weight_ratio = 5.0);


	// The three fill_combined_*_peak_features methods below take a single cut
	// (cut_idx / cut_mass) and a fragment type and write into `sample`.
	// All three are const member functions.
	void fill_combined_peak_features(
								 const PeakRankModel *prank,
								 const  vector<int>& amino_acids,
								 const int    cut_idx,
								 const mass_t cut_mass,
								 const PeptideSolution& sol,
								 const FragmentType& frag,
								 const int   frag_pos_idx,
								 RankBoostSample& sample) const;

	// "Simple" variant; adds an optional `verbose` flag (default false).
	void fill_combined_simple_peak_features(
								 const PeakRankModel *prank,
								 const  vector<int>& amino_acids,
								 const int    cut_idx,
								 const mass_t cut_mass,
								 const PeptideSolution& sol,
								 const FragmentType& frag,
								 const int   frag_pos_idx,
								 RankBoostSample& sample,
								 bool	verbose = false) const;

	// "dnv" variant; additionally takes the masses n_mass and c_mass.
	// NOTE(review): "dnv" presumably stands for de novo -- confirm.
	void fill_combined_dnv_peak_features(
								 const PeakRankModel *prank,
								 const mass_t n_mass,   // this is where the possibly partial peptide starts
								 const mass_t c_mass,
								 const  vector<int>& amino_acids,
								 const int    cut_idx,
								 const mass_t cut_mass,
								 const PeptideSolution &sol,
								 const FragmentType& frag,
								 const int position_idx_in_model_fragment_type_idxs,
								 RankBoostSample& sample) const;


	// Feature-name setters, one per feature flavor (regular, simple, dnv);
	// non-const, so they modify this partition.
	void set_combined_feature_names_in_rankboost_model(const PeakRankModel *prank);
	void set_combined_simple_feature_names_in_rankboost_model(const PeakRankModel *prank);
	void set_combined_dnv_feature_names_in_rankboost_model(const PeakRankModel *prank);

	// Trains the per-fragment models for one partition. Unlike
	// train_combined_partition_model, `prank` is non-const and the sixth
	// parameter is `frag_idx` (default -1) rather than `num_frags`.
	// NOTE(review): frag_idx = -1 presumably means "all fragments" -- confirm.
	void train_partition_model(PeakRankModel *prank, 
							char *sample_file_path,
								int	_charge,
								int  _size_idx,
								int  _mobility,
								int frag_idx = -1, 
								char *report_dir = NULL,
								int  max_rounds = -1,
								char *test_set = NULL,
								int	  test_peptide_length=-1,
								char *stop_signal_file = NULL,
								weight_t max_weight_ratio = 5.0);


	// Per-fragment counterparts of read/write_combined_partition_model.
	// NOTE(review): the meaning of the int return values is not visible here.
	int read_partition_model(const string& path, Config *config,
							 int _charge, int _size_idx, int _mobility);

	int write_partition_model(const string& path);





	// Debug printers (const). `max_examples` defaults to -1 and the output
	// stream defaults to cout.
	void print_combined_peak_pairs(const vector<idx_weight_pair>& pair_idxs, 
						  const vector<TrainingPeptide>& tps,
						  const RankBoostDataset& ds,
						  PeakRankModel *prank,
						  int max_examples=-1,
						  ostream& os = cout) const;

	void simple_print_peak_pairs(const vector<idx_weight_pair>& pair_idxs, 
						  const vector<TrainingPeptide>& tps,
						  const RankBoostDataset& ds,
						  PeakRankModel *prank,
						  int frag,
						  int max_examples=-1,
						  ostream& os = cout) const;

	// Read-only access to the partition name.
	const string& get_partition_name() const { return partition_name; }

	// Sets the partition name from the model name and the partition's
	// charge / size_idx / mobility (see the format note on partition_name).
	void set_partition_name(const string& peak_rank_model_name, int charge,
							int size_idx, int mobility);

	// Stores `t` as this partition's feature set type.
	void set_feature_set_type(int t) { feature_set_type = t; }

	void   print_model_stats() const;

private:

	string partition_name; // name_charge_sizeidx_mobility

	int feature_set_type;

	// Identity of this partition (NEG_INF until set).
	int charge;
	int size_idx;
	int mobility;

	int max_frag_idx;

	// NOTE(review): frag_models is presumably parallel to or indexed via
	// fragment_type_idxs -- confirm in the .cpp.
	vector<int> fragment_type_idxs;
	vector<RankBoostModel> frag_models;

	// State used by the combined (all-fragments-in-one) model.
	int				num_features_per_frag;
	RankBoostModel	combined_frag_boost_model;
};




/***********************************************************************
Container class for all charge/size/mobility partition models
************************************************************************/
class PeakRankModel {
public:

	// 
	bool read_peak_rank_model(Config *_config, const char *name, bool silent_ind=false,
		int specific_charge=-1, int specific_size=-1, int specific_mobility=-1);

	// 
	void write_peak_rank_model(char *name, char *out_dir = NULL);


	void calc_peptide_predicted_scores(const PeptideSolution& sol,
									   PeptidePeakPrediction& ppp,
									   int specific_size = -1,
									   const vector<int>* ptr_frag_type_idxs = NULL) const;

	bool make_peak_prediction_table(
			const PeptideSolution& sol,
			const vector< vector<intensity_t> >& intens,
			int num_peaks) const;


	
	void   set_max_detected_mass(mass_t m) { max_detected_mass = m; }

	mass_t get_max_detected_mass() const { return max_detected_mass; }

	int		get_num_model_aas() const { return model_aa_labels.size(); }

	const vector<string>& get_model_aa_labels() const { return model_aa_labels; }

	mass_t calc_min_detected_mass(mass_t pm_with_19, int charge) const;

	void    set_config(Config *con) { config = con; }

	Config *get_config() const { return config; }

	const vector<string>& get_binary_names() const { return binary_feature_names; }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -