📄 peakrankmodel.h

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 H
📖 第 1 页 / 共 2 页
字号:
上一页 12
	// Gives read-only access to real_feature_names (returned by const reference, no copy).
	const vector<string>& get_real_names() const
	{
		return real_feature_names;
	}

	// Gives read-only access to real_feature_stage_idxs (returned by const reference, no copy).
	const vector<int>& get_real_feature_stage_idxs() const
	{
		return real_feature_stage_idxs;
	}

	// Returns the mutable PartitionModel pointer stored at
	// partition_models[charge][size group of pm_with_19][mobility].
	// The size group is obtained from get_size_group(charge, pm_with_19).
	// None of the three indices is range-checked here.
	PartitionModel * get_non_const_model_ptr(int charge, mass_t pm_with_19, int mobility) const
	{
		const int size_group = get_size_group(charge, pm_with_19);
		const vector<PartitionModel *>& mobility_models = partition_models[charge][size_group];
		return mobility_models[mobility];
	}

	// Read-only lookup of the model for a given charge, precursor mass and mobility.
	// The mass is first mapped to its size group with get_size_group(); the actual
	// table access is done by the (charge, size_idx, mobility) overload.
	// No range checking is performed on charge or mobility.
	const PartitionModel * const get_model_ptr(int charge, mass_t pm_with_19, int mobility) const
	{
		const int size_group = get_size_group(charge, pm_with_19);
		// All three arguments are int, so this selects the size_idx overload.
		return get_model_ptr(charge, size_group, mobility);
	}

	// Read-only lookup of partition_models[charge][size_idx][mobility].
	// The indices are used as given; no range checking is performed.
	const PartitionModel * const get_model_ptr(int charge, int size_idx, int mobility) const
	{
		const vector<PartitionModel *>& mobility_models = partition_models[charge][size_idx];
		return mobility_models[mobility];
	}


	int get_size_group(int charge, mass_t pm_with_19) const
	{
		const vector<mass_t> & thresholds = size_thresholds[charge];
		int size_idx;
		for (size_idx=0; size_idx<thresholds.size(); size_idx++)
			if (pm_with_19<thresholds[size_idx])
				return size_idx;
		return thresholds.size();
	}

	int get_num_size_thresholds(int charge) const { return size_thresholds[charge].size(); }


	// Declaration only; the definition is not part of this header excerpt.
	// NOTE(review): judging by the name, this presumably fills `sample` with the
	// "simple" feature set for the cut at cut_idx / cut_mass of the peptide given
	// by amino_acids -- confirm against the implementation.
	// `sample` is the only non-const reference parameter, so it is the only
	// argument through which results can be passed back. The method is const.
	void fill_simple_peak_features(
								 const  vector<int>& amino_acids,
								 int    cut_idx,
								 mass_t cut_mass,
								 mass_t pm_with_19,
								 int	spec_charge,
								 const FragmentType& frag,
								 RankBoostSample& sample) const;


	// Declaration only; the definition is not part of this header excerpt.
	// Takes the same parameter list as fill_simple_peak_features.
	// NOTE(review): presumably fills `sample` with the "advanced" feature set for
	// one cut of the peptide -- confirm against the implementation.
	// `sample` is the only non-const reference parameter. The method is const.
	void fill_advanced_peak_features(
								 const  vector<int>& amino_acids,
								 int    cut_idx,
								 mass_t cut_mass,
								 mass_t pm_with_19,
								 int	spec_charge,
								 const FragmentType& frag,
								 RankBoostSample& sample) const;


	// Declaration only; the definition is not part of this header excerpt.
	// Compared with the simple/advanced variants this takes four extra inputs:
	// n_mass, c_mass, most_basic_on_n_side and most_basic_on_c_side.
	// NOTE(review): presumably fills `sample` with the "partial de novo" feature
	// set for one cut of a possibly partial peptide -- confirm against the
	// implementation. `sample` is the only non-const reference parameter.
	void fill_partial_denovo_peak_features(
								 mass_t n_mass,   // this is where the possibly partial peptide starts
								 mass_t c_mass,
								 const  vector<int>& amino_acids,
								 int    cut_idx,
								 mass_t cut_mass,
								 mass_t pm_with_19,
								 int	spec_charge,
								 const FragmentType& frag,
								 int most_basic_on_n_side, // if the n side does not reach the terminal
								 int most_basic_on_c_side, // if the c side does not reach the terminal
								 RankBoostSample& sample) const;

	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably sets up the feature-name lists for the "simple"
	// feature set -- confirm in the implementation.
	void set_simple_feature_names();

	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably sets up the feature-name lists for the "advanced"
	// feature set -- confirm in the implementation.
	void set_advanced_feature_names();


	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably sets up the feature-name lists for the "partial
	// de novo" feature set -- confirm in the implementation.
	void set_partial_denovo_feature_names();

	// Declaration only; the definition is not part of this header excerpt.
	// Defaults: both path prefixes are NULL, minimal_size is 4750, prop_ts is 0.25.
	// NOTE(review): all_tps is a vector of vectors -- presumably indexed by charge;
	// prop_ts is presumably the proportion of samples held out as a test set;
	// the path prefixes presumably control where partitions are written.
	// Confirm all three against the implementation.
	void partition_training_samples(const vector< vector<TrainingPeptide> >& all_tps,
									char *file_path_prefix = NULL,
									char *test_path_prefix = NULL,
									int   minimal_size = 4750,
									float prop_ts = 0.25) const;

	// Declaration only; the definition is not part of this header excerpt.
	// Inputs: frag_type_idx, spec_charge and sample_tps (const reference).
	// rank_ds, peak_intens, peak_starts and max_annotated_intens are non-const
	// references, i.e. the arguments through which results can be returned.
	// NOTE(review): presumably converts the peptides in sample_tps into rank-boost
	// samples for a single fragment type -- confirm in the implementation.
	void read_training_peptides_into_rank_boost_dataset(
										int frag_type_idx,
										int spec_charge,
										const vector<TrainingPeptide>& sample_tps,
										RankBoostDataset& rank_ds,
										vector<float>& peak_intens,
										vector<PeakStart>& peak_starts,
										vector<float>& max_annotated_intens) const;

	// Declaration only; the definition is not part of this header excerpt.
	// Non-const member. Only frag_fill_type and prefix_path are required.
	// Defaults: charge, size_idx, mobility, frag_type_idx, num_rounds and
	// test_peptide_length are -1; report_dir, test_set and stop_signal_file are
	// NULL; max_weight_ratio is 5.0.
	// NOTE(review): -1 presumably means "all" / "use the built-in default" for the
	// respective selector -- confirm in the implementation.
	void train_all_partition_models(
								int frag_fill_type,
								char *prefix_path,
								int	  charge=-1,
								int   size_idx=-1,
								int	  mobility=-1,
								int	  frag_type_idx=-1,
								char *report_dir = NULL,
								int   num_rounds = -1,
								char *test_set = NULL,
								int	  test_peptide_length=-1,
								char *stop_signal_file = NULL,
								weight_t max_weight_ratio = 5.0);

	// Declaration only; the definition is not part of this header excerpt.
	// Inputs: spec_charge, size_idx, mobility and sample_tps (const reference).
	// rank_ds, peak_intens, peak_starts and peak_frag_types are non-const
	// references, i.e. the arguments through which results can be returned.
	// NOTE(review): "combined" presumably means several fragment types go into one
	// dataset, with peak_frag_types recording the type of each peak -- confirm.
	void read_training_peptides_into_combined_rank_boost_dataset(
										int spec_charge,
										int size_idx,
										int mobility,
										const vector<TrainingPeptide>& sample_tps,
										RankBoostDataset& rank_ds,
										vector<float>& peak_intens,
										vector<PeakStart>& peak_starts,
										vector<int>& peak_frag_types) const;

	// Declaration only; the definition is not part of this header excerpt.
	// Non-const member. Only frag_fill_type and prefix_path are required.
	// Defaults: charge, size_idx, mobility, num_rounds and test_peptide_length
	// are -1; num_frags is 3; report_dir, test_set and stop_signal_file are NULL;
	// max_weight_ratio is 5.0.
	// NOTE(review): -1 presumably means "all" / "use the built-in default", and
	// num_frags presumably limits how many fragment types are combined -- confirm.
	void train_all_combined_partition_models(
								int frag_fill_type,
								char *prefix_path,
								int	  charge=-1,
								int   size_idx=-1,
								int	  mobility=-1,
								int	  num_frags=3,
								char *report_dir = NULL,
								int   num_rounds = -1,
								char *test_set = NULL,
								int	  test_peptide_length=-1,
								char *stop_signal_file = NULL,
								weight_t max_weight_ratio = 5.0);

	// Declaration only. org_aas is read-only input; model_aas is a non-const
	// reference through which the result can be returned.
	// NOTE(review): presumably translates amino-acid codes into the model's own
	// amino-acid codes -- confirm in the implementation.
	void convert_aas_to_model_aas(const vector<int>& org_aas, vector<int>& model_aas) const;

	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably fills binary_feature_names -- confirm in the implementation.
	void set_binary_feature_names();

	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably fills real_feature_names -- confirm in the implementation.
	void set_real_feature_names();

	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably initializes size_thresholds, the per-charge table
	// that get_size_group() scans -- confirm in the implementation.
	void set_size_thresholds();

	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably sets max_detected_mass and/or
	// charge_min_mass_coefficients to default values -- confirm in the implementation.
	void set_mass_detection_defaults();

	// Declaration only. feature_type defaults to 1.
	// NOTE(review): if feature_type maps onto feature_set_type (0 - regular,
	// 1 - advanced, 2 - partial de novo, per that member's comment), the default
	// selects the advanced feature set; _config and name are presumably stored in
	// `config` and `peak_rank_model_name` -- confirm in the implementation.
	void init_peak_rank_model_with_defaults(Config *_config, char *name, int feature_type = 1);


	// Returns the stored model name by const reference (no copy is made).
	const string& get_peak_rank_model_name() const
	{
		return peak_rank_model_name;
	}

	// Replaces the stored model name with a copy of `name`.
	void set_peak_rank_model_name(string name)
	{
		peak_rank_model_name.assign(name);
	}

	// Declaration only (non-const, no arguments, no return value).
	// NOTE(review): presumably prints or enumerates the indices of the available
	// partition models -- confirm in the implementation.
	void list_all_model_idxs();

	// Declaration only (const, no arguments, no return value).
	// NOTE(review): presumably reports which partition models have been
	// initialized -- confirm in the implementation.
	void print_model_init_stats() const;

	// Read-only access to the whole size_thresholds table (outer index: charge).
	const vector< vector<mass_t> >& get_size_thresholds() const
	{
		return size_thresholds;
	}

	// Declaration only. The misspelling "intialized" is part of the public name;
	// renaming it would break callers.
	// NOTE(review): presumably reports whether a model exists and is initialized
	// for the given charge / size group / fragment index -- confirm in the
	// implementation. The third parameter is a fragment index, whereas
	// partition_models is indexed by mobility in its third dimension.
	bool has_intialized_models(int charge, int size_idx, int frag_idx) const;

	// Returns feature_set_type (0 - regular, 1 - advanced, 2 - partial de novo).
	int get_feature_set_type() const
	{
		return feature_set_type;
	}

	// Read-only access to the session-aa -> model-aa conversion table
	// (returned by const reference, no copy).
	const vector<int>& get_session_aas_to_model_aas() const
	{
		return session_aas_to_model_aas;
	}


private:

	// Which feature set this model uses; exposed through get_feature_set_type().
	int feature_set_type; // 0 - regular , 1 - advanced, 2 - partial de novo

	// Model table, accessed as partition_models[charge][size_group][mobility]
	// by the get_model_ptr() family.
	// NOTE(review): who owns / frees the pointed-to models is not visible here.
	vector< vector< vector<PartitionModel *> > > partition_models; // charge / size_group / mobility

	// Per-charge list of mass thresholds scanned by get_size_group().
	vector< vector<mass_t> > size_thresholds; // charge / threshes

	vector<string> binary_feature_names;

	// Exposed through get_real_names().
	vector<string> real_feature_names;

	// Exposed through get_real_feature_stage_idxs().
	vector<int>    real_feature_stage_idxs;

	vector<int>	   session_aas_to_model_aas; // conversion of the session aas to the model aas code

	vector<string> model_aa_labels;

	// NOTE(review): presumably set from init_peak_rank_model_with_defaults(); ownership not visible here.
	Config *config;

	mass_t max_detected_mass;

	// Exposed through get_peak_rank_model_name() / set_peak_rank_model_name().
	string peak_rank_model_name;

	vector<mass_t> charge_min_mass_coefficients;


	// Private helper, declaration only.
	// NOTE(review): presumably sets default amino-acid related tables such as
	// model_aa_labels -- confirm in the implementation.
	void init_aa_defaults();
	// Private helper, declaration only.
	// NOTE(review): presumably builds session_aas_to_model_aas -- confirm in the implementation.
	void convert_session_aas_to_model_aas();

};






// One peptide training record: its amino acids, charge, mobility and mass, plus a
// table of intensities per fragment type and cut position. All fields are public
// data. Apart from the constructor and get_frag_idx_pos(), the member functions
// are only declared here; their definitions are not in this header.
struct TrainingPeptide {

	// Zero-initializes every scalar field; the vectors start out empty.
	TrainingPeptide() : n_mass(0),  best_n_removed(0), best_c_removed(0),
		length(0), charge(0), mobility(0), pm_with_19(0) {}

	// Declaration only. `ranks` is the output argument.
	// NOTE(review): presumably the intensity ranks of the peaks of fragment type
	// frag_idx -- confirm in the implementation.
	void get_ranks_for_frag_idx(int frag_idx, vector<int>& ranks) const;

	// Declaration only. NOTE(review): presumably fills this record from the
	// annotated spectrum using the given model -- confirm in the implementation.
	void create_training_peptide(const PeakRankModel& rm, const AnnotatedSpectrum& as);

	// Declaration only; serializes to an output file stream (format not visible here).
	void write_to_stream(ofstream& ofs) const;

	// Declaration only; deserializes from an input file stream.
	// NOTE(review): the bool presumably signals success -- confirm in the implementation.
	bool read_from_stream(ifstream& ifs);

	// Declaration only; writes a description to `ofs` (cout by default).
	void print(Config *config, ostream& ofs = cout) const;

	float get_basicity_score() const; // made up score, higher means more basic amino acids

	// Returns the row position i for which frag_idxs[i] == frag_idx (the first
	// such row if there are several), or -1 if frag_idx does not occur in frag_idxs.
	int   get_frag_idx_pos(int frag_idx) const {
		// Unsigned index avoids a signed/unsigned comparison against size().
		for (size_t pos = 0; pos < frag_idxs.size(); ++pos)
			if (frag_idxs[pos] == frag_idx)
				return static_cast<int>(pos);
		return -1;
	}


	mass_t n_mass;

	int best_n_removed, best_c_removed; // the strongest amino acids R>K>H>Other that was removed from
									    // each end because of the sequence being partial de novo
	int length;
	int charge;
	int mobility;
	mass_t pm_with_19;

	vector<int> amino_acids;
	vector<int> frag_idxs;         // these say what type of fragment's peaks appear in each row, row i
								   // doesn't have fragments of type i, but it has type frag_idxs[i], which
								   // is a bit confusing...
	vector< vector<float> > intens; // frag,cut idx
};


// Declaration only. fm is read-only input; rm and tps are non-const references.
// NOTE(review): presumably reads the spectra described by fm and appends one
// TrainingPeptide per usable spectrum to tps -- confirm in the implementation.
void read_data_into_training_peptides(const FileManager& fm, Config *config, 
									  PeakRankModel& rm, vector<TrainingPeptide>& tps);


// Declaration only. all_tps is the output argument; num_tp defaults to -1.
// NOTE(review): -1 presumably means "read every peptide in the file" -- confirm
// in the implementation.
void read_training_peptides_from_file(char *file, vector<TrainingPeptide>& all_tps,
									  int num_tp = -1);

// Declaration only. all_tps is modified in place (non-const reference).
// Defaults: num_to_add is 0, verbose is false.
// NOTE(review): presumably turns full peptides into partial de novo ones
// -- confirm in the implementation.
void convert_tps_to_partial_denovo(Config *config, 
								   vector<TrainingPeptide>& all_tps, 
								   int num_to_add = 0,
								   bool verbose = false);


// Declaration only. all_tps is read-only input.
// NOTE(review): presumably the counterpart of read_training_peptides_from_file()
// -- confirm in the implementation.
void write_training_peptides_to_file(char *file, const vector<TrainingPeptide>& all_tps);


// Declaration only. The misspelling "trianing" is part of the public name;
// renaming it would break callers. model and ptm_line default to NULL.
// NOTE(review): presumably reads the inputs named in `list` and writes a
// training-peptide file to tp_file -- confirm in the implementation.
void convert_list_to_trianing_peptide_file(char *list, char *tp_file,
										  char *model=NULL, char *ptm_line=NULL);


// Declaration only. all_tps is read-only input; selected_idxs is the output argument.
// Defaults: charge 0, mobility 0, min_length -1, max_length 100,
// min_pm_with_19 -1, max_pm_with_19 10000.
// NOTE(review): presumably collects the indices of the peptides in all_tps that
// match the filters; whether charge/mobility 0 mean "any" and whether the range
// bounds are inclusive is not visible here -- confirm in the implementation.
void select_training_peptides(const vector<TrainingPeptide>& all_tps, 
							  vector<int>& selected_idxs,
							  int charge=0, 
							  int mobility=0, 
							  int min_length=-1, 
							  int max_length=100, 
							  mass_t min_pm_with_19=-1, 
							  mass_t max_pm_with_19=10000);


// Declaration only (no arguments, no return value).
// NOTE(review): purpose is not visible from this header -- see the implementation.
void generate_size_reports();

// Declaration only (no arguments, no return value).
// NOTE(review): presumably a driver that builds the training sets; its inputs
// are not visible from this header -- see the implementation.
void create_training_sets();

// Declaration only (no arguments, no return value).
// NOTE(review): presumably a driver that trains all models; its inputs are not
// visible from this header -- see the implementation.
void train_all();


#endif
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -