// peakrankmodel.h
// (code-viewer page residue: "font size:" control label)
#ifndef __PEAKRANKMODEL_H__
#define __PEAKRANKMODEL_H__
#include "includes.h"
#include "AnnotatedSpectrum.h"
#include "BasicDataStructs.h"
#include "RankBoost.h"
// Integer codes for three mobility classes.
// NOTE(review): presumably these are the values produced by
// get_proton_mobility() and stored in PartitionModel::mobility — confirm
// against the definitions, which are not in this header.
#define MOBILE 1
#define PARTIALLYMOBILE 2
#define NONMOBILE 3
// Declared here, defined elsewhere. Judging by the name and the non-const
// reference parameter, this fills `sol` from `seq_path` — confirm against the
// definition.
// NOTE: "soluition" is a misspelling of "solution"; it is part of the public
// name, so it must stay as is unless every caller is updated too.
void convert_seq_path_to_peptide_soluition(Config *config, const SeqPath& seq_path, PeptideSolution& sol);
// Variant that additionally receives a read-only Peptide (`correct_pep`).
// Presumably `correct_pep` supplies the amino acids that `seq_path` does not
// cover — TODO confirm against the definition.
void convert_seq_path_to_peptide_soluition_and_fill_in_aas(Config *config,
const Peptide& correct_pep,
const SeqPath& seq_path,
PeptideSolution& sol);
// Declaration only. Receives the feature-name list by non-const reference and
// a label by value; per its name it presumably appends one entry per R/H/K
// pair, each tagged with `prefix_label` — TODO confirm against the definition.
void push_back_all_RHK_pairs(vector<string>& real_feature_names,
string prefix_label);
// Two overloads that return a mobility value as int; presumably one of
// MOBILE / PARTIALLYMOBILE / NONMOBILE — confirm against the definitions.
// This overload takes the peptide itself plus the charge.
int get_proton_mobility(const Peptide& pep, int charge);
// This overload takes the charge plus three residue counts.
// NOTE(review): the counts are ordered arg, his, lys here, while
// calc_RKH_combo_idx below takes r, k, h — watch argument order at call sites.
int get_proton_mobility(int charge, int num_arg, int num_his, int num_lys);
// Presumably maps an (r, k, h) count triple to a single combination index —
// TODO confirm the index range against the definition.
int calc_RKH_combo_idx (int r, int k, int h);
// Intensity ranking helpers (declarations only; definitions live elsewhere).
// find_ranks reads `intens` and writes its result through `ranks`.
// NOTE(review): ordering direction and tie handling are not visible here.
void find_ranks(const vector<intensity_t>& intens, vector<int>& ranks);
// Takes `intens` by non-const reference, so it can rewrite the values in
// place; the normalization scheme is defined elsewhere.
void normalize_intens(vector<intensity_t>& intens);
// Reads a two-level table of intensities and writes a two-level table of
// ranks through `peak_ranks`. Presumably the ranks are computed across all
// rows together ("combined") rather than per row — TODO confirm.
void calc_combined_peak_ranks(const vector< vector<float> >& intens,
vector< vector<int> >& peak_ranks);
// Score attached to a single peak, identified by a cut index and a fragment
// index. A default-constructed PeakScore has every field set to NEG_INF.
struct PeakScore {
    int   cut_idx;     // index of the cut this score refers to
    int   frag_idx;    // index of the fragment this score refers to
    float rank_score;  // the score itself

    PeakScore()
        : cut_idx(NEG_INF),
          frag_idx(NEG_INF),
          rank_score(NEG_INF)
    {}
};
// Holds the predicted peak rank scores for one peptide, together with helpers
// that compare those predictions against observed intensities.
struct PeptidePeakPrediction {
    // --- data ---------------------------------------------------------------
    int num_frags;
    int most_basic_missing_on_n;
    int most_basic_missing_on_c;

    vector<int> amino_acids;
    vector<int> frag_idxs;

    // NOTE(review): presumably one row per entry of frag_idxs — TODO confirm.
    vector< vector<float> > rank_scores;

    // used for the combined peak models (feature_type_set >= 3)
    vector< PeakScore> combined_peak_scores;

    // --- construction -------------------------------------------------------
    // Starts with zero fragments and both "most basic missing" fields at 0;
    // the vectors start empty.
    PeptidePeakPrediction()
        : num_frags(0),
          most_basic_missing_on_n(0),
          most_basic_missing_on_c(0)
    {}

    // --- reporting / comparison (defined elsewhere) -------------------------
    // Prints the predicted ranks next to the supplied intensities.
    void print_ranks_vs_intens(const vector< vector<float> >& intens) const;

    // Writes observed and predicted rank tables for the supplied intensities
    // through the two output references.
    void make_rank_tables(
        const vector< vector<float> >& intens,
        vector< vector<int> >& observed_ranks,
        vector< vector<int> >& predicted_ranks) const;

    // Same outputs as make_rank_tables, for combined peak predictions.
    void make_rank_tables_for_combined_peak_predictions(
        const vector< vector<float> >& intens,
        vector< vector<int> >& observed_ranks,
        vector< vector<int> >& predicted_ranks) const;
};
// Locates a run of peaks: which peptide sample it belongs to, where the run
// starts, and how many peaks it contains. A default-constructed PeakStart
// uses -1 for both indices and 0 for the count.
struct PeakStart {
    int peptide_sample_idx;
    int peak_start_idx;
    int num_peaks;

    PeakStart()
        : peptide_sample_idx(-1),
          peak_start_idx(-1),
          num_peaks(0)
    {}
};
// Forward declarations: PartitionModel names both types in its member
// declarations (PeakRankModel by pointer and as a friend, TrainingPeptide
// inside a vector passed by reference) before their full definitions are seen.
struct TrainingPeptide;
class PeakRankModel;
/***************************************************************************
 Model for one specific charge / size_idx / mobility partition.
 Holds the rank models for a set of fragments: a vector of per-fragment
 RankBoostModel objects plus one combined-fragment RankBoostModel.
 PeakRankModel is a friend and may access the private state directly.
****************************************************************************/
class PartitionModel {
    friend class PeakRankModel;

public:
    // Builds an "empty" partition: name "empty", every identifying integer
    // set to NEG_INF and zero features per fragment.
    PartitionModel()
        : partition_name("empty"),
          feature_set_type(NEG_INF),
          charge(NEG_INF),
          size_idx(NEG_INF),
          mobility(NEG_INF),
          max_frag_idx(NEG_INF),
          num_features_per_frag(0)
    {}

    // ---- accessors ---------------------------------------------------------

    // Read-only view of the fragment type indexes stored in this partition.
    const vector<int>& get_fragment_type_idxs() const { return fragment_type_idxs; }

    // ---- scoring -----------------------------------------------------------

    // works for all models (both separate frags and the combined model)
    // Results are written through `ppp`. `ptr_frag_type_idxs` is optional
    // (defaults to NULL).
    // NOTE(review): the `feature_set_type` parameter shadows the member of the
    // same name and defaults to 1 — confirm which one the definition uses.
    void calc_peptides_peaks_rank_scores(
        const PeakRankModel *prank,
        const PeptideSolution& sol,
        mass_t min_detected_mass,
        mass_t max_detected_mass,
        PeptidePeakPrediction& ppp,
        int feature_set_type = 1,
        const vector<int>* ptr_frag_type_idxs = NULL) const;

    // ---- combined (all fragments in one model) -----------------------------

    // Reads the combined model for the given charge / size / mobility from
    // `path`. The meaning of the int return value is defined in the
    // implementation.
    int read_combined_partition_model(
        const string& path,
        Config *config,
        int _charge,
        int _size_idx,
        int _mobility);

    // Writes the combined model to `path`; the meaning of the int return
    // value is defined in the implementation.
    int write_combined_partition_model(const string& path);

    // Trains the combined model from the samples at `sample_file_path`.
    // Everything from `num_frags` onward is optional; the pointer arguments
    // default to NULL and `max_rounds` / `test_peptide_length` default to -1.
    void train_combined_partition_model(
        const PeakRankModel *prank,
        char *sample_file_path,
        int _charge,
        int _size_idx,
        int _mobility,
        int num_frags = 3,
        char *report_dir = NULL,
        int max_rounds = -1,
        char *test_set = NULL,
        int test_peptide_length=-1,
        char *stop_signal_file = NULL,
        weight_t max_weight_ratio = 5.0);

    // The three fill_* functions below write the features of one peak
    // (a given cut and fragment) into `sample`.
    void fill_combined_peak_features(
        const PeakRankModel *prank,
        const vector<int>& amino_acids,
        const int cut_idx,
        const mass_t cut_mass,
        const PeptideSolution& sol,
        const FragmentType& frag,
        const int frag_pos_idx,
        RankBoostSample& sample) const;

    // "Simple" feature variant; `verbose` is off by default.
    void fill_combined_simple_peak_features(
        const PeakRankModel *prank,
        const vector<int>& amino_acids,
        const int cut_idx,
        const mass_t cut_mass,
        const PeptideSolution& sol,
        const FragmentType& frag,
        const int frag_pos_idx,
        RankBoostSample& sample,
        bool verbose = false) const;

    // "dnv" feature variant; also takes the n_mass / c_mass bounds of the
    // peptide.
    void fill_combined_dnv_peak_features(
        const PeakRankModel *prank,
        const mass_t n_mass, // this is where the possibly partial peptide starts
        const mass_t c_mass,
        const vector<int>& amino_acids,
        const int cut_idx,
        const mass_t cut_mass,
        const PeptideSolution &sol,
        const FragmentType& frag,
        const int position_idx_in_model_fragment_type_idxs,
        RankBoostSample& sample) const;

    // Feature-name setters, one per feature variant above.
    void set_combined_feature_names_in_rankboost_model(const PeakRankModel *prank);
    void set_combined_simple_feature_names_in_rankboost_model(const PeakRankModel *prank);
    void set_combined_dnv_feature_names_in_rankboost_model(const PeakRankModel *prank);

    // ---- separate per-fragment models --------------------------------------

    // Trains from the samples at `sample_file_path`. Note that `prank` is
    // non-const here, unlike in train_combined_partition_model.
    // `frag_idx` defaults to -1; the remaining optional arguments have the
    // same defaults as in train_combined_partition_model.
    void train_partition_model(
        PeakRankModel *prank,
        char *sample_file_path,
        int _charge,
        int _size_idx,
        int _mobility,
        int frag_idx = -1,
        char *report_dir = NULL,
        int max_rounds = -1,
        char *test_set = NULL,
        int test_peptide_length=-1,
        char *stop_signal_file = NULL,
        weight_t max_weight_ratio = 5.0);

    // Reads the partition model for the given charge / size / mobility from
    // `path`; the meaning of the int return value is defined in the
    // implementation.
    int read_partition_model(
        const string& path,
        Config *config,
        int _charge,
        int _size_idx,
        int _mobility);

    // Writes the partition model to `path`.
    int write_partition_model(const string& path);

    // ---- debugging output --------------------------------------------------

    // Prints peak pairs for the combined model to `os` (cout by default).
    // `max_examples` defaults to -1.
    void print_combined_peak_pairs(
        const vector<idx_weight_pair>& pair_idxs,
        const vector<TrainingPeptide>& tps,
        const RankBoostDataset& ds,
        PeakRankModel *prank,
        int max_examples=-1,
        ostream& os = cout) const;

    // Prints peak pairs for the single fragment `frag` to `os` (cout by
    // default). `max_examples` defaults to -1.
    void simple_print_peak_pairs(
        const vector<idx_weight_pair>& pair_idxs,
        const vector<TrainingPeptide>& tps,
        const RankBoostDataset& ds,
        PeakRankModel *prank,
        int frag,
        int max_examples=-1,
        ostream& os = cout) const;

    // ---- naming / configuration --------------------------------------------

    const string& get_partition_name() const { return partition_name; }

    // Sets the partition name from the model name and the partition's
    // charge / size_idx / mobility (defined elsewhere).
    void set_partition_name(
        const string& peak_rank_model_name,
        int charge,
        int size_idx,
        int mobility);

    void set_feature_set_type(int t) { feature_set_type = t; }

    void print_model_stats() const;

private:
    string partition_name; // name_charge_sizeidx_mobility

    int feature_set_type;

    // Identity of this partition.
    int charge;
    int size_idx;
    int mobility;

    int max_frag_idx;

    vector<int> fragment_type_idxs;

    // Per-fragment models.
    vector<RankBoostModel> frag_models;

    int num_features_per_frag;

    // Single model covering all fragments together.
    RankBoostModel combined_frag_boost_model;
};
/***********************************************************************
Container class for all charge/size/mobility partition models
************************************************************************/
class PeakRankModel {
public:
//
bool read_peak_rank_model(Config *_config, const char *name, bool silent_ind=false,
int specific_charge=-1, int specific_size=-1, int specific_mobility=-1);
//
void write_peak_rank_model(char *name, char *out_dir = NULL);
void calc_peptide_predicted_scores(const PeptideSolution& sol,
PeptidePeakPrediction& ppp,
int specific_size = -1,
const vector<int>* ptr_frag_type_idxs = NULL) const;
bool make_peak_prediction_table(
const PeptideSolution& sol,
const vector< vector<intensity_t> >& intens,
int num_peaks) const;
void set_max_detected_mass(mass_t m) { max_detected_mass = m; }
mass_t get_max_detected_mass() const { return max_detected_mass; }
int get_num_model_aas() const { return model_aa_labels.size(); }
const vector<string>& get_model_aa_labels() const { return model_aa_labels; }
mass_t calc_min_detected_mass(mass_t pm_with_19, int charge) const;
void set_config(Config *con) { config = con; }
Config *get_config() const { return config; }
const vector<string>& get_binary_names() const { return binary_feature_names; }
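// [Code-viewer page residue below; not part of the header source]
// Keyboard shortcuts
//   Copy code              Ctrl + C
//   Search code            Ctrl + F
//   Full-screen mode       F11
//   Toggle theme           Ctrl + Shift + D
//   Show shortcuts         ?
//   Increase font size     Ctrl + =
//   Decrease font size     Ctrl + -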