📄 peakrankmodel.cpp
字号:
{
cut_mass+=aa2mass[tp.amino_acids[cut_idx-1]];
if (intens[cut_idx]>=0)
{
RankBoostSample rbs;
const FragmentType& frag = config->get_fragment(frag_type_idx);
if (feature_set_type == 0)
{
fill_simple_peak_features(tp.amino_acids, cut_idx,
cut_mass, tp.pm_with_19, spec_charge, frag, rbs);
}
else if (feature_set_type == 1)
{
fill_advanced_peak_features(tp.amino_acids, cut_idx,
cut_mass, tp.pm_with_19, spec_charge, frag, rbs);
}
else if (feature_set_type == 2)
{
fill_partial_denovo_peak_features(tp.n_mass, c_term_mass, tp.amino_acids, cut_idx,
cut_mass, tp.pm_with_19, spec_charge, frag, tp.best_n_removed, tp.best_c_removed, rbs);
}
else
{
cout << "Error: feature set type not supported: " << feature_set_type << endl;
exit(1);
}
rbs.group_idx = i;
rbs.rank_in_group = cut_ranks[cut_idx];
rbs.tag1 = rank_ds.get_num_samples(); // sample idx
rbs.tag2 = cut_idx; // cut idx
rbs.tag3 = tp.amino_acids.size(); // peptide length, this tag is used to filter
// the error estimates for a specific length
rank_ds.add_sample(rbs);
peak_intens.push_back(intens[cut_idx]);
}
}
ps.num_peaks = rank_ds.get_num_samples() - ps.peak_start_idx;
peak_starts.push_back(ps);
max_annotated_intens.push_back(max_ann_inten);
}
rank_ds.set_num_groups(sample_tps.size());
}
/***************************************************************************
Initializes the models according to specified max charge defaults (if not
already initialized). Assumes all training files have the same prefix
and differ on the charge/size/mobility idxs).
****************************************************************************/
void PeakRankModel::train_all_partition_models(
int frag_fill_type,
char *prefix_path,
int sel_charge,
int sel_size_idx,
int sel_mobility,
int frag_type_idx,
char *report_dir,
int num_rounds,
char *test_set,
int test_peptide_length,
char *stop_signal_file,
weight_t max_weight_ratio)
{
const int max_charge = size_thresholds.size() -1;
this->feature_set_type = frag_fill_type;
if (partition_models.size()<max_charge+1)
partition_models.resize(max_charge+1);
int charge;
for (charge=1; charge<=max_charge; charge++)
{
if (sel_charge>0 && sel_charge != charge)
continue;
const int num_size_idxs = size_thresholds[charge].size()+1;
if (partition_models[charge].size()<num_size_idxs)
partition_models[charge].resize(num_size_idxs);
cout << "FRAG FILL TYPE: " << frag_fill_type << endl;
cout << "CHARGE " << charge << " , # size idxs: " << num_size_idxs << endl;
cout << "Test peptide length: " << test_peptide_length << endl;
int size_idx;
for (size_idx=0; size_idx<num_size_idxs; size_idx++)
{
if (partition_models[charge][size_idx].size()<=NONMOBILE)
partition_models[charge][size_idx].resize(NONMOBILE+1,NULL);
int mobility;
for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
{
if ((sel_charge>=0 && sel_charge != charge) ||
(sel_size_idx>=0 && sel_size_idx != size_idx) ||
(sel_mobility>=0 && sel_mobility != mobility) )
continue;
char tr_file[256];
sprintf(tr_file,"%s_%d_%d_%d.txt",prefix_path,charge,size_idx,mobility);
if (! partition_models[charge][size_idx][mobility])
partition_models[charge][size_idx][mobility] = new PartitionModel;
cout << "Training models for charge " << charge << " size " << size_idx<< " mobility " <<
mobility << endl;
cout << "Max weight ratio " << max_weight_ratio << endl;
partition_models[charge][size_idx][mobility]->set_partition_name(get_peak_rank_model_name(),
charge, size_idx, mobility);
partition_models[charge][size_idx][mobility]->train_partition_model(this,
tr_file,charge,size_idx,mobility,frag_type_idx,report_dir,
num_rounds,test_set, test_peptide_length, stop_signal_file,
max_weight_ratio);
cout << endl;
}
}
}
}
/**********************************************************************
Lists the idxs of the partition models
charge size_idx mobility (not fragment)
***********************************************************************/
void PeakRankModel::list_all_model_idxs()
{
int charge;
for (charge=1; charge<size_thresholds.size(); charge++)
{
int size_idx;
for (size_idx = 0; size_idx<=size_thresholds[charge].size(); size_idx++)
{
int mobility;
for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
{
cout << charge << " " << size_idx << " " << mobility << endl;
}
}
}
}
void PeakRankModel::print_model_init_stats() const
{
if (this->feature_set_type<=2)
{
const int num_tabs = 2+(peak_rank_model_name.length()+1)/8;
string gap_str ="";
int i;
for (i=0; i<num_tabs; i++)
gap_str += "\t";
cout << gap_str;
for (i=0; i<10; i++)
cout << " " << i;
cout << endl;
int charge;
for (charge=1; charge<size_thresholds.size(); charge++)
{
int size_idx;
for (size_idx = 0; size_idx<=size_thresholds[charge].size(); size_idx++)
{
int mobility;
for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
{
if (this->partition_models[charge][size_idx][mobility])
partition_models[charge][size_idx][mobility]->print_model_stats();
}
}
}
cout << endl;
}
}
// set intens so maximal intensity of the fragments is 1.0
void find_ranks(const vector<intensity_t>& intens, vector<int>& ranks)
{
vector<inten_pair> pairs;
pairs.resize(intens.size());
int num_with_inten=0;
int i;
for (i=0; i<intens.size(); i++)
{
pairs[i].idx=i;
pairs[i].inten = intens[i];
if (intens[i]>=0)
num_with_inten++;
}
sort(pairs.begin(),pairs.end());
const int missing_peak_rank = (intens.size()+num_with_inten)/2;
ranks.resize(intens.size());
for (i=0; i<pairs.size(); i++)
{
if (pairs[i].inten <0)
{
ranks[pairs[i].idx]=-1;
}
else if (pairs[i].inten == 0)
{
ranks[pairs[i].idx]=missing_peak_rank;
}
else
ranks[pairs[i].idx]=i+1;
}
}
void normalize_intens(vector<intensity_t>& intens)
{
if (intens.size()<=1)
return;
float max_inten = intens[1];
int i;
for (i=2; i<intens.size(); i++)
if (intens[i]>max_inten)
max_inten = intens[i];
if (max_inten<=0.0)
return;
float one_over = 1.0 / max_inten;
for (i=1; i<intens.size(); i++)
intens[i] *= one_over;
}
void PeakRankModel::set_mass_detection_defaults()
{
max_detected_mass = 2000.0;
charge_min_mass_coefficients.resize(4,0);
charge_min_mass_coefficients[1]=0.24;
charge_min_mass_coefficients[2]=0.12;
charge_min_mass_coefficients[3]=0.08;
}
void PeakRankModel::set_size_thresholds()
{
size_thresholds.resize(4);
size_thresholds[1].push_back(1150.0);
size_thresholds[1].push_back(1400.0);
size_thresholds[2].push_back(1100.0);
size_thresholds[2].push_back(1300.0);
size_thresholds[2].push_back(1600.0);
size_thresholds[2].push_back(1900.0);
size_thresholds[2].push_back(2400.0);
size_thresholds[3].push_back(1950.0);
size_thresholds[3].push_back(2450.0);
size_thresholds[3].push_back(3000.0);
}
mass_t PeakRankModel::calc_min_detected_mass(mass_t pm_with_19, int charge) const
{
if (charge>= charge_min_mass_coefficients.size())
charge = charge_min_mass_coefficients.size()-1;
return (charge_min_mass_coefficients[charge]*pm_with_19);
}
bool PeakRankModel::read_peak_rank_model(Config *_config, const char *name, bool silent_ind,
int specific_charge, int specific_size, int specific_mobility)
{
config = _config;
peak_rank_model_name = string(name);
const string model_name_prefix = config->get_resource_dir() + "/" + string(name);
const string main_model_file = model_name_prefix + "_rank_model.txt";
ifstream main_stream(main_model_file.c_str());
if (! main_stream.is_open() || ! main_stream.good())
{
cout << "Error: couldn't open file for reading: " << main_model_file << endl;
exit(1);
}
string m_name;
int num_aa_labels=0;
feature_set_type = -1;
main_stream >> m_name;
main_stream >> feature_set_type;
main_stream >> num_aa_labels;
if (feature_set_type<0 || feature_set_type>5 || num_aa_labels<19)
{
cout << "Error: bad input parameters in PeakRankModel!" << endl;
exit(1);
}
model_aa_labels.resize(num_aa_labels);
int i;
for (i=0; i<num_aa_labels; i++)
main_stream >> model_aa_labels[i];
convert_session_aas_to_model_aas();
max_detected_mass=-1;
main_stream >> this->max_detected_mass;
int num_min_ratios=-1;
main_stream >> num_min_ratios;
this->charge_min_mass_coefficients.resize(num_min_ratios,1);
for (i=0; i<num_min_ratios; i++)
main_stream >> charge_min_mass_coefficients[i];
int num_charges=0;
main_stream >> num_charges;
size_thresholds.resize(num_charges);
partition_models.resize(num_charges);
for (i=0; i<num_charges; i++)
{
int charge=-1;
int num_sizes=0;
main_stream >> charge;
main_stream >> num_sizes;
if (charge != i)
{
cout << "Error: charge mismatch is size thresholds!" << endl;
exit(1);
}
size_thresholds[i].resize(num_sizes);
int j;
for (j=0; j<num_sizes; j++)
main_stream >> size_thresholds[i][j];
if (num_sizes>0)
{
partition_models[i].resize(num_sizes+1);
for (j=0; j<partition_models[i].size(); j++)
partition_models[i][j].resize(4);
}
}
if (! silent_ind)
cout << "Peak rank feature set type: " << feature_set_type << endl;
// read parition models
int num_models_read=0;
int charge;
for (charge=1; charge<partition_models.size(); charge++)
{
if (specific_charge>=0 && charge != specific_charge)
continue;
int size_idx;
for (size_idx=0; size_idx<partition_models[charge].size(); size_idx++)
{
if (specific_size>=0 && size_idx != specific_size)
continue;
int mobility;
for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
{
if (specific_mobility>=0 && mobility != specific_mobility)
continue;
partition_models[charge][size_idx][mobility] = new PartitionModel;
partition_models[charge][size_idx][mobility]->set_feature_set_type(feature_set_type);
num_models_read+=partition_models[charge][size_idx][mobility]->read_partition_model(model_name_prefix,
config,charge,size_idx,mobility);
partition_models[charge][size_idx][mobility]->set_partition_name(name,charge,size_idx,mobility);
}
}
}
if (! silent_ind)
cout << "Read " << num_models_read << " fragment rank models..." << endl;
return true;
}
void PeakRankModel::write_peak_rank_model(char *name, char *out_dir)
{
const string model_name_prefix = (out_dir? string(out_dir) : config->get_resource_dir())
+ "/" + string(name);
const string main_model_file = model_name_prefix + "_rank_model.txt";
ofstream main_stream(main_model_file.c_str());
if (! main_stream.good())
{
cout << "Error: couldn't open file for writing: " << main_model_file << endl;
exit(1);
}
// name
main_stream << name << " " << this->feature_set_type << endl;
// aa labels
main_stream << model_aa_labels.size();
int a;
for (a=0; a<model_aa_labels.size(); a++)
main_stream << " " << model_aa_labels[a];
main_stream << endl;
// min max detected masses
int charge;
main_stream << setprecision(5) << max_detected_mass << endl;
main_stream << charge_min_mass_coefficients.size();
for (charge=0; charge<charge_min_mass_coefficients.size(); charge++)
main_stream << " " << charge_min_mass_coefficients[charge];
main_stream << endl;
// size thresholds
main_stream << size_thresholds.size() << endl;
for (charge=0; charge<size_thresholds.size(); charge++)
{
main_stream << charge << " " << size_thresholds[charge].size();
int size_idx;
for (size_idx=0; size_idx<size_thresholds[charge].size(); size_idx++)
main_stream << " " << setprecision(5) << size_thresholds[charge][size_idx];
main_stream << endl;
}
// write the parition models
int models_written = 0;
for (charge=0; charge<partition_models.size(); charge++)
{
int size_idx;
for (size_idx=0; size_idx<partition_models[charge].size(); size_idx++)
{
int mobility;
for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
{
if (partition_models[charge][size_idx][mobility])
{
int n=partition_models[charge][size_idx][mobility]->write_partition_model(model_name_prefix);
models_written += n;
}
}
}
}
cout << "Wrote rank model: " << name << endl;
cout << "A total of " << models_written << " fragment models were written..." << endl;
}
void TrainingPeptide::create_training_peptide(const PeakRankModel& rm,
const AnnotatedSpectrum& as)
{
Config *config = as.get_config();
amino_acids = as.get_peptide().get_amino_acids();
length = amino_acids.size();
charge = as.get_charge();
mobility = get_proton_mobility(as.get_peptide(),charge);
pm_with_19 = as.get_true_mass_with_19();
const mass_t min_detected_mass = rm.calc_min_detected_mass(as.get_true_mass_with_19(),charge);
const mass_t max_detected_mass = rm.get_max_detected_mass();
const vector<Breakage>& breakages = as.get_breakages();
const bool verbose=false;
// find all participating fragment types
frag_idxs.clear();
int b;
for (b=1; b<breakages.size()-1; b++)
{
int f;
for (f=0; f<breakages[b].fragments.size(); f++)
{
const int frag_idx = breakages[b].fragments[f].frag_type_idx;
if (breakages[b].fragments[f].intensity>0.01 &&
this->get_frag_idx_pos(frag_idx)<0)
frag_idxs.push_back(frag_idx);
}
}
intens.resize(frag_idxs.size());
// cout << "Frags: " << ranks.size() << endl;
if (verbose)
cout << as.get_peptide().as_string(config) << endl;
int f;
for (f=0; f<frag_idxs.size(); f++)
{
const int frag_idx = frag_idxs[f];
const FragmentType& frag = config->get_fragment(frag_idx);
vector<mass_t>& frag_intens = intens[f];
frag_intens.resize(length,0);
int num_inten_peaks=0;
int i;
for (i=0; i<length; i++)
{
const int pos = breakages[i].get_position_of_frag_idx(frag_idx);
if (pos>=0)
{
frag_intens[i]=breakages[i].fragments[pos].intensity;
num_inten_peaks++;
}
}
frag_intens[0]=num_inten_peaks;
for (i=1; i<frag_intens.size(); i++)
{
const mass_t exp_mass = frag.calc_expected_mass(breakages[i].mass,pm_with_19);
if (exp_mass<min_detected_mass || exp_mass>max_detected_mass)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -