📄 config.cpp
字号:
#include "Config.h"
#include "auxfun.h"
void Config::init_with_defaults()
{
int i;
mass_spec_type=ESI_MASS_SPEC; // default type
config_file = "";
model_name = "";
ind_read_PTM_file = false;
max_n_term_mod = 0;
max_c_term_mod = 0;
min_n_term_mod = 0;
min_c_term_mod = 0;
tolerance = 0.5;
pm_tolerance = 2.5;
local_window_size=200;
max_number_peaks_per_local_window=15;
number_of_strong_peaks_per_local_window=10;
// random_prob = 0.1;
max_combo_mass = 0;
max_edge_length = 2;
max_charge_for_size=10; // the size of size_thresholds
set_digest_type(0);
need_to_estimate_pm = 1;
use_spectrum_charge = 0;
use_spectrum_mz = 0;
filter_flag = 1;
need_to_normalize = 1;
itraq_mode = 0;
terminal_score = 10;
digest_score = 10;
forbidden_pair_penalty = 25;
strong_type1_idx =-1;
strong_type2_idx =-1;
min_ranges.clear();
max_ranges.clear();
min_exclude_range=999999;
max_exclude_range=NEG_INF;
init_standard_aas();
session_aas = standard_aas;
init_model_size_and_region_thresholds();
int c;
for (c=max_charge_for_size; c>=0; c--)
init_regional_fragment_set_defaults(0,c);
// these all operate on the original aas
session_tables.init_for_standard_aas();
// insert labels of original aas
label2aa.clear();
const vector<string>& aa2label = get_aa2label();
for (i=0; i<aa2label.size(); i++)
label2aa.insert(STRING2INT_MAP::value_type(aa2label[i],i));
calc_aa_combo_masses();
set_aa_variants();
fill_allowed_double_edges();
init_allowed_node_masses();
char PTM_file[256];
strcpy(PTM_file,get_resource_dir().c_str());
strcat(PTM_file,"/");
strcat(PTM_file,"PepNovo_PTMs.txt");
read_PTM_file(PTM_file);
// all_fragments.print();
}
void Config::init_standard_aas()
{
int i;
standard_aas.clear();
for (i=Ala; i<=Val; i++)
standard_aas.push_back(i);
}
void Config::init_model_size_and_region_thresholds()
{
size_thresholds.clear();
size_thresholds.resize(max_charge_for_size+1);
// region thresholds
region_thresholds.resize(2);
region_thresholds[0]=0.225;
region_thresholds[1]=0.775;
}
void Config::add_exclude_range(mass_t min_range, mass_t max_range)
{
if (min_range>=max_range)
return;
this->min_ranges.push_back(min_range);
this->max_ranges.push_back(max_range);
if (this->min_exclude_range>min_range)
min_exclude_range = min_range;
if (this->max_exclude_range<max_range)
max_exclude_range = max_range;
}
/********************************************************
Sets the diegest type.
Assumes that each digest type has a set of N or C terminal
preferred amino acids
*********************************************************/
void Config::set_digest_type(int type)
{
digest_type = type;
n_term_digest_aas.clear();
c_term_digest_aas.clear();
if (digest_type == NON_SPECIFIC_DIGEST)
return;
if (digest_type == TRYPSIN_DIGEST)
{
c_term_digest_aas.push_back(Lys);
c_term_digest_aas.push_back(Arg);
return;
}
cout << "Error: digest type not supported: " << type << endl;
exit(1);
}
/*********************************************************
Inits the regional fragment sets (resizes according to the
charges, size_idxs, and region_idxs). If set_type = 0,
each region is given all fragments that are relevant.
If set type = 0, uses all
**********************************************************/
void Config::init_regional_fragment_set_defaults(int set_type, int charge)
{
const int num_regions = region_thresholds.size()+1;
if (max_charge_for_size <= charge)
{
max_charge_for_size = charge;
regional_fragment_sets.resize(max_charge_for_size+1);
}
regional_fragment_sets[charge].resize(size_thresholds[charge].size());
int j;
for (j=0; j<size_thresholds[charge].size(); j++)
{
int k;
regional_fragment_sets[charge][j].resize(num_regions);
for (k=0; k<num_regions; k++)
{
if (set_type == 0)
{
regional_fragment_sets[charge][j][k].init_with_all_types(charge,this);
}
else
{
cout << "Error: bad option for init!"<< endl;
exit(1);
}
}
}
}
// selects the fragments to be used, uses a cutoff that is X times random prob)
void Config::select_fragments_in_sets(score_t min_prob_coef, int max_num_frags)
{
int c;
for (c=1; c<regional_fragment_sets.size(); c++)
{
int s;
for (s=0; s<regional_fragment_sets[c].size(); s++)
{
int r;
for (r=0; r<regional_fragment_sets[c][s].size(); r++)
{
float min_prob = min_prob_coef * get_regional_random_probability(c,s,r);
regional_fragment_sets[c][s][r].select_fragments_with_minimum_prob(min_prob,
max_num_frags);
}
}
}
}
// For each regional fragments selects all fragments that have a minimal probability
// to be strong.
// also look for the two strongest fragment types and store them in the
// strong_type1_idx and strong_type2_idx variables
void Config::select_strong_fragments(int charge,
score_t min_prob, int max_num_strong, bool verbose)
{
const vector<FragmentType>& all_fragment_types = get_all_fragments();
int s;
for (s=0; s<regional_fragment_sets[charge].size(); s++)
{
int r;
for (r=0; r<regional_fragment_sets[charge][s].size(); r++)
{
regional_fragment_sets[charge][s][r].select_strong_fragments(this,min_prob,max_num_strong);
const vector<int>& strong_type_idxs = regional_fragment_sets[charge][s][r].get_strong_frag_type_idxs();
// see if these frags should be put in the strong two fragments
int i;
for (i=0; i<strong_type_idxs.size(); i++)
{
int frag_idx = strong_type_idxs[i];
if (strong_type1_idx == -1)
{
strong_type1_idx = frag_idx;
continue;
}
if (strong_type2_idx == -1 &&
frag_idx != strong_type1_idx)
{
strong_type2_idx = frag_idx;
continue;
}
// make sure that type2 has the lower probability
if (all_fragment_types[strong_type1_idx].prob<
all_fragment_types[strong_type2_idx].prob)
{
int tmp;
tmp=strong_type1_idx;
strong_type1_idx=strong_type1_idx;
strong_type1_idx=tmp;
}
if (all_fragment_types[frag_idx].prob>
all_fragment_types[strong_type2_idx].prob)
{
strong_type2_idx = frag_idx;
}
}
}
}
}
// returns the region idx for a (breakge) mass
int Config::calc_region_idx(mass_t break_mass, mass_t pm_with_19, int charge,
mass_t min_peak_mass, mass_t max_peak_mass) const
{
// return 0;
const int n_threshes = region_thresholds.size();
int i;
vector<mass_t> threshes;
threshes.resize(region_thresholds.size());
for (i=0; i<region_thresholds.size(); i++)
threshes[i] = pm_with_19 * region_thresholds[i];
if (charge<=2)
{
mass_t alt_first_thresh = (min_peak_mass>0) ? min_peak_mass + 150 : threshes[0];
mass_t alt_last_thresh = (max_peak_mass>0) ? max_peak_mass - 150 : threshes[n_threshes-1];
if (break_mass<threshes[0] || break_mass<alt_first_thresh)
return 0;
if (break_mass>threshes[n_threshes-1] || break_mass>alt_last_thresh)
return n_threshes;
for (i=1; i<n_threshes; i++)
if (break_mass<threshes[i])
break;
return i;
}
else
{
if (break_mass<threshes[0])
return 0;
if (break_mass>threshes[n_threshes-1])
return n_threshes;
for (i=1; i<n_threshes; i++)
if (break_mass<threshes[i])
break;
return i;
}
}
void Config::set_size_thresholds_according_to_set_of_masses(int charge,
vector<mass_t>& spectra_masses)
{
const int num_spectra = spectra_masses.size();
if (size_thresholds.size()<=charge)
size_thresholds.resize(charge+1);
size_thresholds[charge].clear();
if (num_spectra<1500)
return;
sort(spectra_masses.begin(),spectra_masses.end());
if (num_spectra<3000)
{
int idx = num_spectra/2;
size_thresholds[charge].push_back(spectra_masses[idx]);
return;
}
if (num_spectra<10000)
{
// use 3 sizes
int idx1 = num_spectra/3;
int idx2 = idx1+idx1;
size_thresholds[charge].push_back(spectra_masses[idx1]);
size_thresholds[charge].push_back(spectra_masses[idx2]);
}
// for large datasets of spectra, use the following rule
const mass_t min_pm_diff = 200.0;
const int min_num_spectra_per_model = 25000;
const int num_models = spectra_masses.size()/min_num_spectra_per_model;
if (num_models <=1)
{
size_thresholds[charge].clear();
size_thresholds[charge].push_back(POS_INF);
return;
}
int i=0;
int prev_i=0;
int next_idx = min_num_spectra_per_model;
mass_t last_mass =0;
size_thresholds[charge].clear();
while (i<spectra_masses.size())
{
if (spectra_masses[i]-last_mass>min_pm_diff &&
i>= next_idx)
{
size_thresholds[charge].push_back(spectra_masses[i]);
cout << "charge " << charge << "\t size " << size_thresholds[charge].size() << " \tmasss " <<
setprecision(1) << fixed << size_thresholds[charge][size_thresholds[charge].size()-1] <<
"\t (spectra " << i-prev_i << ")" << endl;
last_mass = spectra_masses[i];
prev_i = i;
int md_count = size_thresholds[charge].size();
if (md_count == num_models -1 || spectra_masses.size() - i < 1.5*min_num_spectra_per_model)
break;
next_idx = i + (spectra_masses.size() - i)/(num_models-md_count);
}
i++;
}
size_thresholds[charge].push_back(POS_INF);
cout << "charge " << charge << "\t size " << size_thresholds[charge].size() <<
" \tmasss " << setprecision(1) << fixed << size_thresholds[charge][size_thresholds[charge].size()-1] <<
"\t (spectra " << spectra_masses.size()-prev_i << ")" << endl;
}
void Config::print_size_thresholds() const
{
int charge;
for (charge=0; charge<size_thresholds.size(); charge++)
{
cout << charge << "\t" << size_thresholds[charge].size();
int t;
for (t=0; t<size_thresholds[charge].size(); t++)
cout << "\t" << size_thresholds[charge][t];
cout << endl;
}
}
int Config::determine_charge(Peptide& pep, mass_t m_over_z) const
{
int c;
pep.calc_mass(this);
mass_t pep_mass_with_19 = pep.get_mass()+MASS_OHHH;
for (c=1; c<10; c++)
{
mass_t calc_mass_with_19 = m_over_z * c - (1.0023 * (c-1));
if (fabs(calc_mass_with_19- pep_mass_with_19)<10)
return c;
}
return -1;
}
int Config::get_max_session_aa_idx() const
{
const vector<int>& aas = get_session_aas();
int max_aa=-1;
int i;
for (i=0; i<aas.size(); i++)
if (aas[i]>max_aa)
max_aa=aas[i];
return max_aa;
}
/***********************************************************
// returns the idx of an aa from its label
// -1 if label is not found
***********************************************************/
int Config::get_aa_from_label(const string& label) const
{
STRING2INT_MAP::const_iterator iter = label2aa.find(label);
if (iter == label2aa.end())
return -1;
return (*iter).second;
}
int Config::get_aa_with_position_from_label(const string& label, int position) const
{
STRING2INT_MAP::const_iterator iter;
for (iter = label2aa.begin();iter != label2aa.end(); iter++)
{
int aa_idx = (*iter).second;
if (session_tables.get_aa_position(aa_idx) != position ||
label[0] != (*iter).first[0])
continue;
if (label == session_tables.get_aa2label(aa_idx))
return aa_idx;
}
return -1;
}
void Config::print_fragments(ostream &os) const
{
int i;
os << "#ALL FRAGMENTS " << all_fragments.fragments.size() << endl;
for (i=0; i<all_fragments.fragments.size(); i++)
all_fragments.fragments[i].write_fragment(os);
}
void Config::print_regional_fragment_sets(ostream &os) const
{
os << "#FRAGMENT SETS" << endl;
int c;
for (c=regional_fragment_sets.size()-1; c>=0; c--)
{
int s;
for (s=regional_fragment_sets[c].size()-1; s>=0; s--)
{
int r;
for (r=regional_fragment_sets[c][s].size()-1; r>=0; r--)
{
int f;
const vector<int>& frag_idxs = regional_fragment_sets[c][s][r].get_frag_type_idxs();
if (frag_idxs.size()>0)
{
os << c << " " << s << " " << r << " " << frag_idxs.size() << " " <<
setprecision(5) << regional_fragment_sets[c][s][r].get_rand_prob() << endl;
int i;
// write fragments
for (f=0; f<frag_idxs.size(); f++)
{
os << left << setw(9) << all_fragments.get_fragment(frag_idxs[f]).label
<< " " << setprecision(4) << regional_fragment_sets[c][s][r].get_frag_prob(f)<< " " << endl;
// setprecision(5) << all_fragments.get_fragment(frag_idxs[f]).offset << endl;
}
// write strong
const vector<int>& strong = regional_fragment_sets[c][s][r].get_strong_frag_type_idxs();
os << "strong " << strong.size();
for (i=0; i<strong.size(); i++)
os << " " << get_fragment(strong[i]).label;
os << endl;
// write combo
const vector<FragmentCombo>& combos = regional_fragment_sets[c][s][r].get_frag_type_combos();
os << "combos " << combos.size() << endl;
for (i=0; i<combos.size(); i++)
combos[i].print_combo(this,os);
os << endl;
}
}
}
}
}
/************************************************************
Reads the fragments from the stream
*************************************************************/
void Config::read_fragments(istream& is)
{
int i,num_frags=-1;
char buff[128];
is.getline(buff,128);
if (sscanf(buff,"#ALL FRAGMENTS %d",&num_frags) != 1)
{
cout << "Error: bad line in fragments file: " << buff << endl;
if (strlen(buff)<4)
{
cout << "This error can possibly be due to Wnix/Windows issues. Try running dos2unix on all \".txt\" files in the Models directory." << endl;
}
exit(1);
}
all_fragments.clear_set();
all_strong_fragment_type_idxs.clear();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -