📄 discretepeakmodel.cpp
字号:
for (t=0; t<all_tables[size_idx][region_idx][f].size(); t++)
{
if ( ! all_tables[size_idx][region_idx][f][t].are_relevant_fragments_visible(&breakages[b]) )
continue;
all_tables[size_idx][region_idx][f][t].add_instance(&breakages[b]);
}
}
}
cout << endl;
}
// calculate probabilities and choose the best table for each fragment
// (the one that has maximal dkl).
// also assign for each fragment a table of independent probabilities
// (what you get if the fragment has no parents).
for (size_idx =0; size_idx < all_tables.size(); size_idx++)
{
int region_idx;
for (region_idx=0; region_idx<all_tables[size_idx].size(); region_idx++)
{
const vector<int>& frag_type_idxs = regional_fragments[charge][size_idx][region_idx].get_frag_type_idxs();
int f;
for (f=0; f<all_tables[size_idx][region_idx].size(); f++)
{
int t;
vector<double> dkl_sums;
dkl_sums.resize(all_tables[size_idx][region_idx][f].size(),0);
for (t=0; t<all_tables[size_idx][region_idx][f].size(); t++)
all_tables[size_idx][region_idx][f][t].calc_probs();
vector<score_t> ind_probs;
ind_probs.resize(num_peak_levels);
for (t=0; t<num_peak_levels; t++)
ind_probs[t] = all_tables[size_idx][region_idx][f][0].score_probs[t];
cout << "Tables for " << config.get_fragment(frag_type_idxs[f]).label << " ( charge: " <<
charge << " size: " << size_idx << " region: " << region_idx << " )" << endl;
all_tables[size_idx][region_idx][f][0].print_pretty(&config);
cout<< endl;
regional_models[charge][size_idx][region_idx].charge = charge;
regional_models[charge][size_idx][region_idx].size_idx = size_idx;
regional_models[charge][size_idx][region_idx].region_idx = region_idx;
// set the independent probs
regional_models[charge][size_idx][region_idx].independent_frag_tables[f] =
all_tables[size_idx][region_idx][f][0];
// use the independent model if this fragment has no parents
if (all_tables[size_idx][region_idx][f].size() == 1)
{
regional_models[charge][size_idx][region_idx].single_breakage_tables[f] =
all_tables[size_idx][region_idx][f][0];
continue;
}
vector<idx_dkl> pairs;
pairs.clear();
for (t=1; t<all_tables[size_idx][region_idx][f].size(); t++)
{
idx_dkl p;
p.idx = t;
p.dkl_sum = all_tables[size_idx][region_idx][f][t].calc_dkl_sum(ind_probs);
// all_tables[size_idx][region_idx][f][t].print_pretty(&config);
// cout << "DKL SUM: " << all_tables[size_idx][region_idx][f][t].calc_dkl_sum(ind_probs) << endl << endl;
pairs.push_back(p);
}
sort(pairs.begin(),pairs.end());
int i;
for (i=0; i<pairs.size() && i<7; i++)
{
string name;
all_tables[size_idx][region_idx][f][pairs[i].idx].make_table_name(&config,name);
cout << setw(6) << setprecision(4) << pairs[i].dkl_sum << " " << name << endl;
}
cout << endl;
// find combo with highest dkl, might want to give a bit preference to
// tables with 1 parent, or strong parents...
int two_frag_idx=-1, one_frag_idx =-1;
double max_two_frag_dkl=0, max_one_frag_dkl=0;
for (i=0; i<pairs.size(); i++)
{
const vector<int>& fields = all_tables[size_idx][region_idx][f][pairs[i].idx].get_fields();
if (fields[1]>=0 && fields[2]>=0)
{
if (pairs[i].dkl_sum>max_two_frag_dkl)
{
max_two_frag_dkl = pairs[i].dkl_sum;
two_frag_idx = pairs[i].idx;
}
}
else
{
if (pairs[i].dkl_sum>max_one_frag_dkl)
{
max_one_frag_dkl = pairs[i].dkl_sum;
one_frag_idx = pairs[i].idx;
}
}
}
int selected_table_idx=-1;
if (max_two_frag_dkl>max_one_frag_dkl && (max_two_frag_dkl/max_one_frag_dkl > 1.1))
{
selected_table_idx = two_frag_idx;
}
else
selected_table_idx = one_frag_idx;
regional_models[charge][size_idx][region_idx].single_breakage_tables[f] =
all_tables[size_idx][region_idx][f][selected_table_idx];
string name;
all_tables[size_idx][region_idx][f][selected_table_idx].make_table_name(&config,name);
cout << "Selected: " << name << endl << endl;
}
}
}
}
// writes all relevant info:
// thresholds and tables
void DiscretePeakModel::write_score_model(const char *name) const
{
string file_name = config.get_resource_dir() + "/" + string(name) + "_break_score.txt";
ofstream os(file_name.c_str());
if (! os.is_open() || ! os.good())
{
cout << "Error: couldn't open score model for writing: " << file_name << endl;
exit(1);
}
write_model_specifics(os);
int c;
int max_charge = 0;
for (c=0; c<regional_models.size(); c++)
if (regional_models[c].size()>0)
max_charge = c;
os << "#MAX_CHARGE " << max_charge << endl;
for (c=1; c<=max_charge; c++)
{
if (regional_models[c].size() == 0)
continue;
os << c << " " << regional_models[c].size() << endl; // num sizes
int size_idx;
for (size_idx=0; size_idx< regional_models[c].size(); size_idx++)
{
os << regional_models[c][size_idx].size() << endl; // num regions
int region_idx;
for (region_idx=0; region_idx<regional_models[c][size_idx].size(); region_idx++)
{
regional_models[c][size_idx][region_idx].write_regional_model(os);
}
}
}
os.close();
}
// converts all the probabilities to socres in the tables
void DiscretePeakModel::convert_probs_to_scores()
{
int c,s,r;
for (c=0; c<regional_models.size(); c++)
for (s=0; s<regional_models[c].size(); s++)
for (r=0; r<regional_models[c][s].size(); r++)
regional_models[c][s][r].convert_to_scores(q_rand);
}
void DiscretePeakModel::read_score_model(const char *name, bool silent_ind)
{
char buff[128];
string file_name = config.get_resource_dir() + "/" + string(name) + "_break_score.txt";
ifstream is(file_name.c_str());
if (! is.good() || ! is.is_open())
{
cout << "Error: couldn't open break score model for reading: " << file_name << endl;
exit(1);
}
read_model_specifics(is);
is.getline(buff,128);
if (sscanf(buff,"#MAX_CHARGE %d",&max_score_model_charge) != 1)
{
cout << "Error: bad line in score model: " << buff << endl;
exit(1);
}
this->config.set_max_charge_for_size(max_score_model_charge);
regional_models.resize(max_score_model_charge+3);
int charge = 0;
while (charge<max_score_model_charge)
{
int num_sizes,size_idx;
is.getline(buff,128);
if (sscanf(buff,"%d %d",&charge,&num_sizes) != 2)
{
cout << "Error: bad line in score model: " << buff << endl;
exit(1);
}
regional_models[charge].resize(num_sizes);
for (size_idx=0; size_idx<num_sizes; size_idx++)
{
int num_regions, region_idx;
is.getline(buff,128);
if (sscanf(buff,"%d",&num_regions) != 1)
{
cout << "Error: bad line in score model: " << buff << endl;
exit(1);
}
regional_models[charge][size_idx].resize(num_regions);
for (region_idx=0; region_idx<num_regions; region_idx++)
{
regional_models[charge][size_idx][region_idx].read_regional_model(&config,is);
int f;
for (f=0; f<regional_models[charge][size_idx][region_idx].single_breakage_tables.size(); f++)
{
regional_models[charge][size_idx][region_idx].single_breakage_tables[f].set_model_tolerance(config.get_tolerance());
regional_models[charge][size_idx][region_idx].single_breakage_tables[f].config=&config;
}
}
}
}
// copy models for other charges. Prefer charge 2 model
int min_model_idx = -1;
int max_model_idx = -1;
if (max_score_model_charge<4)
max_score_model_charge=4;
int c;
for (c=0; c<=max_score_model_charge; c++)
if (regional_models[c].size() >0)
{
min_model_idx=c;
break;
}
for (c=max_score_model_charge; c>=0; c--)
if (regional_models[c].size() >0)
{
max_model_idx=c;
break;
}
if (min_model_idx<0)
{
cout << "Error: no score models were read!" << endl;
exit(1);
}
bool has_charge_2 = (regional_models[2].size()>0);
if (regional_models[1].size()==0)
{
if (has_charge_2)
{
regional_models[1]=regional_models[2];
}
else
regional_models[1]=regional_models[min_model_idx];
}
for (c=2; c<=max_score_model_charge; c++)
{
if (regional_models[c].size() == 0)
regional_models[c]=regional_models[max_model_idx];
}
// read edge models..
edge_model.read_edge_models(&config,(char *)config.get_model_name().c_str());
is.close();
}
// calcs the score of participating fragments that have istopic peak scores
void DiscretePeakModel::add_breakage_iso_score(Spectrum *spec,
Breakage *breakage) const
{
if (! breakage)
return;
const vector<int>& strong_frags = config.get_regional_fragment_sets()
[breakage->parent_charge][breakage->parent_size_idx][breakage->region_idx].get_strong_frag_type_idxs();
int i;
for (i=0; i<strong_frags.size(); i++)
{
int pos = breakage->get_position_of_frag_idx(strong_frags[i]);
if (pos>=0)
{
float iso_level = spec->get_peak_iso_level(breakage->fragments[pos].peak_idx);
if (iso_level>0)
breakage->score -= (2 + iso_level * 3);
}
}
}
void DiscretePeakModel::print_all_table_names(ostream& os) const
{
int i,j,k;
for (i=0; i<regional_models.size(); i++)
for (j=0; j<regional_models[i].size(); j++)
for (k=0; k<regional_models[i][j].size(); k++)
regional_models[i][j][k].print_table_names(&config,os);
}
// prints joint scores of two top fragments
void DiscretePeakModel::print_joint_scores(ostream& os) const
{
print_level_legend(os);
RegionalPepNovoModel rpm = regional_models[2][0][1];
int f1_idx = rpm.frag_type_idxs[0];
int f2_idx = rpm.frag_type_idxs[1];
os << "joint score for fragments " << config.get_fragment(f1_idx).label << " and " <<
config.get_fragment(f2_idx).label << endl;
vector< vector<score_t> > joint_scores;
joint_scores.resize(num_peak_levels);
int i;
for (i=0; i<num_peak_levels; i++)
joint_scores[i].resize(num_peak_levels,NEG_INF);
for (i=0; i<num_peak_levels; i++)
{
int j;
for (j=0; j<num_peak_levels; j++)
{
table_entry e;
e[0]=i;
e[1]=j;
e[2]=0;
e[3]=0;
e[4]=0;
int idx1= rpm.single_breakage_tables[0].calc_table_idx(e);
int idx1b= rpm.independent_frag_tables[0].calc_table_idx(e);
e[0]=j;
e[1]=i;
int idx2= rpm.single_breakage_tables[1].calc_table_idx(e);
int idx2b= rpm.independent_frag_tables[1].calc_table_idx(e);
joint_scores[i][j]= rpm.single_breakage_tables[0].score_probs[idx1] +
rpm.single_breakage_tables[1].score_probs[idx2];
joint_scores[i][j]-= rpm.independent_frag_tables[0].score_probs[idx1b] +
rpm.independent_frag_tables[1].score_probs[idx2b];
}
}
os << " ";
for (i=0; i<num_peak_levels; i++)
os << setw(3) << right << i << " ";
os << endl << endl;
for (i=0; i<num_peak_levels; i++)
{
int j;
os << left << setw(3) << i;
for (j=0; j<num_peak_levels; j++)
{
os << setw(6) << right << joint_scores[i][j] << " ";
}
os << endl << endl;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -