📄 advancedscoremodel.cpp
字号:
#include "AdvancedScoreModel.h"
#include "DeNovoRankScore.h"
void AdvancedScoreModel::read_score_model(const char *name, bool silent_ind)
{
rank_models[0]=NULL;
rank_models[1]=NULL;
rank_models[2]=NULL;
int i;
for (i=0; i<10; i++)
rank_tag_models[i]=NULL;
// resize regional breakage score models according to regional fragment sets
const vector< vector< vector< RegionalFragments > > >& all_rfs = config.get_regional_fragment_sets();
int c;
regional_breakage_score_models.resize(all_rfs.size());
for (c=0; c<all_rfs.size(); c++)
{
regional_breakage_score_models[c].resize(all_rfs[c].size());
int s;
for (s=0; s<all_rfs[c].size(); s++)
{
regional_breakage_score_models[c][s].resize(all_rfs[c][s].size());
int r;
for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
if (! regional_breakage_score_models[c][s][r].was_initialized)
regional_breakage_score_models[c][s][r].init(&config,c,s,r);
}
}
int num_models_read=0;
for (c=0; c<regional_breakage_score_models.size(); c++)
{
int s;
for (s=0; s<regional_breakage_score_models[c].size(); s++)
{
int r;
for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
{
if (regional_breakage_score_models[c][s][r].read_regional_score_model(name,silent_ind))
{
if (! silent_ind)
cout << "Read breakage model: " << c << " " << s << " " << r << endl;
num_models_read++;
}
}
}
}
if (! silent_ind)
cout << "Read " << num_models_read << " regional breakage models.." << endl;
// read rank models if they exist (same name as the score model, with suffix CDNV, PDNV and DB
//read_rank_models(name,silent_ind);
}
void AdvancedScoreModel::write_score_model(const char *name) const
{
int num_models_read=0;
int c;
for (c=0; c<regional_breakage_score_models.size(); c++)
{
int s;
for (s=0; s<regional_breakage_score_models[c].size(); s++)
{
int r;
for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
{
if (regional_breakage_score_models[c][s][r].was_initialized)
regional_breakage_score_models[c][s][r].write_regional_score_model(name);
}
}
}
}
void AdvancedScoreModel::read_rank_models(const char *name, bool silent_ind)
{
const string suffixes[]={"DB","DNVPART","DNVCOMP"};
const int num_models = sizeof(rank_models)/sizeof(void *);
int i;
for (i=0; i<num_models; i++)
{
const string suffix = suffixes[i];
DeNovoRankScorer *rank_model = (DeNovoRankScorer *)rank_models[i];
string rank_name = string(name) + "_" + suffix;
string path = config.get_resource_dir() + "/" + rank_name + "/" + suffix + "_rank_model.txt";
ifstream fs(path.c_str());
if (! fs.is_open() || ! fs.good())
{
if (! silent_ind)
cout << "No " << path << endl;
continue;
}
fs.close();
rank_model = new DeNovoRankScorer;
rank_model->set_type(i);
rank_model->set_model(this);
rank_model->read_denovo_rank_scorer_model(path.c_str(),suffix,silent_ind);
rank_models[i] = (void *)rank_model;
if (! silent_ind)
cout << "Read " << path << endl;
}
// read tag models
for (i=3; i<10; i++)
{
DeNovoRankScorer *rank_model = (DeNovoRankScorer *)rank_tag_models[i];
ostringstream oss;
oss << "TAG" << i;
string suffix = oss.str();
string rank_name = string(name) + "_" + suffix;
string path = config.get_resource_dir() + "/" + rank_name + "/" + suffix + "_rank_model.txt";
ifstream fs(path.c_str());
if (! fs.is_open() || ! fs.good())
{
if (! silent_ind)
cout << "No " << path << endl;
continue;
}
fs.close();
rank_model = new DeNovoRankScorer;
rank_model->set_type(3);
rank_model->set_model(this);
rank_model->read_denovo_rank_scorer_model(path.c_str(),suffix,silent_ind);
rank_tag_models[i] = (void *)rank_model;
if (! silent_ind)
cout << "Read " << path << endl;
}
// cout << endl;
}
// simple Dancik-style score
void AdvancedScoreModel::score_breakage(Spectrum *spec, Breakage *breakage, bool verbose) const
{
const RegionalScoreModel& breakage_model = regional_breakage_score_models[breakage->parent_charge]
[breakage->parent_size_idx][breakage->region_idx];
if (0 || breakage_model.has_all_breakage_models)
{
}
else
{
score_t breakage_score=0;
int i;
for (i=0; i<breakage_model.frag_type_idxs.size(); i++)
{
const int frag_idx = breakage_model.frag_type_idxs[i];
if (breakage->is_frag_type_visible(frag_idx))
{
if (breakage->get_position_of_frag_idx(frag_idx)>=0)
{
breakage_score += breakage_model.frag_inten_scores[frag_idx];
}
else
breakage_score += breakage_model.frag_no_inten_scores[frag_idx];
}
}
breakage->score = breakage_score;
}
}
void AdvancedScoreModel::score_graph_edges(PrmGraph& prm) const
{
edge_model.score_graph_edges(prm);
}
int AdvancedScoreModel::get_max_score_model_charge() const
{
return regional_breakage_score_models.size();
}
void AdvancedScoreModel::train_score_model(const char *name, const FileManager& fm,
int charge, int size_idx, int region_idx)
{
// resize regional breakage score models according to regional fragment sets
const vector< vector< vector< RegionalFragments > > >& all_rfs = config.get_regional_fragment_sets();
int c;
regional_breakage_score_models.resize(all_rfs.size());
for (c=0; c<all_rfs.size(); c++)
{
regional_breakage_score_models[c].resize(all_rfs[c].size());
int s;
for (s=0; s<all_rfs[c].size(); s++)
{
regional_breakage_score_models[c][s].resize(all_rfs[c][s].size());
int r;
for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
if (! regional_breakage_score_models[c][s][r].was_initialized)
regional_breakage_score_models[c][s][r].init(&config,c,s,r);
}
}
// train models
for (c=0; c<regional_breakage_score_models.size(); c++)
{
if (regional_breakage_score_models.size() == 0 || (charge>0 && charge != c))
continue;
int s;
for (s=0; s<regional_breakage_score_models[c].size(); s++)
{
if (size_idx>=0 && s != size_idx)
continue;
int r;
for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
{
if (region_idx>=0 && r != region_idx)
continue;
regional_breakage_score_models[c][s][r].train_regional_score_model(this,name,fm);
}
}
}
}
/***********************************************************************
Creates a score table for each node that has an enry for each possible
entrance and exit combination of amino aicds.
************************************************************************/
void AdvancedScoreModel::score_all_node_combos(PrmGraph *prm) const
{
const vector<int>& org_aas = config.get_org_aa();
const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
const int num_nodes = prm->get_num_nodes();
const mass_t mid_mass = prm->get_pm_with_19() * 0.5;
int i;
for (i=0; i<num_nodes; i++)
{
Node& node = prm->get_non_const_node(i);
const RegionalScoreModel& score_model =
regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];
vector<BreakageInfo> infos;
BreakageInfo double_gap_info;
prm->fill_breakage_info(this,&double_gap_info,i,NEG_INF,NEG_INF,NEG_INF,NEG_INF);
infos.push_back(double_gap_info);
int j;
for (j=0; j<node.in_edge_idxs.size(); j++)
{
const int in_edge_idx = node.in_edge_idxs[j];
const MultiEdge& in_edge = multi_edges[in_edge_idx];
const int num_in_variants = in_edge.get_num_variants();
int k;
for (k=0; k<num_in_variants; k++)
{
BreakageInfo c_gap_info;
prm->fill_breakage_info(this,&c_gap_info,i,in_edge_idx,k,NEG_INF,NEG_INF);
infos.push_back(c_gap_info);
}
}
for (j=0; j<node.out_edge_idxs.size(); j++)
{
const int out_edge_idx = node.out_edge_idxs[j];
const MultiEdge& out_edge = multi_edges[out_edge_idx];
const int num_out_variants = out_edge.get_num_variants();
int k;
for (k=0; k<num_out_variants; k++)
{
BreakageInfo n_gap_info;
prm->fill_breakage_info(this,&n_gap_info,i,NEG_INF,NEG_INF,out_edge_idx,k);
infos.push_back(n_gap_info);
}
}
for (j=0; j<node.in_edge_idxs.size(); j++)
{
const int in_edge_idx = node.in_edge_idxs[j];
const MultiEdge& in_edge = multi_edges[in_edge_idx];
const int num_in_variants = in_edge.get_num_variants();
int k;
for (k=0; k<num_in_variants; k++)
{
BreakageInfo c_gap_info;
prm->fill_breakage_info(this,&c_gap_info,i,in_edge_idx,k,NEG_INF,NEG_INF);
infos.push_back(c_gap_info);
int a;
for (a=0; a<node.out_edge_idxs.size(); a++)
{
const int out_edge_idx = node.out_edge_idxs[a];
const MultiEdge& out_edge = multi_edges[out_edge_idx];
const int num_out_variants = out_edge.get_num_variants();
int b;
for (b=0; b<num_out_variants; b++)
{
BreakageInfo info;
prm->fill_breakage_info(this,&info,i,in_edge_idx,k,out_edge_idx,b);
infos.push_back(info);
}
}
}
}
node.score_combos.clear();
for (j=0; j<infos.size(); j++)
infos[j].score = score_model.score_a_single_breakage_combo(prm, node, &node.breakage,infos[j]);
for (j=0; j<infos.size(); j++)
node.score_combos[ScoreComboLoc(infos[j])]=infos[j].score;
score_t max_score = NEG_INF;
if (i>0 && i<num_nodes-1)
{
if (node.mass>mid_mass)
{
int j;
for (j=0; j<infos.size(); j++)
if (infos[j].n_aa >= Ala && infos[j].c_aa <= Gap && infos[j].score>max_score)
max_score=infos[j].score;
}
else
{
int j;
for (j=0; j<infos.size(); j++)
if (infos[j].n_aa <= Gap && infos[j].c_aa >= Ala && infos[j].score>max_score)
max_score=infos[j].score;
}
}
else
max_score = infos[0].score;
node.score = max_score;
node.breakage.score = node.score;
}
prm->set_has_node_combo_scores(true);
}
void AdvancedScoreModel::score_peptide_node_combos(PrmGraph *prm, const Peptide& peptide ) const
{
const vector<int>& org_aas = config.get_org_aa();
const vector<mass_t>& aa2mass = config.get_aa2mass();
const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
const int num_nodes = prm->get_num_nodes();
const vector<int>& pep_aas = peptide.get_amino_acids();
const int num_pep_aas = pep_aas.size();
mass_t p_mass=0;
int aa_idx=0;
int i;
for (i=0; i<num_nodes; i++)
{
Node& node = prm->get_non_const_node(i);
const RegionalScoreModel& score_model =
regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];
int in_edge_idx=NEG_INF, in_edge_variant=NEG_INF;
int out_edge_idx=NEG_INF, out_edge_variant=NEG_INF;
// cout << "N: " << node.mass << endl;
while (aa_idx<pep_aas.size() && fabs(p_mass-node.mass)>0.1)
{
p_mass += aa2mass[pep_aas[aa_idx]];
aa_idx++;
// cout << aa_idx << "\t" << p_mass << endl;
}
if (aa_idx == pep_aas.size() && i != num_nodes-1)
{
int j;
for (j=0; j<num_nodes; j++)
cout << j << "\t" << prm->get_node(j).mass << endl;
cout << endl << "PEP:" << endl;
vector<mass_t> exp_masses;
peptide.calc_expected_breakage_masses((Config *)&config,exp_masses);
for (j=0; j<exp_masses.size(); j++)
cout << j << "\t" << exp_masses[j] << endl;
cout << "Error: mismatch between nodes and peptide!" << endl;
exit(1);
}
if (node.in_edge_idxs.size()>0)
{
int j;
for (j=0; j<node.in_edge_idxs.size(); j++)
{
const int edge_idx = node.in_edge_idxs[j];
const MultiEdge& in_edge = multi_edges[edge_idx];
const int num_aa = in_edge.num_aa;
if (num_aa>aa_idx)
continue;
const int var_idx = in_edge.get_variant_idx(num_aa,&pep_aas[aa_idx-num_aa]);
if (var_idx<0)
continue;
in_edge_idx = edge_idx;
in_edge_variant = var_idx;
break;
}
}
if (node.out_edge_idxs.size()>0)
{
int j;
for (j=0; j<node.out_edge_idxs.size(); j++)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -