📄 multipath.cpp
字号:
if (nodes[n_idx].type == NODE_N_TERM && nodes[c_idx].type == NODE_C_TERM)
{
if (! config->get_need_to_estimate_pm() )
{
const vector<mass_t>& aa2mass = config->get_aa2mass();
vector<int> aas;
variants[j].get_amino_acids(aas);
mass_t pep_mass = 0;
int a;
for (a=0; a<aas.size(); a++)
pep_mass += aa2mass[aas[a]];
pep_mass+=MASS_OHHH;
if (fabs(pep_mass-source_spectrum->get_org_pm_with_19())>config->get_pm_tolerance())
{
// cout << "Rejected: " << variants[j].seq_str << endl;
// cout << "aa_mass: " << fixed << setprecision(3) << pep_mass << " spec_mass: " << source_spectrum->get_org_pm_with_19();
// cout << " (" << pep_mass-source_spectrum->get_org_pm_with_19() << ")" << endl;
continue;
}
// add bonus score since the amino acids match the mass
// variants[j].path_score += BONUS_FOR_COMPLETE_PEPTIDE;
// cout << "Added bonus!: " << fixed << setprecision(3) << variants[j].path_score << endl;
}
}
// check that there are no PTMs that are specific to the +1, -1 positions
path_heap.add_path(variants[j]);
}
}
paths = path_heap.get_paths();
sort(paths.begin(),paths.end(),comp_SeqPath_path_score);
while (paths.size()>0)
{
int idx = paths.size()-1;
if (paths[idx].path_score<= NEG_INF || paths[idx].get_num_aa() < 1)
{
paths.pop_back();
}
else
break;
}
// int i;
// for (i=0; i<paths.size(); i++)
// cout << i << "\t" << paths[i].path_score << "\t" << paths[i].seq_str << endl;
// cout << "Considered: " << num_vars << endl;
}
bool MultiPath::check_if_correct(const Peptide& p, Config *config) const
{
const mass_t tolerance = config->get_tolerance() * 1.25;
vector<mass_t> break_masses;
int idx=0;
int i;
p.calc_expected_breakage_masses(config,break_masses);
for (i=0; i<breakages.size(); i++)
{
const mass_t& mass = breakages[i]->mass;
const mass_t max_mass = mass + tolerance;
const mass_t min_mass = mass - tolerance;
while (idx < break_masses.size() && break_masses[idx] < min_mass)
idx++;
if (break_masses[idx]>max_mass)
return false;
}
return true;
}
int MultiPath::get_num_correct_aas(const PrmGraph& prm, const Peptide& p, Config *config) const
{
const mass_t tolerance = config->get_tolerance() * 1.25;
vector<mass_t> break_masses;
int idx=0;
int num_correct=0;
int i;
p.calc_expected_breakage_masses(config,break_masses);
for (i=0; i<breakages.size(); i++)
{
const mass_t& mass = breakages[i]->mass;
const mass_t max_mass = mass + tolerance;
const mass_t min_mass = mass - tolerance;
while (idx < break_masses.size() && break_masses[idx] < min_mass)
idx++;
if (break_masses[idx]>max_mass)
continue;
if (idx<breakages.size()-1 && edge_idxs[idx]>=0)
num_correct += prm.get_multi_edge(edge_idxs[idx]).num_aa;
}
return num_correct;
}
int MultiPath::get_num_aas() const
{
return (breakages.size()-1);
}
// returns the number of b/y ions
int SeqPath::get_num_frags(const vector<int>& frag_idxs) const
{
int num_frags=0;
int i;
for (i=0; i<positions.size(); i++)
{
Breakage *bb = positions[i].breakage;
if (positions[i].breakage && positions[i].breakage->fragments.size()>0)
{
int j;
for (j=0; j<frag_idxs.size(); j++)
{
int k;
for (k=0; k<positions[i].breakage->fragments.size(); k++)
{
if (positions[i].breakage->fragments[k].frag_type_idx == frag_idxs[j])
num_frags++;
}
}
}
}
return num_frags;
}
mass_t SeqPath::calculate_peptide_mass(Config *config) const
{
const vector<mass_t>& aa2mass = config->get_aa2mass();
mass_t pep_mass=0;
int i;
for (i=0; i<positions.size()-1; i++)
{
pep_mass+=aa2mass[positions[i].aa];
}
return pep_mass;
}
int SeqPath::get_num_correct_aas(const Peptide& pep, Config *config) const
{
const vector<mass_t>& aa2mass = config->get_aa2mass();
const vector<int>& pep_aas = pep.get_amino_acids();
int num_correct=0;
int i;
vector<mass_t> pep_masses;
vector<int> path_aas;
get_amino_acids(path_aas);
pep_masses.resize(pep_aas.size(),0);
for (i=1; i<pep_aas.size(); i++)
pep_masses[i]=pep_masses[i-1]+aa2mass[pep_aas[i-1]];
mass_t path_mass = n_term_mass;
for (i=0; i<path_aas.size(); i++)
{
const int path_aa = path_aas[i];
int j;
for (j=0; j<pep_aas.size(); j++)
{
const int pep_aa = pep_aas[j];
if (fabs(pep_masses[j]-path_mass)<1.0 && pep_aas[j] == path_aas[i])
{
num_correct++;
break;
}
}
path_mass += aa2mass[path_aas[i]];
}
return num_correct;
}
void PrmGraph::parse_seq_path_to_smaller_ones(const SeqPath& org_path,
int min_length,
int max_length,
vector<SeqPath>& new_paths)
{
const vector<PathPos>& org_positions = org_path.positions;
new_paths.clear();
const int num_org_positions = org_positions.size();
const int last_org_position = num_org_positions-1;
if (num_org_positions<=min_length)
return;
const int num_iters = org_positions.size()-min_length;
new_paths.resize(num_iters);
int np_idx=0;
int i;
for (i=0; i<num_iters; i++)
{
if (org_positions[i].aa>=0 && org_positions[i].edge_idx>=0)
{
int j=0;
SeqPath& path = new_paths[np_idx];
path.positions.clear();
if (i==0)
path.n_term_aa = org_path.n_term_aa;
path.n_term_mass = org_positions[i].mass;
path.path_score = 0;
path.multi_path_rank = org_path.multi_path_rank;
const int start_pos=i;
bool score_this_path = false;
int pos=i;
while (pos<num_org_positions )
{
path.positions.push_back(org_positions[pos]);
pos++;
while (pos<num_org_positions && org_positions[pos].node_idx<0)
path.positions.push_back(org_positions[pos++]);
const int length = pos-start_pos;
if (pos<num_org_positions && length>=min_length && length<=max_length)
{
path.c_term_mass = org_positions[pos].mass;
if (pos==last_org_position)
path.c_term_aa= org_path.c_term_aa;
path.positions.push_back(org_positions[pos]);
score_this_path = true;
break;
}
}
if (path.positions.size()>max_length+1 || ! score_this_path)
{
path.positions.clear();
continue;
}
np_idx++; // stores the path
// compute combo scores
vector<PathPos>& new_positions = path.positions;
const int num_new_positions = new_positions.size();
const int last_new_position = num_new_positions-1;
path.path_score=0;
// special treatment for scores at the begining/end of the parsed tag
// (need to use the Gap score on the terminal ends)
if (new_positions[0].node_idx == org_positions[0].node_idx)
{
new_positions[0].node_score = org_positions[0].node_score;
path.path_score += new_positions[0].node_score;
path.path_score += new_positions[0].edge_variant_score;
}
else
{
// find edge variant
const int aa = new_positions[0].aa;
const MultiEdge& edge = multi_edges[new_positions[0].edge_idx];
const int var_idx = edge.get_variant_idx(aa);
if (var_idx<0)
{
cout << "Error: bad aa in var idx! 1" << endl;
exit(1);
}
new_positions[0].node_score = model->get_node_combo_score(this,new_positions[0].node_idx,
-1,0,new_positions[0].edge_idx,var_idx);
path.path_score += new_positions[0].node_score;
path.path_score += new_positions[0].edge_variant_score;
}
if (new_positions[last_new_position].node_idx == org_positions[last_org_position].node_idx)
{
new_positions[last_new_position].node_score = org_positions[last_org_position].node_score;
path.path_score += new_positions[last_new_position].node_score;
path.path_score += new_positions[last_new_position].edge_variant_score;
}
else
{
int k=last_new_position-1;
while (k>0 && new_positions[k].edge_idx<0)
k--;
if (k==0)
{
cout << "Error: bad parse of tag!" << endl;
exit(1);
}
const int aa = new_positions[k].aa;
const int edge_idx = new_positions[k].edge_idx;
const MultiEdge& edge = multi_edges[edge_idx];
const int var_idx = edge.get_variant_idx(aa);
if (var_idx<0)
{
cout << "Error: bad aa in var idx! 2" << endl;
exit(1);
}
new_positions[last_new_position].node_score = model->get_node_combo_score(this,
new_positions[last_new_position].node_idx,edge_idx,var_idx,-1,0);
path.path_score += new_positions[last_new_position].node_score;
//// no edge score for last position!
//path.path_score += new_positions[last_new_position].edge_variant_score;
new_positions[last_new_position].edge_variant_score =0;
}
for (j=1; j<last_new_position; j++)
{
path.positions[j].node_score = org_positions[i+j].node_score;
path.path_score += new_positions[j].node_score;
path.path_score += new_positions[j].edge_variant_score;
}
path.make_seq_str(config);
path.charge = charge;
path.pm_with_19 = pm_with_19;
path.prm_ptr = this;
path.sort_key = path.path_score;
}
}
while(new_paths.size()>0 && new_paths[new_paths.size()-1].positions.size()==0)
new_paths.pop_back();
}
bool SeqPath::check_if_correct(const string& str, Config *config) const
{
const vector<mass_t>& aa2mass = config->get_aa2mass();
const char *path_str = seq_str.c_str();
const char *corr_str = str.c_str();
int len_path_str = strlen(path_str);
int len_corr_str = strlen(corr_str);
if (len_path_str>len_corr_str)
return false;
int i;
for (i=0; i<=len_corr_str-len_path_str; i++)
{
int j;
bool correct_seq = true;
for (j=0; j<len_path_str; j++)
if (! (path_str[j] == corr_str[i+j] ||
(path_str[j] == 'I' && corr_str[i+j]== 'L') ||
(path_str[j] == 'L' && corr_str[i+j]== 'I') ||
(path_str[j] == 'Q' && corr_str[i+j]== 'K') ||
(path_str[j] == 'K' && corr_str[i+j]== 'Q') ) )
{
correct_seq = false;
break;
}
if (correct_seq)
{
// check prefix mass
Peptide pep;
pep.parse_from_string(config,corr_str);
const vector<int>& aas= pep.get_amino_acids();
mass_t mass=0;
int j;
if (n_term_mass == 0 && i==0)
return true;
for (j=0; j<aas.size(); j++)
{
mass+=aa2mass[aas[j]];
if (fabs(mass-this->n_term_mass)<6)
return true;
if (mass>n_term_mass)
break;
}
}
}
return false;
}
bool SeqPath::check_if_cut_correct(const vector<mass_t>& exp_cut_masses, mass_t tolerance) const
{
int c_idx=0;
int i;
for (i=0; i<positions.size()-1; i++)
{
if (positions[i].node_idx<0)
continue;
const mass_t pos_mass = positions[i].mass;
while (c_idx<exp_cut_masses.size() && exp_cut_masses[c_idx]< pos_mass - tolerance)
c_idx++;
if (c_idx == exp_cut_masses.size() || exp_cut_masses[c_idx]-tolerance > pos_mass)
return false;
}
return true;
}
void SeqPath::make_seq_str(Config *config)
{
const vector<string>& aa2label = config->get_aa2label();
int i;
seq_str = "";
if (n_term_aa>N_TERM)
seq_str += aa2label[n_term_aa] ;
if (positions.size()>0)
for (i=0; i<positions.size()-1; i++)
seq_str += aa2label[positions[i].aa];
if (c_term_aa>C_TERM)
seq_str += aa2label[c_term_aa];
}
void MultiPath::print(Config *config, ostream& os) const
{
os << "MultiPath: " << n_term_mass << " - " << c_term_mass <<
" score: " << path_score << " ";
int i;
cout << " Nodes:";
for (i=0; i<node_idxs.size(); i++)
cout << " " << node_idxs[i];
cout << " Edges:";
for (i=0; i<edge_idxs.size(); i++)
cout << " " << edge_idxs[i];
cout << endl;
}
void SeqPath::print(ostream& os) const
{
os << setprecision(5);
os << n_term_mass << " " << seq_str << " " << c_term_mass << " (s: " <<
this->path_score << ")" << endl;
}
void SeqPath::print_with_probs(ostream& os) const
{
os << setprecision(5);
os << n_term_mass << " " << seq_str << " (s: " <<
this->path_score << ")";
int i;
os << setprecision(2);
for (i=0; i<this->positions.size(); i++)
{
os << " X " ;
}
os << endl;
}
void SeqPath::print_full(Config *config, ostream &os) const
{
const vector<string>& aa2label = config->get_aa2label();
os << n_term_mass << " " << seq_str << " " << c_term_mass << " (s: " <<
this->path_score << ") NF: " << this->num_forbidden_nodes << endl;
int i;
if (positions.size()==0)
return;
for (i=0; i<positions.size()-1; i++)
{
cout << left << setw(3) << i;
cout << setw(5) << left << aa2label[positions[i].aa] << "\t" <<
positions[i].node_idx << "\t" << positions[i].edge_idx<< "\t" <<
fixed << setprecision(2) << positions[i].mass << "\t" <<
positions[i].node_score << "\t" << positions[i].edge_variant_score << endl;
}
cout << "Exact score: " << path_score << "\t" << "Approx score: " << setprecision(4) << multi_path_score << endl;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -