📄 prmgraph.cpp
字号:
#include "PrmGraph.h"
#include "AnnotatedSpectrum.h"
#include "auxfun.h"
bool comp_SeqPath_sort_key(const SeqPath& a, const SeqPath& b)
{
return (a.sort_key>b.sort_key);
}
bool comp_SeqPath_path_score (const SeqPath& a, const SeqPath& b)
{
return (a.path_score>b.path_score);
}
/********************************************************
*********************************************************/
void PrmGraph::create_graph_from_spectrum(Model *_model, Spectrum *spectrum,
mass_t _pm_with_19, int spec_charge,
bool add_all_pepitde_nodes, bool only_basic_score)
{
if (nodes.size()>0)
this->clear();
model = _model;
config = model->get_config();
source_spectrum = spectrum;
pm_with_19 = _pm_with_19;
charge = spec_charge;
has_node_combo_scores=false;
if (charge==0)
charge = source_spectrum->get_charge();
int org_size_idx = spectrum->get_size_idx();
size_idx = config->calc_size_idx(charge,pm_with_19);
spectrum->set_size_idx(size_idx);
digest_node_score = config->get_digest_score();
model->init_model_for_scoring_spectrum(source_spectrum);
// config->print_session_aas();
if (pm_with_19<10)
{
cout << "Error: supplied negative/low PM for graph!" << endl;
exit(1);
}
create_nodes();
add_digest_nodes();
score_nodes(model);
set_significant_masses();
fill_single_multi_edges();
fill_double_multi_edges();
int l;
for (l=3; l<=config->get_max_edge_length(); l++)
fill_longer_multi_edges(l);
if (! only_basic_score)
model->initial_combos_score(this);
// model->score_all_node_combos(this);
// source_spectrum->print_expected_by();
// this->print_with_combo_tables();
// print();
// exit(0);
rank_nodes_according_to_score();
set_idxs_max_in_out_for_nodes();
// restore size idx
spectrum->set_size_idx(org_size_idx);
}
/***********************************************************************
Creates a graph tailored for a peptide.
Contains only nodes and edges that correspond to that peptide.
************************************************************************/
void PrmGraph::create_graph_for_peptide_and_spectrum(Model *_model, Spectrum *spectrum,
mass_t _pm_with_19, int spec_charge, const Peptide& peptide)
{
if (nodes.size()>0)
this->clear();
model = _model;
config = model->get_config();
source_spectrum = spectrum;
pm_with_19 = _pm_with_19;
charge = spec_charge;
has_node_combo_scores=false;
if (charge==0)
charge = source_spectrum->get_charge();
int org_size_idx = spectrum->get_size_idx();
size_idx = config->calc_size_idx(charge,pm_with_19);
spectrum->set_size_idx(size_idx);
digest_node_score = config->get_digest_score();
model->init_model_for_scoring_spectrum(source_spectrum);
if (pm_with_19<10)
{
cout << "Error: supplied negative/low PM for graph!" << endl;
exit(1);
}
create_nodes_for_peptide(peptide);
const vector<int>& pep_aas = peptide.get_amino_acids();
mass_t n_digest_mass = NEG_INF;
mass_t c_digest_mass = NEG_INF;
const vector<mass_t>& aa2mass = config->get_aa2mass();
const vector<int>& n_digest_aas = config->get_n_term_digest_aas();
if (n_digest_aas.size()>0)
{
int j;
for (j=0; j<n_digest_aas.size(); j++)
if (pep_aas[0] == n_digest_aas[j])
break;
if (j<n_digest_aas.size())
n_digest_mass = aa2mass[n_digest_aas[j]];
}
const vector<int>& c_digest_aas = config->get_c_term_digest_aas();
if (c_digest_aas.size()>0)
{
int j;
for (j=0; j<c_digest_aas.size(); j++)
if (pep_aas[pep_aas.size()-1] == c_digest_aas[j])
break;
if (j<c_digest_aas.size())
c_digest_mass = peptide.get_mass() - aa2mass[c_digest_aas[j]];
}
if (n_digest_mass>0 || c_digest_mass>0)
add_digest_nodes(n_digest_mass,c_digest_mass);
score_nodes(model);
set_significant_masses();
fill_single_multi_edges();
fill_double_multi_edges();
int l;
for (l=3; l<=config->get_max_edge_length(); l++)
fill_longer_multi_edges(l);
model->score_peptide_node_combos(this,peptide);
rank_nodes_according_to_score();
set_idxs_max_in_out_for_nodes();
// restore size idx
spectrum->set_size_idx(org_size_idx);
}
/***********************************************************************
Finds the optimal value for pm_with_19
Chooses the one for which there is a maximal score for the top 5-10 nodes
************************************************************************/
mass_t PrmGraph::find_optimal_pm_with_19_for_graph(Model *_model,
Spectrum *spectrum, mass_t base_pm_with_19,
int spec_charge)
{
const mass_t margin = 1.5;
const mass_t increment = 0.1;
model = _model;
config = model->get_config();
source_spectrum = spectrum;
charge = spec_charge;
int org_size_idx = spectrum->get_size_idx();
size_idx = config->calc_size_idx(charge,base_pm_with_19);
spectrum->set_size_idx(size_idx);
digest_node_score = config->get_digest_score();
model->init_model_for_scoring_spectrum(source_spectrum);
const mass_t node_tol = config->get_tolerance()*1.25;
mass_t best_mass_with_19=-1;
score_t best_score=NEG_INF;
int num_top_nodes = 5;
num_top_nodes+= (int)((base_pm_with_19 - 800.0)/400.0);
if (num_top_nodes>8)
num_top_nodes=8;
vector<score_t> mass_scores;
vector<mass_t > masses_with_19;
mass_scores.clear();
masses_with_19.clear();
for (pm_with_19 = base_pm_with_19 - margin;
pm_with_19<= base_pm_with_19+ margin;
pm_with_19 += increment)
{
create_nodes();
add_digest_nodes();
score_nodes(model);
const int num_nodes = nodes.size();
static bool *forbidden_node_map = NULL;
static int forbidden_map_size = 0;
if (forbidden_map_size ==0 || forbidden_map_size<num_nodes)
{
forbidden_map_size = 400;
if (forbidden_map_size<num_nodes)
forbidden_map_size = num_nodes * 2;
forbidden_node_map = new bool [ forbidden_map_size];
if (! forbidden_node_map)
{
cout << "Error: couldn't allocate mem for bool node map!" << endl;
exit(1);
}
}
memset(forbidden_node_map,0,num_nodes*sizeof(bool));
score_t node_set_score =0;
int i;
for (i=0; i<num_top_nodes; i++)
{
int top_node_idx = -1;
score_t top_node_score=NEG_INF;
const int max_node_idx= num_nodes - 3;
int j;
for (j=1; j<max_node_idx; j++)
if (! forbidden_node_map[j] && nodes[j].score>top_node_score)
{
top_node_score = nodes[j].score;
top_node_idx = j;
}
// mark the node and all masses in +- 25 Da range (including the mirror in a
// +- 10 Da range
if (top_node_idx>=0)
{
const mass_t min_mass = nodes[top_node_idx].mass - 25.0;
const mass_t max_mass = nodes[top_node_idx].mass + 25.0;
forbidden_node_map[top_node_idx]=true;
int k=top_node_idx;
while (k>=0 && nodes[k].mass>min_mass)
forbidden_node_map[k--]=true;
k=top_node_idx;
while (k<num_nodes && nodes[k].mass<max_mass)
forbidden_node_map[k++]=true;
const mass_t mirror_node_mass = pm_with_19 - MASS_OHHH - nodes[top_node_idx].mass;
const mass_t min_mirror_mass = mirror_node_mass - 10.0;
const mass_t max_mirror_mass = mirror_node_mass + 10.0;
k=0;
while (k<num_nodes && nodes[k].mass<min_mirror_mass)
k++;
while (k<num_nodes && nodes[k].mass<max_mirror_mass)
forbidden_node_map[k++]=true;
// add to score
node_set_score += nodes[top_node_idx].score;
// check if this looks like part of a misalligned pair (i.e. there is a
// very close node that also has a high score and a different source
// if so give penaly (count only half of the score)
if (top_node_idx>0 &&
nodes[top_node_idx-1].mass + node_tol > nodes[top_node_idx].mass &&
nodes[top_node_idx-1].source_frag_type_idx != nodes[top_node_idx].source_frag_type_idx)
{
node_set_score -= nodes[top_node_idx].score * 0.5;
}
if (top_node_idx<num_nodes-1 &&
nodes[top_node_idx+1].mass - node_tol < nodes[top_node_idx].mass &&
nodes[top_node_idx+1].source_frag_type_idx != nodes[top_node_idx].source_frag_type_idx)
{
node_set_score -= nodes[top_node_idx].score * 0.5;
}
// cout << " " << top_node_idx << " " << nodes[top_node_idx].score << endl;
}
}
mass_scores.push_back(node_set_score);
masses_with_19.push_back(pm_with_19);
// cout << setprecision(3) << pm_with_19 << "\t" << node_set_score << endl;
if (node_set_score>best_score)
{
best_score = node_set_score;
best_mass_with_19 = pm_with_19;
}
}
// restore size idx
spectrum->set_size_idx(org_size_idx);
int i;
vector<int> good_mass_idxs;
good_mass_idxs.clear();
for (i=0; i<mass_scores.size(); i++)
{
if (mass_scores[i]==best_score)
good_mass_idxs.push_back(i);
}
if (good_mass_idxs.size()==0) // how would this happen?
return best_mass_with_19;
int idx = good_mass_idxs[good_mass_idxs.size()/2];
// cout << " >>> " << masses_with_19[idx] << endl;
return masses_with_19[idx];
}
/*********************************************************************
Initializes the index array.
For each rounded off Dalton m, it gives the index of the closest peak i
with mass m_i>= m.
**********************************************************************/
void PrmGraph::init_index_array()
{
int i,c,size=(int)pm_with_19+2;
const int max_node_idx = nodes.size()-1;
index_array.clear();
index_array.resize(size,-1);
i=0;
int m=(int)nodes[0].mass;
while (i<m)
index_array[i++]=0;
c=0;
while (c< max_node_idx)
{
int curr_m=(int)nodes[c].mass;
int next_m = curr_m;
int next_c = c;
while (next_m == curr_m && next_c<max_node_idx)
next_m=(int)nodes[++next_c].mass;
while (i<next_m)
index_array[i++]=c;
c=next_c;
}
while (i<size)
index_array[i++]=max_node_idx;
}
/***********************************************************
Merges nodes that are close to each other
Gives preference to the prefix fragment, but merge genereally
goes according to the intensities of the source fragments
************************************************************/
void PrmGraph::merge_close_nodes()
{
int i;
const vector<FragmentType>& all_fragments = config->get_all_fragments();
mass_t delta = config->get_tolerance();
if (delta>0.2)
delta *= 0.8;
for (i=0; i<nodes.size()-1; i++)
{
if (nodes[i+1].mass - nodes[i].mass < delta &&
nodes[i+1].source_frag_type_idx != nodes[i].source_frag_type_idx)
{
const int frag1_pos = nodes[i].breakage.get_position_of_frag_idx(nodes[i].source_frag_type_idx);
const int frag2_pos = nodes[i+1].breakage.get_position_of_frag_idx(nodes[i+1].source_frag_type_idx);
if (frag1_pos <0)
{
nodes[i].mass=999999;
continue;
}
if (frag2_pos<0)
{
nodes[i+1].mass=9999999;
continue;
}
if (frag1_pos <0 || frag2_pos<0)
{
Node& node1 = nodes[i];
Node& node2 = nodes[i+1];
cout << i << "\t";
node1.breakage.print_fragments(config);
cout << endl;
cout << i+1 << "\t";
node2.breakage.print_fragments(config);
cout << endl;
cout << "Error: source fragments not found in breakage!: " <<
config->get_fragment(node1.source_frag_type_idx).label << " " <<
config->get_fragment(node2.source_frag_type_idx).label << endl;
print();
exit(1);
}
mass_t new_node_mass;
intensity_t inten1 = nodes[i].breakage.fragments[frag1_pos].intensity;
intensity_t inten2 = nodes[i+1].breakage.fragments[frag2_pos].intensity;
if (all_fragments[nodes[i].source_frag_type_idx].orientation == PREFIX)
{
inten1 *= 2;
}
else if (all_fragments[nodes[i+1].source_frag_type_idx].orientation == PREFIX)
{
inten2 *= 2;
}
mass_t mass_times_inten1 = nodes[i].mass * inten1;
mass_t mass_times_inten2 = nodes[i+1].mass * inten2;
new_node_mass = (mass_times_inten1 + mass_times_inten2)/ (inten1 + inten2);
// create new node, move it to the i+1 position
nodes[i].mass = 99999999;
nodes[i+1].mass = new_node_mass;
// transfer fragments from node i to i+1 if they are not there
int f;
for (f=0; f<nodes[i].breakage.fragments.size(); f++)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -