📄 prmgraph.cpp
字号:
const vector<int>& in_idxs = nodes[i].in_edge_idxs;
for (e=0; e<in_idxs.size(); e++)
{
const int prev_node_idx = multi_edges[in_idxs[e]].n_idx;
if (nodes[prev_node_idx].score>max_in_score)
{
nodes[i].idx_max_in_score_node = prev_node_idx;
max_in_score = nodes[prev_node_idx].score;
}
}
score_t max_out_score = NEG_INF;
nodes[i].idx_max_out_score_node=-1;
const vector<int>& out_idxs = nodes[i].out_edge_idxs;
for (e=0; e<out_idxs.size(); e++)
{
const int next_node_idx = multi_edges[out_idxs[e]].c_idx;
if (nodes[next_node_idx].score>max_out_score)
{
nodes[i].idx_max_out_score_node = next_node_idx;
max_out_score = nodes[next_node_idx].score;
}
}
}
}
// this function performs all the scoring operations on edges
// (amino acid scores, missing cleavage scores etc.)
// *** Most of the scoring is now done through EdgeModel !!!!!
//
score_t PrmGraph::calc_edge_variant_score(const MultiEdge& edge, int num_aa, int *aa) const
{
const Node& n_node = nodes[edge.n_idx];
const Node& c_node = nodes[edge.c_idx];
// give digest score only if there aren't other digest nodes with intensity
if (num_aa == 1 && nodes[edge.n_idx].type == NODE_DIGEST && nodes[edge.c_idx].type == NODE_C_TERM)
{
//cout << config->get_aa2label()[aa[0]] << " ";
// check for digest edge
const vector<int>& c_term_digest_aas = config->get_c_term_digest_aas();
if (c_term_digest_aas.size()>0)
{
int i;
for (i=0; i<c_term_digest_aas.size(); i++)
if (aa[0] == c_term_digest_aas[i])
break;
if (i == c_term_digest_aas.size())
{
return digest_node_score*-0.5;
}
bool other_digest_is_good=false;
for (i=0; i<this->c_digest_node_idxs.size(); i++)
{
if (c_digest_node_idxs[i] == edge.n_idx)
continue;
if (nodes[c_digest_node_idxs[i]].breakage.fragments.size()>0)
other_digest_is_good=true;
}
if (other_digest_is_good)
{
return digest_node_score*0.5;
}
return digest_node_score;
}
}
if (num_aa == 1 && nodes[edge.c_idx].type == NODE_DIGEST && nodes[edge.n_idx].type == NODE_N_TERM)
{
const vector<int>& n_term_digest_aas = config->get_n_term_digest_aas();
if (n_term_digest_aas.size()>0)
{
int i;
for (i=0; i<n_term_digest_aas.size(); i++)
if (aa[0] == n_term_digest_aas[i])
break;
if (i == n_term_digest_aas.size())
return digest_node_score*-0.5;
bool other_digest_is_good=false;
for (i=0; i<this->n_digest_node_idxs.size(); i++)
{
if (n_digest_node_idxs[i] == edge.c_idx)
continue;
if (nodes[n_digest_node_idxs[i]].score>0)
other_digest_is_good=true;
}
if (other_digest_is_good)
return 0;
return digest_node_score;
}
}
if (num_aa >1)
{
// return ((num_aa-1)*model->get_missing_breakage_score(charge,size_idx,edge.c_break->region_idx));
// return ((num_aa-1)*-6);
}
return 0;
}
// removes all edges to and from nodes with the active flag set to 0
void PrmGraph::remove_edges_from_inactive_nodes()
{
int i;
for (i=0; i<nodes.size(); i++)
{
if (nodes[i].active)
continue;
int j;
Node& node = nodes[i];
for (j=0; j<node.in_edge_idxs.size(); j++)
{
// remove edge from other node's list
const int e_idx = node.in_edge_idxs[j];
Node& other_node = nodes[multi_edges[e_idx].n_idx];
int k;
for (k=0; k<other_node.out_edge_idxs.size(); k++)
if (other_node.out_edge_idxs[k] == e_idx)
break;
if (k== other_node.out_edge_idxs.size())
{
cout << "Error: missing out edge idx!" << endl;
exit(1);
}
other_node.out_edge_idxs[k] = other_node.out_edge_idxs[other_node.out_edge_idxs.size()-1];
other_node.out_edge_idxs.pop_back();
}
for (j=0; j<node.out_edge_idxs.size(); j++)
{
// remove edge from other node's list
const int e_idx = node.out_edge_idxs[j];
Node& other_node = nodes[multi_edges[e_idx].c_idx];
int k;
for (k=0; k<other_node.in_edge_idxs.size(); k++)
if (other_node.in_edge_idxs[k] == e_idx)
break;
if (k== other_node.in_edge_idxs.size())
{
cout << "Error: missing in edge idx!" << endl;
exit(1);
}
other_node.in_edge_idxs[k] = other_node.in_edge_idxs[other_node.in_edge_idxs.size()-1];
other_node.in_edge_idxs.pop_back();
}
node.in_edge_idxs.clear();
node.out_edge_idxs.clear();
}
}
struct idx_score {
idx_score() : edge_idx(-1), score(NEG_INF) {}
bool operator< (const idx_score& other) const
{
return score>other.score;
}
int edge_idx;
score_t score;
};
// sorts edges according to the value to which they can possibly lead
// uses the max_gains table which state for each node i and length n, what is the maximal
// score attainable by using i + n amino acids in the graph
void PrmGraph::sort_outgoing_edges_according_to_max_gains(const vector< vector< score_t > >& max_gains)
{
int i;
const int last_size_idx = max_gains[0].size()-1;
for (i=0; i<nodes.size(); i++)
{
if (nodes[i].out_edge_idxs.size()==0)
continue;
int j;
vector<idx_score> pairs;
pairs.resize(nodes[i].out_edge_idxs.size());
for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
{
const int edge_idx = nodes[i].out_edge_idxs[j];
const MultiEdge& edge = multi_edges[edge_idx];
pairs[j].edge_idx= edge_idx;
pairs[j].score = max_gains[edge.c_idx][last_size_idx] +
nodes[edge.c_idx].score + edge.max_variant_score;
}
sort(pairs.begin(),pairs.end());
for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
nodes[i].out_edge_idxs[j]=pairs[j].edge_idx;
}
}
/***********************************************************************
// returns the optimal ordering of nodes for the search
************************************************************************/
void PrmGraph::get_node_ordering_according_to_max_gains(
vector< vector< score_t > >& max_gains_for_length,
vector<int>& node_order) const
{
vector<idx_score> pairs;
pairs.resize(nodes.size());
int i;
const int last_size_idx = max_gains_for_length[0].size()-1;
for (i=0; i<nodes.size(); i++)
{
pairs[i].edge_idx=i;
pairs[i].score = nodes[i].score + max_gains_for_length[i][last_size_idx];
}
sort(pairs.begin(),pairs.end());
node_order.resize(nodes.size());
for (i=0; i<pairs.size(); i++)
node_order[i]=pairs[i].edge_idx;
}
// sorts edges according to the value to which they lead
void PrmGraph::sort_outgoing_edges()
{
int i;
for (i=0; i<nodes.size(); i++)
{
if (nodes[i].out_edge_idxs.size()==0)
continue;
int j;
vector<idx_score> pairs;
pairs.resize(nodes[i].out_edge_idxs.size());
for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
{
pairs[j].edge_idx=nodes[i].out_edge_idxs[j];
pairs[j].score = multi_edges[nodes[i].out_edge_idxs[j]].max_variant_score +
nodes[multi_edges[nodes[i].out_edge_idxs[j]].c_idx].score;
}
sort(pairs.begin(),pairs.end());
for (j=0; j<nodes[i].out_edge_idxs.size(); j++)
nodes[i].out_edge_idxs[j]=pairs[j].edge_idx;
}
}
struct edge_idx_pair {
bool operator< (const edge_idx_pair& other) const
{
return n_idx<other.n_idx;
}
int edge_idx;
int n_idx;
};
/*******************************************************************
// creates a path object from a collection of edges that are assumed
// to correspond to a path in the graph
********************************************************************/
void PrmGraph::create_path_from_edges(vector<int>& edge_idxs, MultiPath& path) const
{
int i;
vector<edge_idx_pair> pairs;
if (edge_idxs.size()==0)
{
path.path_score = 0;
return;
}
for (i=0; i<edge_idxs.size(); i++)
{
edge_idx_pair p;
p.edge_idx = edge_idxs[i];
p.n_idx = multi_edges[edge_idxs[i]].n_idx;
pairs.push_back(p);
}
sort(pairs.begin(),pairs.end());
path.edge_idxs.resize(pairs.size());
for (i=0; i<pairs.size(); i++)
path.edge_idxs[i]=pairs[i].edge_idx;
path.n_term_mass= nodes[multi_edges[edge_idxs[0]].n_idx].mass;
path.c_term_mass= nodes[multi_edges[edge_idxs[edge_idxs.size()-1]].c_idx].mass;
for (i=1; i<path.edge_idxs.size(); i++)
{
if (multi_edges[path.edge_idxs[i]].n_idx != multi_edges[path.edge_idxs[i-1]].c_idx)
{
cout << "Error: inconsistent edges when creating path!" << endl;
exit(1);
}
}
// collect breakage info and edges
path.edge_idxs = edge_idxs;
path.breakages.clear();
path.node_idxs.clear();
for (i=0; i<edge_idxs.size(); i++)
{
path.breakages.push_back(multi_edges[edge_idxs[i]].n_break);
path.node_idxs.push_back(multi_edges[edge_idxs[i]].n_idx);
}
path.breakages.push_back(multi_edges[edge_idxs[i-1]].c_break);
path.node_idxs.push_back(multi_edges[edge_idxs[i-1]].c_idx);
}
// finds the highest scoring continuous subpath for a given peptide in the graph
SeqPath PrmGraph::get_highest_scoring_subpath(const Peptide& peptide, mass_t start_mass) const
{
SeqPath ret_path;
const vector<int>& path_aas = peptide.get_amino_acids();
mass_t pre_mass = pre_mass = start_mass;
mass_t double_tolerance = config->get_tolerance() * 3.0;
vector<bool> use_as_start_pos;
int i;
use_as_start_pos.resize(nodes.size(),true);
ret_path.path_score = 0;
// give start idx double tolerance
for (i=0; i<path_aas.size(); i++)
{
int j;
PeakRange nr = this->get_nodes_in_range(pre_mass - double_tolerance, pre_mass + double_tolerance);
for (j=0; j<nr.num_peaks; j++)
{
int node_idx = nr.low_idx+j;
if (! use_as_start_pos[node_idx])
continue;
// find max correct subpath from this node
SeqPath path;
path.n_term_mass = nodes[node_idx].mass;
path.c_term_mass = nodes[node_idx].mass;
path.path_score = 0;
path.positions.clear();
// loop until end is reached
int k=i;
int lass_good_edge_idx = -1;
while (k<path_aas.size())
{
const Node& node = nodes[node_idx];
int e;
for (e=0; e<node.out_edge_idxs.size(); e++)
{
const int& e_idx = node.out_edge_idxs[e];
const MultiEdge& edge = multi_edges[e_idx];
int var_idx = edge.get_variant_idx(1,(int *)&path_aas[k]);
if (var_idx<0 && i<path_aas.size()-1)
var_idx = edge.get_variant_idx(2,(int *)&path_aas[k]);
if (var_idx>=0)
{
path.add_edge_variant(edge,e_idx,var_idx);
k+=edge.num_aa;
lass_good_edge_idx = e_idx;
break;
}
}
if (e == node.out_edge_idxs.size())
break;
}
// add last position
if (lass_good_edge_idx>=0)
{
const MultiEdge& last_edge = multi_edges[lass_good_edge_idx];
PathPos last_pos;
last_pos.breakage = last_edge.c_break;
last_pos.edge_idx =-1;
last_pos.mass = last_edge.c_break->mass;
last_pos.node_idx = last_edge.c_idx;
last_pos.node_score = last_edge.c_break->score;
path.positions.push_back(last_pos);
path.path_score += last_pos.node_score;
path.c_term_mass = last_pos.mass;
}
// check if this path is better
if (path.path_score>ret_path.path_score)
ret_path = path;
}
pre_mass += config->get_aa2mass()[path_aas[i]];
}
return ret_path;
}
// finds the longest continuous subpath for a given peptide in the graph
SeqPath PrmGraph::get_longest_subpath(const Peptide& peptide, mass_t start_mass, bool verbose)
{
const int max_edge_length = config->get_max_edge_length();
const vector<mass_t>& aa2mass = config->get_aa2mass();
vector<mass_t> prefix_masses;
vector<int> path_aas = peptide.get_amino_acids();
vector<int> path_edges;
vector<int> path_aa_count;
int a_start=-1;
mass_t pre_mass = 0;
mass_t double_tolerance = config->get_tolerance() * 5.0;
int i;
path_edges.clear();
for (i=0; i<path_aas.size(); i++)
if (path_aas[i]==Ile)
path_aas[i]=Leu;
prefix_masses.push_back(start_mass);
for (i=0; i<path_aas.size(); i++)
prefix_masses.push_back(p
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -