📄 prmgraph.cpp
字号:
int i;
for (i=0; i<num_nodes; i++)
{
Node& node = nodes[i];
if (node.type == NODE_C_TERM)
continue;
const mass_t& node_mass = node.mass;
int current_e_idx = -1;
int current_c_idx = -1;
int last_node_idx=i;
int c;
for (c=0; c<num_combos; c++)
{
const int& combo_idx = double_edge_combo_idxs[c];
const AA_combo& combo = aa_edge_combos[combo_idx];
const mass_t& combo_mass = combo.total_mass;
// should this combo be check - if there is a single edge with one of the
// amino acids in the combo skip this combo
bool is_overlap_edge = false;
int k;
for (k=0; k<combo.num_aa; k++)
if (out_aa_ind[combo.amino_acids[k]][i])
break;
if (k<combo.num_aa)
{
const int& overlap_aa = combo.amino_acids[k];
if (i>0 && ! add_overlap_edges &&
overlap_aa != Pro && overlap_aa != Gly && overlap_aa != His && overlap_aa != Ser)
continue;
is_overlap_edge = true;
}
const mass_t exp_connect_mass = node_mass + combo_mass;
const mass_t min_connect_mass1 = exp_connect_mass - half_tolerance;
const mass_t max_connect_mass1 = exp_connect_mass + half_tolerance;
int n_idx;
score_t max_score = NEG_INF;
int max_idx = -1;
for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
{
const mass_t& next_node_mass = nodes[n_idx].mass;
if (next_node_mass<min_connect_mass1)
continue;
if (next_node_mass>max_connect_mass1)
break;
if (nodes[n_idx].score>max_score)
{
max_idx = n_idx;
max_score = nodes[n_idx].score;
}
}
// if couldn't connect with small tolerance, try larger margin
if (max_idx<0)
{
const mass_t min_connect_mass2 = exp_connect_mass - digest_tolerance;
const mass_t max_connect_mass2 = exp_connect_mass + digest_tolerance;
int n_idx;
for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
{
const mass_t& next_node_mass = nodes[n_idx].mass;
if (next_node_mass<min_connect_mass2)
{
last_node_idx++;
continue;
}
if (next_node_mass>max_connect_mass2)
break;
if (nodes[n_idx].score>max_score)
{
max_idx = n_idx;
max_score = nodes[n_idx].score;
}
}
}
if (max_score>NEG_INF)
{
// need to check if distance between peaks is within tolerance
mass_t min_diff = fabs(nodes[max_idx].mass-exp_connect_mass);
// try seeing if there are peaks that can bridge them
if (min_diff>tolerance)
{
const vector<BreakageFragment>& fragments1 = nodes[i].breakage.fragments;
const vector<BreakageFragment>& fragments2 = nodes[max_idx].breakage.fragments;
int f;
for (f=0; f<strong_fragment_idxs.size(); f++)
{
const int& strong_frag_idx = strong_fragment_idxs[f];
const int pos1 = nodes[i].breakage.get_position_of_frag_idx(strong_frag_idx);
if (pos1<0)
continue;
const int pos2 = nodes[max_idx].breakage.get_position_of_frag_idx(strong_frag_idx);
if (pos2<0)
continue;
mass_t mass_diff = fabs(fragments1[pos1].mass - fragments2[pos2].mass);
const int frag_charge = config->get_fragment(strong_frag_idx).charge;
if (frag_charge>1)
mass_diff *= frag_charge;
mass_t diff = fabs(mass_diff - combo_mass);
if (diff < min_diff)
min_diff = diff;
}
}
// larger tolerance is allowed for the digest and terminal nodes because they can be
// created relative to the terminal node
if (nodes[max_idx].type == NODE_DIGEST || nodes[i].type == NODE_DIGEST)
{
if (min_diff>digest_tolerance)
continue;
}
else
if (nodes[max_idx].type == NODE_C_TERM || nodes[i].type == NODE_N_TERM )
{
if (min_diff>digest_tolerance)
continue;
}
else
if (min_diff>tolerance)
continue;
// add combo idx
if (current_c_idx != max_idx)
{
current_e_idx = find_edge_idx_ij(i,max_idx);
current_c_idx = max_idx;
}
// check that the node in the middle is not too good
if (is_overlap_edge)
{
}
if (current_e_idx>=0 && multi_edges[current_e_idx].num_aa == 2)
{
MultiEdge& edge = multi_edges[current_e_idx];
add_and_score_edge_variants(combo,edge);
if (is_overlap_edge)
edge.ind_edge_overlaps = true;
}
else // create new edge
{
MultiEdge new_edge;
new_edge.n_idx = i;
new_edge.c_idx = max_idx;
new_edge.n_break = &nodes[i].breakage;
new_edge.c_break = &nodes[max_idx].breakage;
new_edge.num_aa= 2;
new_edge.ind_edge_overlaps=is_overlap_edge;
add_and_score_edge_variants(combo,new_edge);
if (new_edge.variant_ptrs.size() == 0)
continue;
current_e_idx = multi_edges.size();
nodes[i].out_edge_idxs.push_back(current_e_idx);
nodes[max_idx].in_edge_idxs.push_back(current_e_idx);
multi_edges.push_back(new_edge);
}
}
}
}
}
/********************************************************************
Fills in long edges (similar to the double function, only looks both
ath the out edges of node i and the in edges of node max_idx to see
if there is a possible overlap).
Uses a precomputed list (from config) of aa comobos and masses to quickly
determine if certain edges are present.
Flag add_overlap_edges controls if to add a double edge when the
same edge can be constructed by single edges.
*********************************************************************/
void PrmGraph::fill_longer_multi_edges(int edge_length, bool add_overlap_edges)
{
const vector<AA_combo>& aa_edge_combos = config->get_aa_edge_combos();
const vector<int>& edge_combo_idxs = config->get_combo_idxs_by_length(edge_length);
const vector<int>& strong_fragment_idxs = config->get_all_strong_fragment_type_idxs();
const mass_t tolerance = config->get_tolerance();
const mass_t pm_tolerance = (config->get_pm_tolerance() > tolerance) ? config->get_pm_tolerance() : tolerance * 0.75;
const mass_t digest_tolerance = 1.5 * tolerance;
const mass_t half_tolerance = 0.5 * tolerance;
const int num_nodes = nodes.size();
const int num_combos = edge_combo_idxs.size();
const mass_t max_combos_mass = aa_edge_combos[edge_combo_idxs[edge_combo_idxs.size()-1]].total_mass;;
const mass_t max_node_mass = nodes[num_nodes-1].mass;
int i;
for (i=0; i<num_nodes; i++)
{
if (nodes[i].type == NODE_C_TERM)
continue;
const mass_t& node_mass = nodes[i].mass;
const int length_minus_1 = edge_length-1;
int current_e_idx = -1;
int current_c_idx = -1;
int last_node_idx=i;
int c;
for (c=0; c<num_combos; c++)
{
const int& combo_idx = edge_combo_idxs[c];
const AA_combo& combo = aa_edge_combos[combo_idx];
const mass_t& combo_mass = combo.total_mass;
// should this combo be check - if there is a single edge with one of the
// amino acids in the combo skip this combo
bool is_overlap_edge = false;
int a;
for (a=0; a<edge_length; a++)
{
const int& aa = combo.amino_acids[a];
if (out_aa_ind[aa][i])
break;
}
if (a<edge_length)
{
if (! add_overlap_edges)
continue;
is_overlap_edge = true;
}
const mass_t exp_connect_mass = node_mass + combo_mass;
const mass_t min_connect_mass1 = exp_connect_mass - half_tolerance;
const mass_t max_connect_mass1 = exp_connect_mass + half_tolerance;
int n_idx;
score_t max_score = NEG_INF;
int max_idx = -1;
for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
{
const mass_t& next_node_mass = nodes[n_idx].mass;
if (next_node_mass<min_connect_mass1)
continue;
if (next_node_mass>max_connect_mass1)
break;
if (nodes[n_idx].score>max_score)
{
max_idx = n_idx;
max_score = nodes[n_idx].score;
}
}
// if couldn't connect with small tolerance, try larger margin
if (max_idx<0)
{
const mass_t min_connect_mass2 = exp_connect_mass - digest_tolerance;
const mass_t max_connect_mass2 = exp_connect_mass + digest_tolerance;
int n_idx;
for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
{
const mass_t& next_node_mass = nodes[n_idx].mass;
if (next_node_mass<min_connect_mass2)
{
last_node_idx++;
continue;
}
if (next_node_mass>max_connect_mass2)
break;
if (nodes[n_idx].score>max_score)
{
max_idx = n_idx;
max_score = nodes[n_idx].score;
}
}
}
if (max_score>NEG_INF)
{
// need to check if distance between peaks is within tolerance
mass_t min_diff = fabs(nodes[max_idx].mass-exp_connect_mass);
// try seeing if there are peaks that can bridge them
if (min_diff>tolerance)
{
const vector<BreakageFragment>& fragments1 = nodes[i].breakage.fragments;
const vector<BreakageFragment>& fragments2 = nodes[max_idx].breakage.fragments;
int f;
for (f=0; f<strong_fragment_idxs.size(); f++)
{
const int& strong_frag_idx = strong_fragment_idxs[f];
const int pos1 = nodes[i].breakage.get_position_of_frag_idx(strong_frag_idx);
if (pos1<0)
continue;
const int pos2 = nodes[max_idx].breakage.get_position_of_frag_idx(strong_frag_idx);
if (pos2<0)
continue;
mass_t mass_diff = fabs(fragments1[pos1].mass - fragments2[pos2].mass);
const int frag_charge = config->get_fragment(strong_frag_idx).charge;
if (frag_charge>1)
mass_diff *= frag_charge;
mass_t diff = fabs(mass_diff - combo_mass);
if (diff < min_diff)
min_diff = diff;
}
}
// larger tolerance is allowed for the digest and terminal nodes because they can be
// created relative to the terminal node
if (nodes[max_idx].type == NODE_DIGEST || nodes[i].type == NODE_DIGEST)
{
if (min_diff>digest_tolerance)
continue;
}
else
if (nodes[max_idx].type == NODE_C_TERM || nodes[i].type == NODE_N_TERM )
{
if (min_diff>digest_tolerance)
continue;
}
else
if (min_diff>tolerance)
continue;
// check if this edge overlaps a subpath of shorter edges with similar
// amino acids (already check the node i, so only check what comes in
// node max_idx
int a;
for (a=0; a<edge_length; a++)
{
const int& aa = combo.amino_acids[a];
if (in_aa_ind[aa][max_idx])
break;
}
if (a<edge_length)
{
if (! add_overlap_edges)
continue;
is_overlap_edge = true;
}
// add combo idx
if (current_c_idx != max_idx)
{
current_e_idx = find_edge_idx_ij(i,max_idx);
current_c_idx = max_idx;
}
if (current_e_idx>=0 && multi_edges[current_e_idx].num_aa == edge_length)
{
MultiEdge& edge = multi_edges[current_e_idx];
add_and_score_edge_variants(combo,edge);
if (is_overlap_edge)
edge.ind_edge_overlaps = true;
}
else // create new edge
{
MultiEdge new_edge;
new_edge.n_idx = i;
new_edge.c_idx = max_idx;
new_edge.n_break = &nodes[i].breakage;
new_edge.c_break = &nodes[max_idx].breakage;
new_edge.num_aa= edge_length;
new_edge.ind_edge_overlaps=is_overlap_edge;
add_and_score_edge_variants(combo,new_edge);
if (new_edge.variant_ptrs.size() == 0)
continue;
current_e_idx = multi_edges.size();
nodes[i].out_edge_idxs.push_back(current_e_idx);
nodes[max_idx].in_edge_idxs.push_back(current_e_idx);
multi_edges.push_back(new_edge);
}
}
}
}
}
/***********************************************************
assigns a value to each node's (log) rank field
also sets max_node_score.
************************************************************/
void PrmGraph::rank_nodes_according_to_score()
{
vector<score_pair> pairs;
int i;
pairs.resize(nodes.size());
for (i=0; i<nodes.size(); i++)
{
pairs[i].idx=i;
pairs[i].score = nodes[i].score;
}
sort(pairs.begin(),pairs.end());
for (i=0; i<pairs.size(); i++)
nodes[pairs[i].idx].log_rank = (float)log(2.0+i);
max_node_score = NEG_INF;
for (i=0; i<nodes.size(); i++)
if (nodes[i].score>max_node_score)
max_node_score = nodes[i].score;
}
/**********************************************************
Sets the idx_max_in_score and idx_max_out_score fields
for the nodes.
***********************************************************/
void PrmGraph::set_idxs_max_in_out_for_nodes()
{
int i;
for (i=0; i<nodes.size(); i++)
{
int e;
score_t max_in_score = NEG_INF;
nodes[i].idx_max_in_score_node=-1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -