📄 prmgraph.cpp
字号:
break;
if (j==strong_frag_idxs.size())
continue;
nodes.push_back(node);
}
Node node;
node.mass = pm_with_19 - MASS_OHHH;
node.type = NODE_C_TERM;
node.breakage.mass = node.mass;
node.breakage.parent_charge=charge;
node.breakage.parent_size_idx = size_idx;
node.breakage.region_idx = config->calc_region_idx(node.mass,pm_with_19,charge,
min_peak_mass,max_peak_mass);
nodes.push_back(node);
for (i=0; i<nodes.size(); i++)
{
nodes[i].breakage.parent_charge = source_spectrum->get_charge();
nodes[i].breakage.parent_size_idx = source_spectrum->get_size_idx();
}
}
/**********************************************************
Adds the digest nodes for the digest amino acids
***********************************************************/
void PrmGraph::add_digest_nodes(mass_t n_digest_mass, mass_t c_digest_mass)
{
const vector<mass_t>& aa2mass = config->get_aa2mass();
const vector<int>& n_term_digest_aas = config->get_n_term_digest_aas();
const vector<int>& c_term_digest_aas = config->get_c_term_digest_aas();
const mass_t digest_tolerance = 1.5 * config->get_tolerance();
const mass_t min_peak_mass = source_spectrum->get_min_peak_mass();
const mass_t max_peak_mass = source_spectrum->get_max_peak_mass();
bool added_nodes = false;
if (c_term_digest_aas.size()>0)
{
int c_term_node=nodes.size();
while (--c_term_node>=0)
if (nodes[c_term_node].type == NODE_C_TERM)
break;
if (c_term_node<=0)
{
cout << "Error: couldn't find regular C-terminal!"<< endl;
exit(1);
}
int i;
for (i=0; i<c_term_digest_aas.size(); i++)
{
const int aa = c_term_digest_aas[i];
mass_t exp_node_mass = nodes[c_term_node].mass - aa2mass[aa];
int n=c_term_node-1;
while (n>=0 && nodes[n].mass>exp_node_mass)
n--;
const int min_dis_node = fabs(nodes[n].mass - exp_node_mass) <
fabs(nodes[n+1].mass - exp_node_mass) ? n : n+1;
if (c_digest_mass>0 && fabs(c_digest_mass - exp_node_mass)>1.5)
continue;
if (fabs(nodes[min_dis_node].mass - exp_node_mass)<digest_tolerance)
{
nodes[min_dis_node].type = NODE_DIGEST;
}
else // create the new node
{
Node node;
node.mass = exp_node_mass;
node.type = NODE_DIGEST;
node.breakage.mass = node.mass;
node.breakage.parent_charge=charge;
node.breakage.parent_size_idx = size_idx;
node.breakage.region_idx = config->calc_region_idx(node.mass,pm_with_19,charge,
min_peak_mass,max_peak_mass);
nodes.push_back(node);
added_nodes=true;
}
}
}
if (n_term_digest_aas.size()>0)
{
int n_term_node=0;
int i;
for (i=0; i<n_term_digest_aas.size(); i++)
{
const int aa = n_term_digest_aas[i];
mass_t exp_node_mass =aa2mass[aa];
int n=1;
while (n<nodes.size() && nodes[n].mass<exp_node_mass)
n++;
const int min_dis_node = fabs(nodes[n].mass - exp_node_mass) <
fabs(nodes[n-1].mass - exp_node_mass) ? n : n-1;
if (n_digest_mass>0 && fabs(n_digest_mass - nodes[min_dis_node].mass)>1.5)
continue;
if (fabs(nodes[min_dis_node].mass - exp_node_mass)<digest_tolerance)
{
nodes[min_dis_node].type = NODE_DIGEST;
}
else // create the new node
{
Node node;
node.mass = exp_node_mass;
node.type = NODE_DIGEST;
node.breakage.mass = node.mass;
node.breakage.parent_charge=charge;
node.breakage.parent_size_idx = size_idx;
node.breakage.region_idx = config->calc_region_idx(node.mass,pm_with_19, charge, min_peak_mass,max_peak_mass);
nodes.push_back(node);
added_nodes=true;
}
}
}
if (added_nodes)
{
sort(nodes.begin(),nodes.end());
init_index_array(); // redo the index because digest nodes were added
}
n_digest_node_idxs.clear();
c_digest_node_idxs.clear();
int i;
for (i=0; i<nodes.size(); i++)
{
if (nodes[i].type != NODE_DIGEST)
continue;
if (nodes[i].mass < 200.0)
{
n_digest_node_idxs.push_back(i);
}
else
c_digest_node_idxs.push_back(i);
}
}
/**********************************************************
Does initial scoring of nodes (uses only a node's breakage)
***********************************************************/
void PrmGraph::score_nodes(Model *model)
{
int i;
const int max_node = nodes.size();
for (i=0; i<max_node; i++)
{
bool verbose = false;
if (nodes[i].type == NODE_REG || nodes[i].type == NODE_DIGEST)
{
model->score_breakage(source_spectrum,&nodes[i].breakage,verbose);
nodes[i].score = nodes[i].breakage.score;
}
else if (nodes[i].type == NODE_N_TERM || nodes[i].type == NODE_C_TERM)
{
nodes[i].score = config->get_terminal_score();
nodes[i].breakage.score = config->get_terminal_score();
}
}
}
/**********************************************************
Adds scores from incoming and outgoing edges to nodes
(useful for improving scores with PrmGraphs
***********************************************************/
void PrmGraph::add_edge_scores_to_nodes()
{
int i;
const int max_node = nodes.size();
for (i=0; i<max_node; i++)
{
Node& node = nodes[i];
if (node.type != NODE_REG)
continue;
score_t max_in_score=-2.0;
score_t max_out_score=-2.0;
int j;
for (j=0; j<node.out_edge_idxs.size(); j++)
{
const int edge_idx = node.out_edge_idxs[j];
if (multi_edges[edge_idx].max_variant_score>max_out_score)
max_out_score=multi_edges[edge_idx].max_variant_score;
}
for (j=0; j<node.in_edge_idxs.size(); j++)
{
const int edge_idx = node.in_edge_idxs[j];
if (multi_edges[edge_idx].max_variant_score>max_in_score)
max_in_score=multi_edges[edge_idx].max_variant_score;
}
score_t sum= max_in_score + max_out_score;
if (sum>3.0)
node.score += sum * 0.3 + 2.0;
}
}
/********************************************************************
Fills multi edges.
Connects between nodes with peaks (non-terminal and non digest).
*********************************************************************/
void PrmGraph::fill_single_multi_edges()
{
const vector<AA_combo>& aa_edge_combos = config->get_aa_edge_combos();
const vector<int>& single_edge_combo_idxs = config->get_combo_idxs_by_length(1);
const vector<int>& strong_fragment_idxs = config->get_all_strong_fragment_type_idxs();
const vector<int>& session_aas = config->get_session_aas();
const mass_t tolerance = config->get_tolerance();
const mass_t pm_tolerance = (config->get_pm_tolerance() > tolerance) ? config->get_pm_tolerance() : tolerance * 0.75;
const mass_t digest_tolerance = 1.5 * tolerance;
const mass_t half_tolerance = 0.5 * tolerance;
const int num_nodes = nodes.size();
const int num_combos = single_edge_combo_idxs.size();
const mass_t max_combos_mass = aa_edge_combos[single_edge_combo_idxs[single_edge_combo_idxs.size()-1]].total_mass;;
const mass_t max_node_mass = nodes[num_nodes-1].mass;
const int num_session_aas = config->get_session_aas().size();
const int max_aa = config->get_session_aas()[num_session_aas-1];
// check we need to allocate the single edge indicator arrays
in_aa_ind.resize(max_aa+1);
out_aa_ind.resize(max_aa+1);
int i;
for (i=0; i<session_aas.size(); i++)
{
const int aa = session_aas[i];
in_aa_ind[aa].resize(num_nodes,false);
out_aa_ind[aa].resize(num_nodes,false);
}
// fill single aa edges
for (i=0; i<num_nodes; i++)
{
if (nodes[i].type == NODE_C_TERM)
continue;
const mass_t& node_mass = nodes[i].mass;
int last_node_idx=i;
int current_e_idx = -1;
int current_c_idx = -1;
int c;
for (c=0; c<num_combos; c++)
{
const int& combo_idx = single_edge_combo_idxs[c];
const AA_combo& combo = aa_edge_combos[combo_idx];
const mass_t& combo_mass = combo.total_mass;
const mass_t exp_connect_mass = node_mass + combo_mass;
const mass_t min_connect_mass1 = exp_connect_mass - half_tolerance;
const mass_t max_connect_mass1 = exp_connect_mass + half_tolerance;
int n_idx;
score_t max_score = NEG_INF;
int max_idx = -1;
for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
{
const mass_t& next_node_mass = nodes[n_idx].mass;
if (next_node_mass<min_connect_mass1)
continue;
if (next_node_mass>max_connect_mass1)
break;
if (nodes[n_idx].score>max_score)
{
max_idx = n_idx;
max_score = nodes[n_idx].score;
}
}
// if couldn't connect with small tolerance, try larger margin
if (max_idx<0)
{
const mass_t min_connect_mass2 = exp_connect_mass - digest_tolerance;
const mass_t max_connect_mass2 = exp_connect_mass + digest_tolerance;
int n_idx;
for (n_idx = last_node_idx+1; n_idx<num_nodes; n_idx++)
{
const mass_t& next_node_mass = nodes[n_idx].mass;
if (next_node_mass<min_connect_mass2)
{
last_node_idx++;
continue;
}
if (next_node_mass>max_connect_mass2)
break;
if (nodes[n_idx].score>max_score)
{
max_idx = n_idx;
max_score = nodes[n_idx].score;
}
}
}
if (max_score>NEG_INF)
{
// need to check if distance between peaks is within tolerance
mass_t min_diff = fabs(nodes[max_idx].mass-exp_connect_mass);
// try seeing if there are peaks that can bridge them
if (min_diff>tolerance)
{
const vector<BreakageFragment>& fragments1 = nodes[i].breakage.fragments;
const vector<BreakageFragment>& fragments2 = nodes[max_idx].breakage.fragments;
int f;
for (f=0; f<strong_fragment_idxs.size(); f++)
{
const int& strong_frag_idx = strong_fragment_idxs[f];
const int pos1 = nodes[i].breakage.get_position_of_frag_idx(strong_frag_idx);
if (pos1<0)
continue;
const int pos2 = nodes[max_idx].breakage.get_position_of_frag_idx(strong_frag_idx);
if (pos2<0)
continue;
mass_t mass_diff = fabs(fragments1[pos1].mass - fragments2[pos2].mass);
const int frag_charge = config->get_fragment(strong_frag_idx).charge;
if (frag_charge>1)
mass_diff *= frag_charge;
mass_t diff = fabs(mass_diff - combo_mass);
if (diff < min_diff)
min_diff = diff;
}
}
// larger tolerance is allowed for the digest and terminal nodes because they can be
// created relative to the terminal node
if (nodes[max_idx].type == NODE_DIGEST || nodes[i].type == NODE_DIGEST)
{
if (min_diff>digest_tolerance)
continue;
}
else
if (nodes[max_idx].type == NODE_C_TERM || nodes[i].type == NODE_N_TERM )
{
if (min_diff>digest_tolerance)
continue;
}
else
if (min_diff>tolerance)
continue;
// add combo idx
if (current_c_idx != max_idx)
{
current_e_idx = find_edge_idx_ij(i,max_idx);
current_c_idx = max_idx;
}
if (current_e_idx>=0)
{
MultiEdge& edge = multi_edges[current_e_idx];
// add the variant ptr and scores for this combo
add_and_score_edge_variants(combo,edge);
}
else // create new edge
{
MultiEdge new_edge;
new_edge.n_idx = i;
new_edge.c_idx = max_idx;
new_edge.n_break = &nodes[i].breakage;
new_edge.c_break = &nodes[max_idx].breakage;
new_edge.num_aa = 1;
add_and_score_edge_variants(combo,new_edge);
if (new_edge.variant_ptrs.size() == 0)
continue;
current_e_idx = multi_edges.size();
nodes[i].out_edge_idxs.push_back(current_e_idx);
nodes[max_idx].in_edge_idxs.push_back(current_e_idx);
multi_edges.push_back(new_edge);
}
// set amino acid indicators
int edge_aa = combo.amino_acids[0];
out_aa_ind[edge_aa][i]=true;
in_aa_ind[edge_aa][max_idx]=true;
}
}
}
}
/********************************************************************
Fills in double edges.
Uses a precomputed list (from config) of aa comobos and masses to quickly
determine if certain edges are present.
Flag add_overlap_edges controls if to add a double edge when the
same edge can be constructed by single edges.
*********************************************************************/
void PrmGraph::fill_double_multi_edges(bool add_overlap_edges)
{
const vector<AA_combo>& aa_edge_combos = config->get_aa_edge_combos();
const vector<int>& double_edge_combo_idxs = config->get_combo_idxs_by_length(2);
const vector<int>& strong_fragment_idxs = config->get_all_strong_fragment_type_idxs();
const mass_t tolerance = config->get_tolerance();
const mass_t pm_tolerance = (config->get_pm_tolerance() > tolerance) ? config->get_pm_tolerance() : tolerance * 0.75;
const mass_t digest_tolerance = 1.5 * tolerance;
const mass_t half_tolerance = 0.5 * tolerance;
const int num_nodes = nodes.size();
const int num_combos = double_edge_combo_idxs.size();
const mass_t max_combos_mass = aa_edge_combos[double_edge_combo_idxs[double_edge_combo_idxs.size()-1]].total_mass;;
const mass_t max_node_mass = nodes[num_nodes-1].mass;
const score_t overlap_thresh = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -