📄 multipath.cpp
字号:
last_pos.node_idx = last_edge.c_idx;
last_pos.node_score = last_edge.c_break->score;
template_path.path_score += last_pos.node_score;
const int num_multi_var_edges = var_positions.size();
if (num_multi_var_edges == 0)
{
seq_paths.resize(1);
seq_paths[0]=template_path;
return;
}
vector<var_combo> combo_heap;
vector<int> idxs;
idxs.resize(num_multi_var_edges,0);
const int last_var_pos = num_multi_var_edges-1;
const int last_var_val = num_vars[last_var_pos];
const score_t needed_var_score = min_score - template_path.path_score;
while (1)
{
score_t idxs_score = 0;
int k;
for (k=0; k<idxs.size(); k++)
idxs_score += var_scores[k][idxs[k]];
if (idxs_score>=needed_var_score)
{
if (combo_heap.size()<max_num_paths)
{
var_combo v;
v.path_score = template_path.path_score + idxs_score;
v.var_idxs = idxs;
combo_heap.push_back(v);
if (combo_heap.size()== max_num_paths)
make_heap(combo_heap.begin(),combo_heap.end());
}
else
{
const score_t score = template_path.path_score + idxs_score;
if (score>combo_heap[0].path_score)
{
pop_heap(combo_heap.begin(),combo_heap.end());
combo_heap[max_num_paths-1].path_score = score;
combo_heap[max_num_paths-1].var_idxs = idxs;
push_heap(combo_heap.begin(),combo_heap.end());
}
}
}
int j=0;
while (j<num_multi_var_edges)
{
idxs[j]++;
if (idxs[j]==num_vars[j])
{
idxs[j++]=0;
}
else
break;
}
if (j==num_multi_var_edges)
break;
}
seq_paths.clear();
seq_paths.resize(combo_heap.size(),template_path);
int i;
for (i=0; i<combo_heap.size(); i++)
{
const var_combo& combo = combo_heap[i];
SeqPath& seq_path = seq_paths[i];
// fill in info that changes with multi-variant edges
int j;
for (j=0; j<num_multi_var_edges; j++)
{
const int var_pos = var_positions[j];
const int e_idx = multi_path.edge_idxs[var_pos];
const MultiEdge& edge = get_multi_edge(e_idx);
const int pos_idx = var_edge_positions[j];
if (seq_path.positions[pos_idx].edge_idx != e_idx)
{
cout << "POS: " << pos_idx << endl;
cout << "Error: mismatch in pos_idx and e_idx of a multipath!" << endl;
cout << "looking for " << e_idx << endl;
cout << "edge idxs:" << endl;
int k;
for (k=0; k<seq_path.positions.size(); k++)
cout << k << "\t" << seq_path.positions[k].edge_idx << "\tnidx: " <<
seq_path.positions[k].node_idx << endl;
cout << endl;
multi_path.print(config);
this->print();
exit(1);
}
PathPos& pos = seq_path.positions[pos_idx];
const int variant_idx = combo.var_idxs[j];
int *variant_ptr = edge.variant_ptrs[variant_idx];
const int num_aa = *variant_ptr++;
int *aas = variant_ptr;
if (num_aa != edge.num_aa)
{
cout << "Error: edge and variant mixup!" << endl;
exit(1);
}
pos.edge_variant_score = edge.variant_scores[variant_idx];
pos.aa = aas[0];
// pos.edge_var_idx = variant_idx;
if (edge.num_aa == 1)
continue;
int k;
for (k=1; k<edge.num_aa; k++)
{
PathPos& pos = seq_path.positions[pos_idx+k];
pos.aa = aas[k];
}
}
//seq_path.path_score = max_path_score + combo.path_delta;
seq_path.path_score=combo.path_score;
seq_path.make_seq_str(config);
}
}
/************************************************************************
Expands the single multi path into all possible sequence variants.
Since this turns out to be the time-limiting process for long de nvoo,
this is implemented using a quick branch and bound that works on
edge variant indices. The SeqPaths are created only for the final
set of sequences.
*************************************************************************/
void PrmGraph::fast_expand_multi_path_for_combo_scores(
Model *model,
const MultiPath& multi_path,
vector<SeqPath>& seq_paths,
score_t min_score,
score_t forbidden_pair_penalty,
int max_num_paths)
{
const vector<AA_combo>& aa_edge_comobos = config->get_aa_edge_combos();
vector<score_t> max_attainable_scores;
const int num_edges = multi_path.edge_idxs.size();
max_attainable_scores.resize(num_edges+1,0);
int num_path_variants=1;
int e;
for (e=num_edges-1; e<num_edges; e++)
{
const int e_idx = multi_path.edge_idxs[e];
const MultiEdge& edge = multi_edges[e_idx];
num_path_variants *= edge.variant_ptrs.size();
}
if (num_path_variants == 0)
{
cout << "Error: had an edge with 0 variants!" <<endl;
exit(1);
}
// re-check the max attainable scores, this time compute all combo scores along the path
// this will give a more accurate max attainable score, and also allow quick computation
// of the expanded scores
max_attainable_scores.clear();
max_attainable_scores.resize(num_edges+1,0);
vector<score_t> max_node_scores;
max_node_scores.resize(num_edges+1,NEG_INF);
for (e=0; e<=num_edges; e++)
{
const int n_edge_idx = (e>0 ? multi_path.edge_idxs[e-1] : -1);
const int c_edge_idx = (e<num_edges ? multi_path.edge_idxs[e] : -1);
const int node_idx = (n_edge_idx>=0 ? multi_edges[n_edge_idx].c_idx : multi_edges[c_edge_idx].n_idx);
const int num_n_vars = (e>0 ? multi_edges[n_edge_idx].variant_ptrs.size() : 1);
const int num_c_vars = (e<num_edges ? multi_edges[c_edge_idx].variant_ptrs.size() : 1);
int j,k;
for (j=0; j<num_n_vars; j++)
for (k=0; k<num_c_vars; k++)
{
score_t combo_score = model->get_node_combo_score(this,node_idx,n_edge_idx,j,c_edge_idx,k);
if (max_node_scores[e]<combo_score)
max_node_scores[e]=combo_score;
}
}
max_attainable_scores[num_edges]=max_node_scores[num_edges];
for (e=num_edges-1; e>=0; e--)
{
const int e_idx = multi_path.edge_idxs[e];
const MultiEdge& edge = multi_edges[e_idx];
max_attainable_scores[e]= max_attainable_scores[e+1] +
edge.max_variant_score + max_node_scores[e];
}
score_t max_path_score = max_attainable_scores[0] - multi_path.num_forbidden_nodes * forbidden_pair_penalty;
score_t min_delta_allowed = min_score - max_path_score;
if (min_delta_allowed>0)
return;
// in this expansion method, all edges are stored (not only ones with more than one variant)
// perform expansion using a heap and condensed path representation
vector<int> var_positions; // holds the edge idxs
vector<int> num_vars;
vector<int> var_edge_positions;
vector< vector< score_t > > var_scores;
var_scores.clear();
var_positions.clear();
num_vars.clear();
int num_aas_in_multipath = 0;
for (e=0; e<num_edges; e++)
{
const int e_idx = multi_path.edge_idxs[e];
const MultiEdge& edge = multi_edges[e_idx];
const int num_vars_in_edge = edge.variant_ptrs.size();
vector<score_t> scores;
num_aas_in_multipath+=edge.num_aa;
int i;
for (i=0; i<num_vars_in_edge; i++)
scores.push_back(edge.variant_scores[i]);
var_scores.push_back(scores);
var_positions.push_back(e);
num_vars.push_back(num_vars_in_edge);
var_edge_positions.push_back(num_aas_in_multipath-edge.num_aa);
}
// create the SeqPaths from the edge_combos...
const int num_positions = num_aas_in_multipath+1;
SeqPath template_path; // holds common elements to all paths
template_path.n_term_aa = multi_path.n_term_aa;
template_path.c_term_aa = multi_path.c_term_aa;
template_path.n_term_mass = multi_path.n_term_mass;
template_path.c_term_mass = multi_path.c_term_mass;
template_path.charge = charge;
template_path.multi_path_rank = multi_path.original_rank;
template_path.path_score = 0;
template_path.prm_ptr = (PrmGraph *)this;
template_path.positions.clear();
template_path.positions.resize(num_positions);
template_path.num_forbidden_nodes = multi_path.num_forbidden_nodes;
template_path.path_score -= template_path.num_forbidden_nodes * forbidden_pair_penalty;
int pos_idx=0;
for (e=0; e<multi_path.edge_idxs.size(); e++)
{
const int e_idx = multi_path.edge_idxs[e];
const MultiEdge& edge = get_multi_edge(e_idx);
PathPos& pos = template_path.positions[pos_idx++];
pos.breakage = edge.n_break;
pos.edge_idx = e_idx;
pos.mass = edge.n_break->mass;
pos.node_idx = edge.n_idx;
int *variant_ptr = edge.variant_ptrs[0];
const int num_aa = *variant_ptr++;
int *aas = variant_ptr;
if (edge.variant_ptrs.size() == 1)
{
pos.edge_variant_score = edge.variant_scores[0];
pos.aa = aas[0];
}
if (edge.num_aa == 1)
continue;
int j;
for (j=1; j<edge.num_aa; j++)
{
PathPos& pos = template_path.positions[pos_idx++];
pos.breakage = NULL;
pos.aa = aas[j];
}
}
if (pos_idx != num_aas_in_multipath)
{
cout << "Error: mismatches between positions and multipath length!" << endl;
exit(1);
}
PathPos& last_pos = template_path.positions[num_positions-1];
const int last_e_idx = multi_path.edge_idxs[e-1];
const MultiEdge& last_edge = get_multi_edge(last_e_idx);
last_pos.breakage = last_edge.c_break;
last_pos.edge_idx =-1;
last_pos.mass = last_edge.c_break->mass;
last_pos.node_idx = last_edge.c_idx;
const int num_multi_var_edges = var_positions.size();
vector<var_combo> combo_heap;
vector<int> var_idxs;
var_idxs.resize(num_multi_var_edges,0);
const int last_var_pos = num_multi_var_edges-1;
const int last_var_val = num_vars[last_var_pos];
const score_t needed_var_score = min_score; // this is only the penalty for forbidden pairs
while (1)
{
score_t idxs_score = 0;
int k;
for (k=0; k<var_idxs.size(); k++)
idxs_score += var_scores[k][var_idxs[k]]; // this is the score for the edge variants (usually 0)
const vector<PathPos>& positions = template_path.positions;
score_t path_score = idxs_score + template_path.path_score;
// compute the node scores based on the edge variants
// N-term node score
vector<score_t> node_scores;
node_scores.resize(var_idxs.size()+1,NEG_INF);
node_scores[0] = model->get_node_combo_score(this,positions[0].node_idx,-1,0,
positions[0].edge_idx,var_idxs[0]);
path_score += node_scores[0];
// middle nodes score
int prev_edge_idx = positions[0].edge_idx;
for (k=1; k<var_idxs.size(); k++)
{
const int p=var_edge_positions[k];
const PathPos& pos = positions[p];
node_scores[k] = model->get_node_combo_score(this,pos.node_idx,prev_edge_idx,var_idxs[k-1],
pos.edge_idx,var_idxs[k]);
path_score += node_scores[k];
prev_edge_idx = pos.edge_idx;
}
// C-term node score
node_scores[k] = model->get_node_combo_score(this,last_pos.node_idx,prev_edge_idx,var_idxs[k-1],-1,0);
path_score += node_scores[k];
if (path_score>=needed_var_score)
{
if (combo_heap.size()<max_num_paths)
{
var_combo v;
v.path_score = path_score;
v.var_idxs = var_idxs;
v.node_scores = node_scores;
combo_heap.push_back(v);
if (combo_heap.size()== max_num_paths)
make_heap(combo_heap.begin(),combo_heap.end());
}
else
{
const score_t score = path_score;
if (score>combo_heap[0].path_score)
{
pop_heap(combo_heap.begin(),combo_heap.end());
var_combo& combo = combo_heap[max_num_paths-1];
combo.path_score = path_score;
combo.var_idxs = var_idxs;
combo.node_scores = node_scores;
push_heap(combo_heap.begin(),combo_heap.end());
}
}
}
int j=0;
while (j<num_multi_var_edges)
{
var_idxs[j]++;
if (var_idxs[j]==num_vars[j])
{
var_idxs[j++]=0;
}
else
break;
}
if (j==num_multi_var_edges)
break;
}
seq_paths.clear();
seq_paths.resize(combo_heap.size(),template_path);
int i;
for (i=0; i<combo_heap.size(); i++)
{
const var_combo& combo = combo_heap[i];
SeqPath& seq_path = seq_paths[i];
// fill in info that changes with multi-variant edges
int j;
for (j=0; j<num_multi_var_edges; j++)
{
const int var_pos = var_positions[j];
const int e_idx = multi_path.edge_idxs[var_pos];
const MultiEdge& edge = get_multi_edge(e_idx);
const int pos_idx = var_edge_positions[j];
if (seq_path.positions[pos_idx].edge_idx != e_idx)
{
cout << "POS: " << pos_idx << endl;
cout << "Error: mismatch in pos_idx and e_idx of a multipath!" << endl;
cout << "looking for " << e_idx << endl;
cout << "edge idxs:" << endl;
int k;
for (k=0; k<seq_path.positions.size(); k++)
cout << k << "\t" << seq_path.positions[k].edge_idx << "\tnidx: " <<
seq_path.positions[k].node_idx << endl;
cout << endl;
multi_path.print(config);
this->print();
exit(1);
}
PathPos& pos = seq_path.positions[pos_idx];
const int variant_idx = combo.var_idxs[j];
int *variant_ptr = edge.variant_ptrs[variant_idx];
const int num_aa = *variant_ptr++;
int *aas = variant_ptr;
if (num_aa != edge.num_aa)
{
cout << "Error: edge and variant mixup!" << endl;
exit(1);
}
pos.node_score = combo.node_scores[j];
pos.edge_variant_score = edge.variant_scores[variant_idx];
pos.aa = aas[0];
if (edge.num_aa == 1)
continue;
int k;
for (k=1; k<edge.num_aa; k++)
{
PathPos& pos = seq_path.positions[pos_idx+k];
pos.aa = aas[k];
}
}
seq_path.positions[seq_path.positions.size()-1].node_score = combo.node_scores[num_multi_var_edges];
seq_path.path_score=combo.path_score;
float pen = seq_path.adjust_complete_sequence_penalty(this);
seq_path.make_seq_str(config);
}
}
void PrmGraph::expand_all_multi_paths(Model *model,
const vector<MultiPath>& multi_paths,
vector<SeqPath>& paths,
score_t forbidden_pair_penalty,
int max_num_paths)
{
// expand all variants
PathHeap path_heap;
path_heap.init(max_num_paths);
paths.clear();
int mp_idx;
for (mp_idx=0; mp_idx<multi_paths.size(); mp_idx++ )
{
vector<SeqPath> variants;
if (multi_paths[mp_idx].path_score== NEG_INF)
continue;
if (this->has_node_combo_scores)
{
fast_expand_multi_path_for_combo_scores(model,
multi_paths[mp_idx],
variants,
path_heap.get_min_score(),
forbidden_pair_penalty,
max_num_paths);
}
else
{
if (multi_paths[mp_idx].path_score<=path_heap.get_min_score())
continue;
fast_expand_multi_path(multi_paths[mp_idx], variants, path_heap.get_min_score(),
forbidden_pair_penalty, max_num_paths);
}
if (variants.size()<=0)
continue;
sort(variants.begin(),variants.end(),comp_SeqPath_path_score);
/* cout << mp_idx << "\t" << multi_paths[mp_idx].path_score << "\t" << variants.size() << endl;
int q;
for (q=0; q<variants.size(); q++)
cout << " " << variants[q].seq_str << "\t" << variants[q].path_score << endl;
*/
int j;
for (j=0; j<variants.size(); j++)
{
variants[j].multi_path_rank = mp_idx;
variants[j].multi_path_score = multi_paths[mp_idx].path_score;
// check that the peptide has correct mass if it spans the entire graph
int n_idx = variants[j].positions[0].node_idx;
int c_idx = variants[j].positions[ variants[j].positions.size()-1].node_idx;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -