denovodp.cpp
来自「MS-Clustering is designed to rapidly clu」· C++ 代码 · 共 1,254 行 · 第 1/3 页
CPP
1,254 行
vector<bool> used_nodes; // indicators for each node if it was used in the current path
vector<int> debug_idxs;
bool debug_mode=false;
if (debug_mode)
{
debug_idxs.push_back(127);
debug_idxs.push_back(10);
debug_idxs.push_back(38);
debug_idxs.push_back(56);
// debug_idxs.push_back(576);
// debug_idxs.push_back(2950);
// debug_idxs.push_back(855);
// debug_idxs.push_back(860);
}
int i;
if (max_length>31)
max_length=31;
added_scores.resize(num_nodes,NEG_INF);
out_idx_counters.resize(num_nodes,0);
used_nodes.resize(num_nodes,false);
heap.clear();
alt_heap.clear();
heap.resize(required_num_paths);
find_max_gains_per_length(max_length,max_gains_for_length);
prm->sort_outgoing_edges_according_to_max_gains(max_gains_for_length);
// prm->get_source_spectrum()->print_expected_by();
// prm->print();
// cout << endl;
// exit(0);
clock_t start_t,end_t;
start_t = clock();
if (only_complete_sequences)
{
node_ordering.clear();
node_ordering.push_back(0);
}
else
prm->get_node_ordering_according_to_max_gains(max_gains_for_length, node_ordering);
int ns;
for (ns=0; ns<node_ordering.size(); ns++)
{
const int start_idx = node_ordering[ns];
if (max_gains_for_length[start_idx][max_length]<heap[0].score)
break;
const int num_first_out_edges = nodes[start_idx].out_edge_idxs.size();
edge_idx_set current_path;
int current_node_idx;
current_node_idx=start_idx;
current_path.score = nodes[start_idx].score;
used_nodes[start_idx] = true;
if (try_complete_sequences && start_idx>0)
current_path.score += non_complete_penalty;
while (1)
{
// check if the search is running too long, if so decrease the heap size
// and send the excess paths to the alternate heap
end_t = clock();
const double iteration_time = (end_t - start_t)/(double)CLOCKS_PER_SEC;
if (iteration_time>7)
break;
if (iteration_time>half_life_time)
{
if (alt_heap.size()==0)
alt_heap.resize(heap.size());
// reduce heap only if large enough
if (heap.size()>7)
{
int half_size = heap.size()/2;
while (heap.size()>half_size)
{
pop_heap(heap.begin(),heap.end());
const edge_idx_set& removed_path = heap[heap.size()-1];
if (removed_path.score>min_score_needed && removed_path.score>alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos] = removed_path;
push_heap(alt_heap.begin(),alt_heap.end());
}
heap.pop_back();
}
last_heap_pos = heap.size()-1;
// cout << "Reduced heap to : " << heap.size() << endl;
}
start_t = end_t;
}
if (out_idx_counters[current_node_idx] >= nodes[current_node_idx].out_edge_idxs.size())
{
if (current_node_idx == start_idx)
break; // we've returned all the way back, exhausted this tree
// store path if necessary
bool store_anyway = (nodes[current_node_idx].out_edge_idxs.size() == 0 &&
current_path.num_aa >= min_length &&
current_path.num_aa <= max_length &&
current_path.score > heap[0].score);
if (try_complete_sequences &&
! store_anyway &&
current_path.score > heap[0].score &&
current_node_idx == last_node_idx &&
start_idx == 0)
store_anyway=true;
if (store_anyway)
{
if (only_complete_sequences)
{
if (nodes[current_node_idx].type == NODE_C_TERM)
{
pop_heap(heap.begin(),heap.end());
const edge_idx_set& removed_path = heap[last_heap_pos];
if (alt_heap.size()>0 &&
removed_path.score>min_score_needed &&
removed_path.score>alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos] = removed_path;
push_heap(alt_heap.begin(),alt_heap.end());
}
heap[last_heap_pos] = current_path;
push_heap(heap.begin(),heap.end());
if (debug_mode && current_path.is_good_prefix(debug_idxs))
{
cout << "PUSH0 : ";
current_path.print();
}
}
}
else if (! try_complete_sequences || nodes[current_node_idx].type == NODE_C_TERM)
{
pop_heap(heap.begin(),heap.end());
const edge_idx_set& removed_path = heap[last_heap_pos];
if (removed_path.score>min_score_needed &&
alt_heap.size()>0 &&
removed_path.score>alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos] = removed_path;
push_heap(alt_heap.begin(),alt_heap.end());
}
heap[last_heap_pos] = current_path;
push_heap(heap.begin(),heap.end());
if (debug_mode && current_path.is_good_prefix(debug_idxs))
{
cout << "PUSH1 : ";
current_path.print();
}
}
else
{
const score_t score_with_penalty = current_path.score + non_complete_penalty;
if (score_with_penalty > min_score_needed &&
score_with_penalty > heap[0].score)
{
current_path.score += non_complete_penalty;
pop_heap(heap.begin(),heap.end());
const edge_idx_set& removed_path = heap[last_heap_pos];
if (alt_heap.size()>0 && removed_path.score>alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos] = removed_path;
push_heap(alt_heap.begin(),alt_heap.end());
}
heap[last_heap_pos] = current_path;
push_heap(heap.begin(),heap.end());
current_path.score -= non_complete_penalty;
if (debug_mode && current_path.is_good_prefix(debug_idxs))
{
cout << "PUSH2 : ";
current_path.print();
}
}
}
}
// backtrack
out_idx_counters[current_node_idx] =0;
used_nodes[current_node_idx]=false;
current_path.length--;
const int& path_length = current_path.length;
const MultiEdge& back_edge = multi_edges[current_path.edge_idxs[path_length]];
current_path.num_aa -= back_edge.num_aa;
current_node_idx = back_edge.n_idx;
current_path.score -= added_scores[path_length];
continue;
}
// discard this path if we are using too many edges or
// the score will not be able to improve enough
// since the edges are sorted according to the gain they can bring,
// none of the rest can help so we skip the rest
const int remaining_aas = max_length - current_path.num_aa;
const score_t threshold_score = (min_score_needed > heap[0].score ? min_score_needed : heap[0].score);
const score_t maximal_achievable_score = (remaining_aas>=0 ? current_path.score + max_gains_for_length[current_node_idx][remaining_aas] : NEG_INF);
if (current_path.num_aa > max_length || maximal_achievable_score<threshold_score)
{
out_idx_counters[current_node_idx] = nodes[current_node_idx].out_edge_idxs.size();
continue;
}
// advance on the edge
const int edge_idx = nodes[current_node_idx].out_edge_idxs[out_idx_counters[current_node_idx]];
const MultiEdge& e = multi_edges[edge_idx];
if (debug_mode && current_path.is_good_prefix(debug_idxs) &&
(edge_idx == 127 || edge_idx == 10 || edge_idx == 38 || edge_idx == 56 ))
{
current_path.print();
int qq=1;
}
out_idx_counters[current_node_idx]++;
current_node_idx = e.c_idx;
out_idx_counters[current_node_idx] =0;
used_nodes[current_node_idx]=true;
added_scores[current_path.length] = e.max_variant_score + nodes[e.c_idx].score;
// check if forbidden pair is used..
if (forbidden_idxs[current_node_idx]>=0 && used_nodes[forbidden_idxs[current_node_idx]])
added_scores[current_path.length] -= sym_penalty;
current_path.edge_idxs[current_path.length] = edge_idx;
current_path.num_aa += multi_edges[edge_idx].num_aa;
current_path.score += added_scores[current_path.length];
current_path.length++;
if (debug_mode && current_path.is_good_prefix(debug_idxs))
{
cout << "GP: " << current_path.length << "\t" << current_path.score << endl;
}
//
score_t heap_score = heap[0].score;
score_t added_score = added_scores[current_path.length-1];
// check if the path should be stored at this stage, and if we can mark
if (added_scores[current_path.length-1]>-20.0 &&
nodes[current_node_idx].out_edge_idxs.size()>0 &&
current_path.num_aa <= max_length &&
current_path.num_aa >= min_length &&
current_path.score> heap[0].score )
{
if (only_complete_sequences)
{
if (nodes[current_node_idx].type == NODE_C_TERM)
{
pop_heap(heap.begin(),heap.end());
const edge_idx_set& removed_path = heap[last_heap_pos];
if (alt_heap.size()>0 &&
removed_path.score > min_score_needed &&
removed_path.score > alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos] = removed_path;
push_heap(alt_heap.begin(),alt_heap.end());
}
heap[last_heap_pos] = current_path;
push_heap(heap.begin(),heap.end());
if (debug_mode && current_path.is_good_prefix(debug_idxs))
{
cout << "PUSH3 : ";
current_path.print();
}
}
}
else if (! try_complete_sequences || nodes[current_node_idx].type == NODE_C_TERM)
{
pop_heap(heap.begin(),heap.end());
const edge_idx_set& removed_path = heap[last_heap_pos];
if (alt_heap.size()>0 &&
removed_path.score > min_score_needed &&
removed_path.score>alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos] = removed_path;
push_heap(alt_heap.begin(),alt_heap.end());
}
heap[last_heap_pos] = current_path;
push_heap(heap.begin(),heap.end());
if (debug_mode && current_path.is_good_prefix(debug_idxs))
{
cout << "PUSH4 : ";
current_path.print();
}
}
else
{
if (current_path.score + non_complete_penalty > heap[0].score)
{
current_path.score += non_complete_penalty;
pop_heap(heap.begin(),heap.end());
const edge_idx_set& removed_path = heap[last_heap_pos];
if (alt_heap.size()>0 &&
removed_path.score> min_score_needed &&
removed_path.score>alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos] = removed_path;
push_heap(alt_heap.begin(),alt_heap.end());
}
heap[last_heap_pos] = current_path;
push_heap(heap.begin(),heap.end());
current_path.score -= non_complete_penalty;
if (debug_mode && current_path.is_good_prefix(debug_idxs))
{
cout << "PUSH5 : ";
current_path.print();
}
}
}
}
}
used_nodes[start_idx] = false;
}
// if (debug_mode)
// exit(0);
if (heap.size()==0)
return;
if (alt_heap.size()>0) // transfer all paths that are in current heap
{
int i;
for (i=0; i<heap.size(); i++)
if (heap[i].score>alt_heap[0].score)
{
pop_heap(alt_heap.begin(),alt_heap.end());
alt_heap[last_alt_heap_pos]=heap[i];
push_heap(alt_heap.begin(),alt_heap.end());
}
}
// work on the alt_heap if necessary
vector<edge_idx_set>& final_heap = (alt_heap.size()>0 ? alt_heap : heap);
sort(final_heap.begin(),final_heap.end());
while (final_heap.size()>0 && final_heap[final_heap.size()-1].score<-40)
final_heap.pop_back();
int actual_num_paths = required_num_paths;
if (final_heap.size()<actual_num_paths)
actual_num_paths = final_heap.size();
multi_paths.resize(actual_num_paths);
for (i=0; i<actual_num_paths; i++)
{
int j;
vector<int> edge_idxs;
edge_idxs.resize(final_heap[i].length);
for (j=0; j<final_heap[i].length; j++)
edge_idxs[j]=final_heap[i].edge_idxs[j];
if (0 && debug_mode)
{
cout << i << " >>> \t";
final_heap[i].print();
}
prm->create_path_from_edges(edge_idxs, multi_paths[i]);
multi_paths[i].path_score = final_heap[i].score;
multi_paths[i].original_rank = i;
const vector<int>& node_idxs = multi_paths[i].node_idxs;
const int max_idx = node_idxs.size()-1;
int num_forbidden =0;
for (j=0; j<max_idx; j++)
{
const int n_idx = node_idxs[j];
if (forbidden_idxs[n_idx]<0)
continue;
int k;
for (k=j+1; k<node_idxs.size(); k++)
if (node_idxs[k]==forbidden_idxs[n_idx])
break;
if (k<node_idxs.size())
num_forbidden++;
}
multi_paths[i].num_forbidden_nodes = num_forbidden;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?