denovodp.cpp

来自「MS-Clustering is designed to rapidly clu」· C++ 代码 · 共 1,254 行 · 第 1/3 页

CPP
1,254
字号
	vector<bool> used_nodes;  // indicators for each node if it was used in the current path


	vector<int> debug_idxs;
	bool debug_mode=false;
	if (debug_mode)
	{
		debug_idxs.push_back(127);
		debug_idxs.push_back(10);
		debug_idxs.push_back(38);
		debug_idxs.push_back(56);
	//	debug_idxs.push_back(576);
	//	debug_idxs.push_back(2950);
	//	debug_idxs.push_back(855);
	//	debug_idxs.push_back(860);
	}

	int i;
	if (max_length>31)
		max_length=31;
	
	added_scores.resize(num_nodes,NEG_INF);
	out_idx_counters.resize(num_nodes,0);
	used_nodes.resize(num_nodes,false);
	heap.clear();
	alt_heap.clear();
	heap.resize(required_num_paths);

	find_max_gains_per_length(max_length,max_gains_for_length);
	prm->sort_outgoing_edges_according_to_max_gains(max_gains_for_length);

//	prm->get_source_spectrum()->print_expected_by();
//	prm->print();
//	cout << endl;
//	exit(0);
	
	clock_t start_t,end_t;
	start_t = clock();

	if (only_complete_sequences)
	{
		node_ordering.clear();
		node_ordering.push_back(0);
	}
	else
		prm->get_node_ordering_according_to_max_gains(max_gains_for_length, node_ordering);

	int ns;
	for (ns=0; ns<node_ordering.size(); ns++)
	{
		const int start_idx = node_ordering[ns];

		if (max_gains_for_length[start_idx][max_length]<heap[0].score)
			break;

		const int num_first_out_edges = nodes[start_idx].out_edge_idxs.size();
		edge_idx_set current_path;
		int          current_node_idx;
	
		current_node_idx=start_idx;
		current_path.score = nodes[start_idx].score;
		used_nodes[start_idx] = true;

		if (try_complete_sequences && start_idx>0)
			current_path.score += non_complete_penalty;

		while (1)
		{
			
			// check if the search is running too long, if so decrease the heap size
			// and send the excess paths to the alternate heap
			end_t = clock();
			const double iteration_time = (end_t - start_t)/(double)CLOCKS_PER_SEC;
			
			if (iteration_time>7)
				break;

			if (iteration_time>half_life_time)
			{
				if (alt_heap.size()==0)
					alt_heap.resize(heap.size());

				// reduce heap only if large enough
				if (heap.size()>7)
				{
					int half_size = heap.size()/2;
					while (heap.size()>half_size)
					{
						pop_heap(heap.begin(),heap.end());
						const edge_idx_set& removed_path = heap[heap.size()-1];
						if (removed_path.score>min_score_needed && removed_path.score>alt_heap[0].score)
						{
							pop_heap(alt_heap.begin(),alt_heap.end());
							alt_heap[last_alt_heap_pos] = removed_path;
							push_heap(alt_heap.begin(),alt_heap.end());
						}
						heap.pop_back();
					}
					last_heap_pos = heap.size()-1;
				//	cout << "Reduced heap to : " << heap.size() << endl;
				}
				start_t = end_t;
			}

			if (out_idx_counters[current_node_idx] >= nodes[current_node_idx].out_edge_idxs.size())
			{
				if (current_node_idx == start_idx)
					break; // we've returned all the way back, exhausted this tree
				// store path if necessary

				bool store_anyway = (nodes[current_node_idx].out_edge_idxs.size() == 0 &&
					current_path.num_aa >= min_length &&
					current_path.num_aa <= max_length &&
					current_path.score > heap[0].score);

				if (try_complete_sequences &&
					! store_anyway &&
					current_path.score > heap[0].score && 
					current_node_idx == last_node_idx  && 
					start_idx == 0)
					store_anyway=true;

				if (store_anyway)
				{
					if (only_complete_sequences)
					{
						if (nodes[current_node_idx].type == NODE_C_TERM)
						{
							pop_heap(heap.begin(),heap.end());

							const edge_idx_set& removed_path = heap[last_heap_pos];
							if (alt_heap.size()>0 && 
								removed_path.score>min_score_needed &&
								removed_path.score>alt_heap[0].score)
							{
								pop_heap(alt_heap.begin(),alt_heap.end());
								alt_heap[last_alt_heap_pos] = removed_path;
								push_heap(alt_heap.begin(),alt_heap.end());
							}

							heap[last_heap_pos] = current_path;
							push_heap(heap.begin(),heap.end());
							if (debug_mode && current_path.is_good_prefix(debug_idxs))
							{
								cout << "PUSH0 : ";
								current_path.print();
							}
						}
					}
					else if (! try_complete_sequences || nodes[current_node_idx].type == NODE_C_TERM)
					{
						pop_heap(heap.begin(),heap.end());
						
						const edge_idx_set& removed_path = heap[last_heap_pos];
						if (removed_path.score>min_score_needed &&
							alt_heap.size()>0 &&
							removed_path.score>alt_heap[0].score)
						{
							pop_heap(alt_heap.begin(),alt_heap.end());
							alt_heap[last_alt_heap_pos] = removed_path;
							push_heap(alt_heap.begin(),alt_heap.end());
						}

						heap[last_heap_pos] = current_path;
						push_heap(heap.begin(),heap.end());
						if (debug_mode && current_path.is_good_prefix(debug_idxs))
						{
							cout << "PUSH1 : ";
							current_path.print();
						}
					}
					else
					{
						const score_t score_with_penalty = current_path.score + non_complete_penalty;
						if (score_with_penalty > min_score_needed &&
							score_with_penalty > heap[0].score)
						{
							current_path.score += non_complete_penalty;
							pop_heap(heap.begin(),heap.end());
							const edge_idx_set& removed_path = heap[last_heap_pos];
							if (alt_heap.size()>0 && removed_path.score>alt_heap[0].score)
							{
								pop_heap(alt_heap.begin(),alt_heap.end());
								alt_heap[last_alt_heap_pos] = removed_path;
								push_heap(alt_heap.begin(),alt_heap.end());
							}

							heap[last_heap_pos] = current_path;
							push_heap(heap.begin(),heap.end());
							current_path.score -= non_complete_penalty;
							if (debug_mode && current_path.is_good_prefix(debug_idxs))
							{
								cout << "PUSH2 : ";
								current_path.print();
							}
						}
					}
				}

				// backtrack
				out_idx_counters[current_node_idx] =0;
				used_nodes[current_node_idx]=false;
				current_path.length--;

				const int& path_length     = current_path.length;
				const MultiEdge& back_edge = multi_edges[current_path.edge_idxs[path_length]];
				current_path.num_aa		 -= back_edge.num_aa;
				current_node_idx          = back_edge.n_idx;
				current_path.score       -= added_scores[path_length];
				continue;
			}


			// discard this path if we are using too many edges or 
			// the score will not be able to improve enough
			// since the edges are sorted according to the gain they can bring,
			// none of the rest can help so we skip the rest

			const int remaining_aas = max_length - current_path.num_aa;
			const score_t threshold_score = (min_score_needed > heap[0].score ?  min_score_needed : heap[0].score);
			const score_t maximal_achievable_score = (remaining_aas>=0 ? current_path.score + max_gains_for_length[current_node_idx][remaining_aas] : NEG_INF);
			if (current_path.num_aa > max_length || maximal_achievable_score<threshold_score)
			{
				out_idx_counters[current_node_idx] = nodes[current_node_idx].out_edge_idxs.size();
				continue; 
			}
		
			// advance on the edge
			const int edge_idx = nodes[current_node_idx].out_edge_idxs[out_idx_counters[current_node_idx]];
			const MultiEdge& e = multi_edges[edge_idx];

			if (debug_mode && current_path.is_good_prefix(debug_idxs) &&
				(edge_idx == 127 || edge_idx == 10 || edge_idx == 38 || edge_idx == 56 )) 
			{
				current_path.print();
				int qq=1;
			}

			out_idx_counters[current_node_idx]++;
			current_node_idx = e.c_idx;
			out_idx_counters[current_node_idx] =0;
			used_nodes[current_node_idx]=true;
			
			added_scores[current_path.length] = e.max_variant_score + nodes[e.c_idx].score;

			// check if forbidden pair is used..
			if (forbidden_idxs[current_node_idx]>=0 && used_nodes[forbidden_idxs[current_node_idx]])
				added_scores[current_path.length] -= sym_penalty; 

			current_path.edge_idxs[current_path.length] = edge_idx;
			current_path.num_aa += multi_edges[edge_idx].num_aa;
			current_path.score += added_scores[current_path.length];
			current_path.length++;

			if (debug_mode && current_path.is_good_prefix(debug_idxs))
			{
				cout << "GP: " << current_path.length << "\t" << current_path.score << endl;
			}
			
			// 
			score_t heap_score = heap[0].score;
			score_t added_score = added_scores[current_path.length-1];

			// check if the path should be stored at this stage, and if we can mark
		

			if (added_scores[current_path.length-1]>-20.0 && 
				nodes[current_node_idx].out_edge_idxs.size()>0 &&
				current_path.num_aa <= max_length &&
				current_path.num_aa >= min_length &&
				current_path.score> heap[0].score )
			{
				if (only_complete_sequences)
				{
					if (nodes[current_node_idx].type == NODE_C_TERM)
					{
						pop_heap(heap.begin(),heap.end());
						const edge_idx_set& removed_path = heap[last_heap_pos];
						if (alt_heap.size()>0 &&
							removed_path.score > min_score_needed &&
							removed_path.score > alt_heap[0].score)
						{
							pop_heap(alt_heap.begin(),alt_heap.end());
							alt_heap[last_alt_heap_pos] = removed_path;
							push_heap(alt_heap.begin(),alt_heap.end());
						}

						heap[last_heap_pos] = current_path;
						push_heap(heap.begin(),heap.end());
						if (debug_mode && current_path.is_good_prefix(debug_idxs))
						{
							cout << "PUSH3 : ";
							current_path.print();
						}
					}
				}
				else if (! try_complete_sequences || nodes[current_node_idx].type == NODE_C_TERM)
				{
					pop_heap(heap.begin(),heap.end());
					const edge_idx_set& removed_path = heap[last_heap_pos];
					if (alt_heap.size()>0 && 
						removed_path.score > min_score_needed &&
						removed_path.score>alt_heap[0].score)
					{
						pop_heap(alt_heap.begin(),alt_heap.end());
						alt_heap[last_alt_heap_pos] = removed_path;
						push_heap(alt_heap.begin(),alt_heap.end());
					}

					heap[last_heap_pos] = current_path;
					push_heap(heap.begin(),heap.end());
					if (debug_mode && current_path.is_good_prefix(debug_idxs))
					{
						cout << "PUSH4 : ";
						current_path.print();
					}
				}
				else
				{

					if (current_path.score + non_complete_penalty > heap[0].score)
					{
						current_path.score += non_complete_penalty;
						pop_heap(heap.begin(),heap.end());
						const edge_idx_set& removed_path = heap[last_heap_pos];
						if (alt_heap.size()>0 &&
							removed_path.score> min_score_needed &&
							removed_path.score>alt_heap[0].score)
						{
							pop_heap(alt_heap.begin(),alt_heap.end());
							alt_heap[last_alt_heap_pos] = removed_path;
							push_heap(alt_heap.begin(),alt_heap.end());
						}

						heap[last_heap_pos] = current_path;
						push_heap(heap.begin(),heap.end());
						current_path.score -= non_complete_penalty;
						if (debug_mode && current_path.is_good_prefix(debug_idxs))
						{
							cout << "PUSH5 : ";
							current_path.print();
						}
					}
				}
			}
		}

		used_nodes[start_idx] = false;
	}

//	if (debug_mode)
//		exit(0);

	if (heap.size()==0)
		return;

	if (alt_heap.size()>0) 	// transfer all paths that are in current heap
	{
		int i;
		for (i=0; i<heap.size(); i++)
			if (heap[i].score>alt_heap[0].score)
			{
				pop_heap(alt_heap.begin(),alt_heap.end());
				alt_heap[last_alt_heap_pos]=heap[i];
				push_heap(alt_heap.begin(),alt_heap.end());
			}
	}

	
	// work on the alt_heap if necessary
	vector<edge_idx_set>& final_heap = (alt_heap.size()>0 ? alt_heap : heap);
	
	sort(final_heap.begin(),final_heap.end());

	while (final_heap.size()>0 && final_heap[final_heap.size()-1].score<-40)
		final_heap.pop_back();

	int actual_num_paths = required_num_paths;
	if (final_heap.size()<actual_num_paths)
		actual_num_paths = final_heap.size();

	multi_paths.resize(actual_num_paths);

	for (i=0; i<actual_num_paths; i++)
	{
		int j;
		vector<int> edge_idxs;
		edge_idxs.resize(final_heap[i].length);
		for (j=0; j<final_heap[i].length; j++)
			edge_idxs[j]=final_heap[i].edge_idxs[j];

		if (0 && debug_mode)
		{
			cout << i << " >>> \t";
			final_heap[i].print();
		}

		prm->create_path_from_edges(edge_idxs, multi_paths[i]);

		multi_paths[i].path_score = final_heap[i].score;
		multi_paths[i].original_rank = i;

		const vector<int>& node_idxs = multi_paths[i].node_idxs;
		const int max_idx = node_idxs.size()-1;
		int num_forbidden =0;
		for (j=0; j<max_idx; j++)
		{
			const int n_idx = node_idxs[j];
			if (forbidden_idxs[n_idx]<0)
				continue;
			int k;
			for (k=j+1; k<node_idxs.size(); k++)
				if (node_idxs[k]==forbidden_idxs[n_idx])
					break;
			if (k<node_idxs.size())
				num_forbidden++;
		}
		multi_paths[i].num_forbidden_nodes = num_forbidden;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?