⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 denovosolutions.h

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 H
字号:
#ifndef __DENOVOSOLUTIONS_H__
#define __DENOVOSOLUTIONS_H__


#include "Spectrum.h"
#include "Model.h"
#include "DeNovoDp.h"
#include "AdvancedScoreModel.h"
#include <set>


struct SeqPathKey {
	SeqPathKey() : pep_str(""), n_mass(NEG_INF), sort_key(NEG_INF), path_pos_idx(NEG_INF) {}
	SeqPathKey(const SeqPath& sp) : pep_str(sp.seq_str), n_mass(sp.n_term_mass),
									sort_key(sp.sort_key){}

	bool operator < (const SeqPathKey& other) const
	{
		return (pep_str < other.pep_str ||
			(pep_str == other.pep_str && other.n_mass - n_mass > tolerance));
	}

	static void set_tolerance(mass_t t) { tolerance = t; }
	static mass_t tolerance;
	string pep_str;
	mass_t n_mass;
	float  sort_key;
	int	   path_pos_idx;
};

struct idx_score_pair {
	idx_score_pair() : path_pos_idx(NEG_INF), sort_key((float)NEG_INF) {}
	idx_score_pair(const SeqPathKey& key) : path_pos_idx(key.path_pos_idx),
		sort_key(key.sort_key) {}

	bool operator< (const idx_score_pair& other) const
	{
		return sort_key>other.sort_key;
	}

	int path_pos_idx;
	float sort_key;
};


// well it's not really a heap
struct SeqPathHeap {
	void init(int _max_size, mass_t _tolerance) {
		paths.clear(); 
		paths.reserve(_max_size);
		min_score_heap.clear();
		path_keys.clear();

		max_size = _max_size; 
		tolerance = 8 * _tolerance;
		min_idx=-1; 
		min_value=99999;
		SeqPathKey::set_tolerance(tolerance);
	}

	int add_path(SeqPath& new_path, bool verbose = false);


	int max_size;
	mass_t tolerance;
	int min_idx;
	float min_value;
	vector<SeqPath> paths;
	set<SeqPathKey> path_keys;
	vector<idx_score_pair> min_score_heap;
};



/**************************************************************************
	Wrapper funciton that generates the desired solutions.
	Combines both local and global solutions (similar to the PepNovoTag
	and LocalTag solutions).
***************************************************************************/
bool generate_denovo_solutions(PrmGraph * & prm,
							   Model *model, 
							   Spectrum *spec,
							   bool denovo_mode,
							   mass_t pm_with_19,
							   int  charge,
							   int num_sols, 
							   int min_length, 
							   int max_length,
							   score_t min_score_needed,
							   vector<SeqPath>& solutions,
							   bool only_complete = false,
							   bool only_from_graph_containing_true_pep = false,
							   bool need_to_create_PrmGraph = true);






/***************************************************************************
	Wrapper function that generates several solutions according to different 
	precursor masses.
****************************************************************************/
void generate_denovo_solutions_from_several_pms(
							   vector<PrmGraph *>& prm_ptrs,
							   AdvancedScoreModel *model, 
							   Spectrum *spec,
							   bool denovo_mode,
							   int num_sols, 
							   int min_length, 
							   int max_length,
							   vector<mass_t>& different_pms_with_19,
							   vector<int>& charges,
							   vector<SeqPath>& solutions,
							   bool ind_only_complete = false);

void generate_denovo_solutions_from_several_pms_with_good_start_end_idxs(
							   vector<PrmGraph *>& prm_ptrs,
							   AdvancedScoreModel *model,
							   Spectrum *spec,
							   bool denovo_mode,
							   int num_sols, int min_length, int max_length,
							   vector<mass_t>& different_pms_with_19,
							   vector<int>& charges,
							   vector<SeqPath>& solutions);


bool generate_denovo_solutions_with_good_start_end_idxs(
							   PrmGraph*& prm,
							   Model *model, 
							   Spectrum *spec,
							   bool denovo_mode,
							   mass_t pm_with_19,
							   int  charge,
							   int num_sols, 
							   int min_length, 
							   int max_length,
							   vector<SeqPath>& solutions);

/**************************************************************************
Generates an prints the PRM graph
***************************************************************************/
void print_prm_graph_scores(Model *model, Spectrum *spec, 
					 mass_t pm_with_19, int charge);




/*************************************************************************
Generates tags by making a mixture of local/de novo tags
checks which tags appear in the longer denovo sequences sets the boolean
indicators in the tag seq paths
**************************************************************************/
void generate_tags(vector<PrmGraph *>& prm_ptrs,
				   AdvancedScoreModel *model,
				   BasicSpectrum& bs,
				   Spectrum *spec,
				   const vector<int>& max_num_tags,
				   int main_tag_length,				 // the length for which we parse de novo sequences
				   const vector<mass_t>& pms_with_19,
				   const vector<int>& charges,
				   vector<SeqPath>& final_tags,
				   bool use_original_num_tags=false,
				   int  prm_ptr_start_idx=0);




void output_denovo_solutions(SingleSpectrumFile *ssf, Config *config, ostream& out_stream,
							 const vector<SeqPath>& solutions, int max_num_sols = -1);


void output_tag_solutions(SingleSpectrumFile *ssf, Config *config, ostream& out_stream,
							 const vector<SeqPath>& solutions);

void perform_denovo_on_list_of_files(AdvancedScoreModel& model, 
									 const vector<string>& list_vector, 
									 int file_start_idx,
									 int num_solutions, 
									 int min_length, 
									 int max_length,
									 bool report_progress,
									 ostream& out_stream = cout);


void perform_tags_on_list_of_files(AdvancedScoreModel& model, 
									 const vector<string>& list_vector, 
									 int file_start_idx,
									 int num_solutions, 
									 int	tag_length,
									 bool report_progress,
									 ostream& out_stream = cout);





void create_tag_file_for_inspect(AdvancedScoreModel& model,
								 char *spectrum_file,
								 char *tag_string,
								 char *tag_suffix);

// makes a FASTA file with the sequences of full denovo sequences (completed
// from the SEQ in the annotated mgf file)
void make_denovo_training_fa(AdvancedScoreModel& model,
								 char *mgf);

void benchmark_tags(AdvancedScoreModel& model,
					char *spectrum_file,
					char *tag_string,
					int num_test_cases=-1);
								 

void perform_prm_on_list_of_files(Model& model, 
								  const vector<string>& list_vector,
								  float sqs_filter_prob,
								  int file_start_idx);

void perform_pmcsqs_on_list_of_files(Model& model, 
									 const vector<string>& list_vector,
									 int file_start_idx,
									 ostream& out_stream = cout);

void perform_sqs_on_list_of_files(Model& model, 
								  const vector<string>& list_vector,
								  int file_start_idx,
								  ostream& out_stream = cout);


void make_denovo_train_samples(char *file, int file_start_idx);




void benchmark_shew(Model& model, char *mgf_file);



#endif



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -