⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.h

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 H
📖 第 1 页 / 共 2 页
字号:

	// Forwards p to set_rand_prob() of the regional fragment set stored at
	// [charge][size_idx][region_idx]. The three indices are used as given,
	// without any range checking.
	void set_regional_random_probability(int charge, int size_idx, int region_idx, float p)
	{
		auto& target_set = regional_fragment_sets[charge][size_idx][region_idx];
		target_set.set_rand_prob(p);
	}

	// Returns get_rand_prob() of the regional fragment set stored at
	// [charge][size_idx][region_idx]. The three indices are used as given,
	// without any range checking.
	float get_regional_random_probability(int charge, int size_idx, int region_idx)
	{
		const float rand_prob =
			regional_fragment_sets[charge][size_idx][region_idx].get_rand_prob();
		return rand_prob;
	}

	// Read-only access to a fragment type: delegates to
	// all_fragments.get_fragment(frag_idx) and passes the result through.
	const FragmentType& get_fragment(int frag_idx) const
	{
		const FragmentType& fragment = all_fragments.get_fragment(frag_idx);
		return fragment;
	}

	// Read-only access to the complete table of regional fragment sets,
	// indexed as [charge][size_idx][region_idx].
	const vector < vector< vector< RegionalFragments> > >& get_regional_fragment_sets() const
	{
		return regional_fragment_sets;
	}

	// Read-only lookup of the regional fragment set for (charge, size_idx,
	// region_idx), substituting a stored entry when an index is too large:
	//   - a charge above max_charge_for_size is replaced by max_charge_for_size;
	//   - a size_idx / region_idx at or past the number of stored entries is
	//     replaced by the last index of that level.
	// The index comparisons are carried out in the unsigned size type, so a
	// negative size_idx or region_idx compares as a very large value and is
	// also replaced by the last index. charge has no lower-bound check, and an
	// empty size or region level is not handled (assumes every level has at
	// least one entry -- TODO confirm).
	const RegionalFragments& get_regional_fragments(int charge, int size_idx, int region_idx) const
	{
		if (charge > max_charge_for_size)
			charge = max_charge_for_size;

		// The casts spell out the int -> unsigned conversion that comparing
		// against size() performs anyway.
		const size_t num_sizes = regional_fragment_sets[charge].size();
		if (static_cast<size_t>(size_idx) >= num_sizes)
			size_idx = static_cast<int>(num_sizes - 1);

		const size_t num_regions = regional_fragment_sets[charge][size_idx].size();
		if (static_cast<size_t>(region_idx) >= num_regions)
			region_idx = static_cast<int>(num_regions - 1);

		return regional_fragment_sets[charge][size_idx][region_idx];
	}

	// Mutable lookup of the regional fragment set for (charge, size_idx,
	// region_idx). Indices that are too large are replaced as follows:
	//   - a charge above max_charge_for_size becomes max_charge_for_size;
	//   - a size_idx / region_idx at or past the number of stored entries
	//     becomes the last index of that level.
	// The index comparisons are carried out in the unsigned size type, so a
	// negative size_idx or region_idx compares as a very large value and is
	// also replaced by the last index. charge has no lower-bound check, and an
	// empty size or region level is not handled (assumes every level has at
	// least one entry -- TODO confirm).
	RegionalFragments& get_non_const_regional_fragments(int charge, int size_idx, int region_idx)
	{
		if (charge > max_charge_for_size)
			charge = max_charge_for_size;

		// The casts spell out the int -> unsigned conversion that comparing
		// against size() performs anyway.
		const size_t size_count = regional_fragment_sets[charge].size();
		if (static_cast<size_t>(size_idx) >= size_count)
			size_idx = static_cast<int>(size_count - 1);

		const size_t region_count = regional_fragment_sets[charge][size_idx].size();
		if (static_cast<size_t>(region_idx) >= region_count)
			region_idx = static_cast<int>(region_count - 1);

		return regional_fragment_sets[charge][size_idx][region_idx];
	}

	// Empties the frag_type_combos container of the regional fragment set at
	// [charge][size_idx][region_idx]. Indices are used without range checks.
	void clear_combos(int charge, int size_idx, int region_idx)
	{
		auto& combos = regional_fragment_sets[charge][size_idx][region_idx].frag_type_combos;
		combos.clear();
	}

	// Both functions are only declared here; their bodies are not part of this
	// section of the header.
	// NOTE(review): judging by the names, the first (re)computes and the second
	// prints the fragment relationships of every regional fragment set --
	// confirm against the implementation.
	void set_all_regional_fragment_relationships();
	void print_all_regional_fragment_relationships() const;


	// Read-only access to the indices of all strong fragment types, i.e. the
	// types that are strong in at least one regional fragment set.
	const vector<int>& get_all_strong_fragment_type_idxs() const
	{
		const vector<int>& strong_idxs = all_strong_fragment_type_idxs;
		return strong_idxs;
	}

	// Selects the fragments to be used in the sets; the selection uses a cutoff
	// that is X times the random probability.
	// NOTE(review): the meaning of the default max_num_frags=0 (presumably
	// "no limit") is not visible in this header -- confirm in the implementation.
	void select_fragments_in_sets(score_t X=1.0, int max_num_frags=0);

	// For each regional fragment set, selects all fragments that have a minimal
	// probability of being strong.
	// NOTE(review): presumably min_prob is that minimal probability, at most
	// max_num_strong fragments are kept per set, and verbose turns on progress
	// output -- confirm in the implementation.
	void select_strong_fragments(int charge,
						score_t min_prob = 0.5, int max_num_strong = 3, bool verbose = true);

	// Returns strong_type1_idx, the index of the first of the two strongest
	// (highest probability) fragment types.
	int get_strong_type1_idx() const
	{
		return strong_type1_idx;
	}
	// Returns strong_type2_idx, the index of the second of the two strongest
	// (highest probability) fragment types.
	int get_strong_type2_idx() const
	{
		return strong_type2_idx;
	}


	// Hands frag_probs to the regional fragment set at
	// [charge][size_idx][region_idx] via set_frag_probs() and then asks that set
	// to sort itself with sort_according_to_frag_probs(). Indices are used
	// without range checks.
	// (The misspelling in this function's name is part of the public interface.)
	void sort_accoriding_to_fragment_probs(vector<score_t>& frag_probs, int charge, 
										   int size_idx, int region_idx)
	{
		auto& regional_set = regional_fragment_sets[charge][size_idx][region_idx];

		regional_set.set_frag_probs(frag_probs);
		regional_set.sort_according_to_frag_probs();
	}

	// Printing / reading helpers; all are only declared here.
	// The stream arguments name the source or destination; the regional
	// fragment sets are printed to cout unless another stream is passed.
	void print_session_aas() const;
	void print_fragments(ostream &os) const;
	void print_regional_fragment_sets(ostream& os = cout) const;
	void read_fragments(istream& is);
	void read_regional_fragment_sets(istream& is);
	// NOTE(review): presumably copies the sets stored for source_charge to
	// target_charge -- confirm in the implementation.
	void clone_regional_fragment_sets(int source_charge, int target_charge);


	// Prints every fragment type by delegating to all_fragments.print().
	void print_all_fragments() const
	{
		all_fragments.print();
	}


	// NOTE(review): presumably loads the configuration from the file named
	// file_name -- the body is not visible in this header.
	void read_config(const char* file_name);

	// NOTE(review): takes no destination argument; presumably writes to the
	// location held in config_file -- confirm in the implementation.
	void write_config();

	// Output goes to os, which defaults to cout.
	void print_config_parameters(ostream& os = cout) const;


	// Parses a line that is assumed to be from a config file.
	// All parameters are assumed to start with
	// #CONF <PARAMETER_NAME> VALUES
	// NOTE(review): buff is a non-const char*; whether the parser modifies the
	// buffer in place is not visible here.
	void parse_config_parameter(char *buff);


	// Tells whether a prefix mass is permitted, i.e. whether allowed_node_masses
	// covers the given mass.
	bool is_allowed_prefix_mass(mass_t mass) const
	{
		const bool is_covered = allowed_node_masses.is_covered(mass);
		return is_covered;
	}

	// Tells whether a suffix mass is permitted. The value looked up in
	// allowed_node_masses is pm_with_19 - mass - MASS_OHHH.
	bool is_allowed_suffix_mass(mass_t pm_with_19, mass_t mass) const
	{
		const bool is_covered =
			allowed_node_masses.is_covered(pm_with_19 - mass - MASS_OHHH);
		return is_covered;
	}

	// Tells whether an amino acid combo mass is permitted, i.e. whether
	// allowed_node_masses covers the given mass (the same lookup that
	// is_allowed_prefix_mass performs).
	bool is_allowed_aa_combo_mass(mass_t mass) const
	{
		const bool is_covered = allowed_node_masses.is_covered(mass);
		return is_covered;
	}


	// Initializes the map(s) of allowed prefix and suffix node masses.
	// NOTE(review): the only range map declared in the visible part of this
	// class is allowed_node_masses, which the prefix, suffix and aa-combo checks
	// all read; presumably that is what gets initialized here, and max_mass
	// (default 400.0) bounds the initialized range -- confirm in the
	// implementation.
	void init_allowed_node_masses(mass_t max_mass = 400.0);

	// Calculates the masses of the different aa_combos.
	// Combos are sorted aa lists.
	void calc_aa_combo_masses();
	
	// Read-only access to aa_edge_combos, the combined list of amino acid
	// combos of all lengths.
	const vector<AA_combo>& get_aa_edge_combos() const
	{
		return aa_edge_combos;
	}
	// Read-only access to the combo indices stored for the given length
	// (indices refer to entries of aa_edge_combos). length is used directly as
	// an index, without a range check.
	const vector<int>& get_combo_idxs_by_length(int length) const
	{
		const vector<int>& idxs_for_length = combo_idxs_by_length[length];
		return idxs_for_length;
	}
	// Only declared here.
	// NOTE(review): presumably returns a pointer into variant_vector at the
	// first variant belonging to combo combo_idx -- confirm in the implementation.
	const int *get_first_variant_ptr(int combo_idx) const;
	// Returns the address of element var_idx of variant_vector (each variant is
	// stored there as its number of amino acids followed by the amino acids).
	// var_idx is not range-checked.
	const int *get_variant_ptr(int var_idx) const
	{
		const int& variant_start = variant_vector[var_idx];
		return &variant_start;
	}

	// Returns an index and the number of combos that exist in the given mass
	// range [min_mass, max_mass]: the index is the return value and the count
	// is written to num_combos.
	// NOTE(review): whether the range bounds are inclusive, and what is returned
	// when no combo falls in the range, is not visible here.
	int get_ptrs_for_combos_in_mass_range(mass_t min_mass, mass_t max_mass, 
												   int& num_combos) const;
		
	// Returns true if there is a combo that contains the ordered variant of the
	// given aas.
	// NOTE(review): presumably combos holds combo indices and var_aas points to
	// num_aa amino acid codes -- confirm in the implementation.
	bool combos_have_variant(const vector<int>& combos, int num_aa, int *var_aas) const;


	// Returns max_combo_mass: the largest mass that a combo of amino acids
	// forming a single edge can have (the maximal combo mass plus tolerance).
	mass_t get_max_combo_mass() const
	{
		return max_combo_mass;
	}



	// Returns the stored min_exclude_range value.
	mass_t get_min_exclude_range() const
	{
		return min_exclude_range;
	}
	// Returns the stored max_exclude_range value.
	mass_t get_max_exclude_range() const
	{
		return max_exclude_range;
	}
	
	// Only declared here.
	// NOTE(review): presumably appends [min_range, max_range] to the
	// min_ranges / max_ranges vectors that check_if_mass_is_in_exclude_range
	// scans -- confirm in the implementation.
	void add_exclude_range(mass_t min_range, mass_t max_range);

	// Reports whether mass m lies inside any stored exclude range.
	// Range i is the closed interval [min_ranges[i], max_ranges[i]], so a mass
	// equal to either end point counts as excluded. Returns false when no
	// ranges are stored.
	bool check_if_mass_is_in_exclude_range(mass_t m) const
	{
		// min_ranges and max_ranges are read as parallel vectors. Only indices
		// present in both are examined, so a length mismatch can never cause an
		// out-of-range read; an unsigned counter matches the type of size().
		const size_t num_ranges = (min_ranges.size() < max_ranges.size()) ?
								  min_ranges.size() : max_ranges.size();

		for (size_t i = 0; i < num_ranges; i++)
			if (m >= min_ranges[i] && m <= max_ranges[i])
				return true;

		return false;
	}
	
	// Calculates the aa_variants vectors (for the terminals and the amino
	// acids A-V).
	void set_aa_variants();

	// Printing helpers, only declared here.
	// NOTE(review): the output destination is not visible in this header.
	void print_aa_variants() const;

	void print_size_partitions() const;
	

private:
//	ConversionTables original_tables;  // with default values

	// These conversion tables represent the parameters after PTM modifications.
	// All tables have the same aa indices; however, the actual masses might be
	// different (due to terminal additions, for instance).
	ConversionTables session_tables; 

	vector<int> standard_aas; // the 20 unmodified amino acids
	vector<int> session_aas; //  all aas that can be part of a peptide (including terminal mods)


	vector< vector<int> > aa_variants; // holds for each of the amino acids A-V and the terminals
									   // all the variants that can be used for each amino acid (e.g. M-> M,M+16,M+32)
	// maps all labels to their aa_idx
	// NOTE(review): STRING2INT_MAP is declared elsewhere; presumably a string -> int map.
	STRING2INT_MAP label2aa;


	// PTM vectors (hold info on all supported PTMs)
	// NOTE(review): ind_read_PTM_file presumably indicates that the PTM file has
	// already been read -- confirm where it is set.
	bool       ind_read_PTM_file;
	
	PTM_list   all_fixed_PTMs;    // region PTM_ALL
	PTM_list   all_optional_PTMs; // region PTM_ALL
	PTM_list   all_terminal_PTMs; // must be either PTM_N_TERMINAL, PTM_C_TERMINAL



	// MASS SPEC TYPE
	int		   mass_spec_type; // type of machine, might influence the way things are done
							   // this parameter should be part of the model file, and not changed.

	int		    digest_type; // 0 - nothing , 1 Trypsin

	// NOTE(review): presumably the amino acids at which the digest cleaves on
	// the N- and C-terminal side -- confirm where these are filled.
	vector<int> n_term_digest_aas;
	vector<int> c_term_digest_aas;

	// NOTE(review): the two value descriptions below do not obviously match the
	// member name (neither mentions estimating the pm) -- confirm the meaning
	// of 0 and 1 where the flag is read.
	int        need_to_estimate_pm; // 0 - used file pm, 1 - use original pm

	int		   need_to_normalize; // 0 - don't normalize intensities, 1 - do normalize (sum of intensities = m_over_z)

	int		   itraq_mode; // 0 - no itraq, 1 - this is itraq data

	// NOTE(review): the values of the next three flags are not documented in
	// this header.
	int		   use_spectrum_charge;

	int		   use_spectrum_mz;

	int		   filter_flag;

	// Score terms; NOTE(review): how they enter the scoring is not visible here.
	score_t    terminal_score;
	score_t	   digest_score;
	score_t	   forbidden_pair_penalty;

	string     resource_dir;  // path to directory where resource files can be found
	// NOTE(review): the next three are presumably file names resolved relative
	// to resource_dir -- confirm where they are opened.
	string     fragments_file;
	string     regional_fragment_sets_file;
	string	   aa_combo_file;
	string	   model_name;   // the name of the model that uses this config
	string     config_file;

	// Largest / smallest terminal modification masses.
	mass_t max_n_term_mod; // for the current session_aas
	mass_t max_c_term_mod; // for the current session_aas
	mass_t min_n_term_mod;
	mass_t min_c_term_mod;


	// Tolerance variables
	mass_t tolerance;       // tolerance for consecutive peaks
	mass_t pm_tolerance;    // NOTE(review): presumably the parent mass tolerance

	// Local window parameters.
	// NOTE(review): presumably used when filtering peaks per window -- confirm
	// where these are read.
	mass_t local_window_size;
	int	 max_number_peaks_per_local_window;
	int  number_of_strong_peaks_per_local_window;

//	score_t random_prob; // the random prob of observing a peak in the region (bins/area);

	// Idxs for selected PTMs to be used.
	// NOTE(review): presumably these index into all_fixed_PTMs,
	// all_optional_PTMs and all_terminal_PTMs respectively -- confirm.
	vector<int>  selected_fixed_PTM_idxs;
	vector<int>  selected_optional_PTM_idxs;
	vector<int>  selected_terminal_PTM_idxs;

	// Vectors that hold a sorted list of amino acid combos and masses
	vector< AA_combo >    aa_edge_combos;       // all lengths combined
	vector< vector<int> > combo_idxs_by_length; // the idxs in the aa_edge_combos_vector

	vector< int > variant_vector; // holds the variants for the various combos
								  // each variant is listed with the number of amino acids
								  // followed by the amino acids themselves

	vector< int > combo_start_idxs; // has the index of the first combo with a given integer mass

	mass_t max_combo_mass; // value returned by get_max_combo_mass()
	int  max_edge_length;  // NOTE(review): presumably the maximal number of amino acids in one edge

	// Thresholds for determining what model to use
	// NOTE(review): the regional-fragment accessors replace any charge above
	// max_charge_for_size with max_charge_for_size and then use it as an index
	// into regional_fragment_sets, so that table needs at least
	// max_charge_for_size+1 charge entries -- confirm where it is sized.
	int max_charge_for_size;       // the size of size_thresholds
	vector< vector< mass_t > > size_thresholds; // charge, mass
	vector< mass_t >           region_thresholds;

	// The sets of fragments that are to be observed in the different models
	// these depend on the parent charge, the peptide size, and the region
	// in which the breakage is observed
	FragmentTypeSet all_fragments;    // all possible fragment types
	vector < vector< vector< RegionalFragments> > > regional_fragment_sets; // charge, size, region_idx
	vector<int> all_strong_fragment_type_idxs; // strong in at least one regional fragment set
	int strong_type1_idx; // the two strongest (highest probability fragments)
	int strong_type2_idx;

	// mass exclude ranges (when doing clustering, for example ignore iTraq peaks)
	// min_ranges[i] and max_ranges[i] are the two end points of exclude range i;
	// check_if_mass_is_in_exclude_range() reads them as parallel vectors.
	vector< mass_t > min_ranges;
	vector< mass_t > max_ranges;
	// NOTE(review): presumably the overall lowest / highest bound over all
	// exclude ranges -- confirm in add_exclude_range().
	mass_t min_exclude_range;
	mass_t max_exclude_range;


	
	// These maps contain the ranges of permitted masses for amino acids combos,
	// prefix node masses, and suffix node masses, respectively.
	// Separate range maps are used because terminal PTMs can change the allowed ranges
	// NOTE(review): the comment above speaks of several maps, but only this one
	// is declared here, and the prefix, suffix and aa-combo checks all read it.
	MassRangeMap allowed_node_masses;


	// these are used in the selection of edges for the PrmGraph
	vector< vector<bool> > allowed_double_edge, double_edge_with_same_mass_as_single;


	// Private helpers, only declared here.
	// NOTE(review): presumably fills standard_aas with the unmodified amino
	// acids -- confirm in the implementation.
	void init_standard_aas();

	// NOTE(review): presumably sets up max_charge_for_size, size_thresholds and
	// region_thresholds -- confirm in the implementation.
	void init_model_size_and_region_thresholds();

	// NOTE(review): presumably prints the entries of table for the amino acids
	// listed in aas -- confirm in the implementation.
	void print_table_aas(const ConversionTables& table, 
							 const vector<int>& aas) const;


};
#endif


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -