⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 peakrankmodel.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 4 页
字号:
		{
			cut_mass+=aa2mass[tp.amino_acids[cut_idx-1]];
			if (intens[cut_idx]>=0)
			{
				RankBoostSample rbs;
				const FragmentType& frag = config->get_fragment(frag_type_idx);

				if (feature_set_type == 0)
				{
					fill_simple_peak_features(tp.amino_acids, cut_idx, 
						cut_mass, tp.pm_with_19, spec_charge, frag, rbs);
				}
				else if (feature_set_type == 1)
				{
					fill_advanced_peak_features(tp.amino_acids, cut_idx, 
						cut_mass, tp.pm_with_19, spec_charge, frag, rbs);
				}
				else if (feature_set_type == 2)
				{
					fill_partial_denovo_peak_features(tp.n_mass, c_term_mass, tp.amino_acids, cut_idx, 
						cut_mass, tp.pm_with_19, spec_charge, frag, tp.best_n_removed, tp.best_c_removed, rbs);
				}
				else
				{
					cout << "Error: feature set type not supported: " << feature_set_type << endl;
					exit(1);
				}

				rbs.group_idx = i;
				rbs.rank_in_group = cut_ranks[cut_idx];

				rbs.tag1 = rank_ds.get_num_samples(); // sample idx
				rbs.tag2 = cut_idx; // cut idx
				rbs.tag3 = tp.amino_acids.size(); // peptide length, this tag is used to filter
												  // the error estimates for a specific length

				rank_ds.add_sample(rbs);
				peak_intens.push_back(intens[cut_idx]);
			}	
		}

		ps.num_peaks = rank_ds.get_num_samples() - ps.peak_start_idx;
		peak_starts.push_back(ps);
		max_annotated_intens.push_back(max_ann_inten);
	}

	rank_ds.set_num_groups(sample_tps.size());
}



/***************************************************************************
Initializes the models according to specified max charge defaults (if not 
already initialized). Assumes all training files have the same prefix
and differ on the charge/size/mobility idxs).
****************************************************************************/
void PeakRankModel::train_all_partition_models(
								int frag_fill_type,
								char *prefix_path,
								int	  sel_charge,
								int   sel_size_idx,
								int	  sel_mobility,
								int	  frag_type_idx,
								char *report_dir,
								int   num_rounds,
								char *test_set,
								int	  test_peptide_length,
								char *stop_signal_file,
								weight_t max_weight_ratio)
{
	const int max_charge = size_thresholds.size() -1;

	this->feature_set_type = frag_fill_type;

	if (partition_models.size()<max_charge+1)
		partition_models.resize(max_charge+1);

	int charge; 
	for (charge=1; charge<=max_charge; charge++)
	{
		if (sel_charge>0 && sel_charge != charge)
			continue;

		const int num_size_idxs = size_thresholds[charge].size()+1;
		if (partition_models[charge].size()<num_size_idxs)
			partition_models[charge].resize(num_size_idxs);

		cout << "FRAG FILL TYPE: " << frag_fill_type << endl;
		cout << "CHARGE " << charge << "  , # size idxs: " << num_size_idxs << endl;
		cout << "Test peptide length: " << test_peptide_length << endl;
		int size_idx;
		for (size_idx=0; size_idx<num_size_idxs; size_idx++)
		{
			if (partition_models[charge][size_idx].size()<=NONMOBILE)
				partition_models[charge][size_idx].resize(NONMOBILE+1,NULL);

			int mobility;
			for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
			{
				if ((sel_charge>=0 && sel_charge != charge) ||
					(sel_size_idx>=0 && sel_size_idx != size_idx) ||
					(sel_mobility>=0 && sel_mobility != mobility) )
					continue;

				char tr_file[256];
				sprintf(tr_file,"%s_%d_%d_%d.txt",prefix_path,charge,size_idx,mobility);

				if (! partition_models[charge][size_idx][mobility])
					partition_models[charge][size_idx][mobility] = new PartitionModel;

				cout << "Training models for charge " << charge << " size " << size_idx<< " mobility " <<
					mobility << endl;
				cout << "Max weight ratio " << max_weight_ratio << endl;

				partition_models[charge][size_idx][mobility]->set_partition_name(get_peak_rank_model_name(),
					charge, size_idx, mobility);
			
				partition_models[charge][size_idx][mobility]->train_partition_model(this,
					tr_file,charge,size_idx,mobility,frag_type_idx,report_dir,
					num_rounds,test_set, test_peptide_length, stop_signal_file, 
					max_weight_ratio);

				cout << endl;
			}
		}
	}
}


/**********************************************************************
Lists the idxs of the partition models
charge size_idx mobility (not fragment)
***********************************************************************/
void PeakRankModel::list_all_model_idxs()
{
	int charge;
	for (charge=1; charge<size_thresholds.size(); charge++)
	{
		int size_idx;
		for (size_idx = 0; size_idx<=size_thresholds[charge].size(); size_idx++)
		{
			int mobility;
			for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
			{
				cout << charge << " " << size_idx << " " << mobility << endl;
			}
		}
	}
}


void PeakRankModel::print_model_init_stats() const
{
	if (this->feature_set_type<=2)
	{
		const int num_tabs = 2+(peak_rank_model_name.length()+1)/8;
		string gap_str ="";
		int i;
		for (i=0; i<num_tabs; i++)
			gap_str += "\t";

		cout << gap_str;
		
		for (i=0; i<10; i++)
			cout << " " << i;
		cout << endl;

		int charge;
		for (charge=1; charge<size_thresholds.size(); charge++)
		{
			int size_idx;
			for (size_idx = 0; size_idx<=size_thresholds[charge].size(); size_idx++)
			{
				int mobility;
				for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
				{
					if (this->partition_models[charge][size_idx][mobility])
						partition_models[charge][size_idx][mobility]->print_model_stats();
				}
			}
		}
		cout << endl;
	}
}


// set intens so maximal intensity of the fragments is 1.0
void find_ranks(const vector<intensity_t>& intens, vector<int>& ranks)
{
	vector<inten_pair> pairs;

	pairs.resize(intens.size());

	int num_with_inten=0;
	int i;
	for (i=0; i<intens.size(); i++)
	{
		pairs[i].idx=i;
		pairs[i].inten = intens[i];
		if (intens[i]>=0)
			num_with_inten++;
	}

	sort(pairs.begin(),pairs.end());

	const int missing_peak_rank = (intens.size()+num_with_inten)/2;

	ranks.resize(intens.size());
	for (i=0; i<pairs.size(); i++)
	{
		if (pairs[i].inten <0)
		{
			ranks[pairs[i].idx]=-1;
		}
		else if (pairs[i].inten == 0)
		{
			ranks[pairs[i].idx]=missing_peak_rank;
		}
		else
			ranks[pairs[i].idx]=i+1;
	}
}


void normalize_intens(vector<intensity_t>& intens)
{
	if (intens.size()<=1)
		return;

	float max_inten = intens[1];
	int i;
	for (i=2; i<intens.size(); i++)
		if (intens[i]>max_inten)
			max_inten = intens[i];

	if (max_inten<=0.0)
		return;

	float one_over = 1.0 / max_inten;

	for (i=1; i<intens.size(); i++)
		intens[i] *= one_over;
}


void PeakRankModel::set_mass_detection_defaults()
{
	max_detected_mass = 2000.0;

	charge_min_mass_coefficients.resize(4,0);
	charge_min_mass_coefficients[1]=0.24;
	charge_min_mass_coefficients[2]=0.12;
	charge_min_mass_coefficients[3]=0.08;
}

void PeakRankModel::set_size_thresholds()
{
	size_thresholds.resize(4);

	size_thresholds[1].push_back(1150.0);
	size_thresholds[1].push_back(1400.0);

	size_thresholds[2].push_back(1100.0);
	size_thresholds[2].push_back(1300.0);
	size_thresholds[2].push_back(1600.0);
	size_thresholds[2].push_back(1900.0);
	size_thresholds[2].push_back(2400.0);

	size_thresholds[3].push_back(1950.0);
	size_thresholds[3].push_back(2450.0);
	size_thresholds[3].push_back(3000.0);
}



mass_t PeakRankModel::calc_min_detected_mass(mass_t pm_with_19, int charge) const
{
	if (charge>= charge_min_mass_coefficients.size())
		charge = charge_min_mass_coefficients.size()-1;
	return (charge_min_mass_coefficients[charge]*pm_with_19);
}



bool PeakRankModel::read_peak_rank_model(Config *_config, const char *name, bool silent_ind,
				int specific_charge, int specific_size, int specific_mobility)
{
	config = _config;
	peak_rank_model_name = string(name);

	const string model_name_prefix = config->get_resource_dir() + "/" + string(name);
	const string main_model_file = model_name_prefix + "_rank_model.txt";


	ifstream main_stream(main_model_file.c_str());

	if (! main_stream.is_open() || ! main_stream.good())
	{
		cout << "Error: couldn't open file for reading: " << main_model_file << endl;
		exit(1);
	}

	string m_name;
	int num_aa_labels=0;

	feature_set_type = -1;

	main_stream >> m_name;
	main_stream >> feature_set_type;
	main_stream >> num_aa_labels;

	if (feature_set_type<0 || feature_set_type>5 || num_aa_labels<19)
	{
		cout << "Error: bad input parameters in PeakRankModel!" << endl;
		exit(1);
	}


	model_aa_labels.resize(num_aa_labels);
	int i;
	for (i=0; i<num_aa_labels; i++)
		main_stream >> model_aa_labels[i];

	convert_session_aas_to_model_aas();

	max_detected_mass=-1;
	main_stream >> this->max_detected_mass;

	int num_min_ratios=-1;
	main_stream >> num_min_ratios;
	this->charge_min_mass_coefficients.resize(num_min_ratios,1);

	for (i=0; i<num_min_ratios; i++)
		main_stream >> charge_min_mass_coefficients[i];

	int num_charges=0;
	main_stream >> num_charges;

	size_thresholds.resize(num_charges);
	partition_models.resize(num_charges);
	for (i=0; i<num_charges; i++)
	{
		int charge=-1;
		int num_sizes=0;

		main_stream >> charge;
		main_stream >> num_sizes;
		
		if (charge != i)
		{
			cout << "Error: charge mismatch is size thresholds!" << endl;
			exit(1);
		}

		size_thresholds[i].resize(num_sizes);
		int j;
		for (j=0; j<num_sizes; j++)
			main_stream >> size_thresholds[i][j];
		
		if (num_sizes>0)
		{
			partition_models[i].resize(num_sizes+1);
			for (j=0; j<partition_models[i].size(); j++)
				partition_models[i][j].resize(4);
		}
	}

	if (! silent_ind)
		cout << "Peak rank feature set type: " << feature_set_type << endl;


	// read parition models
	int num_models_read=0;
	int charge;
	for (charge=1; charge<partition_models.size(); charge++)
	{
		if (specific_charge>=0 && charge != specific_charge)
			continue;

		int size_idx;
		for (size_idx=0; size_idx<partition_models[charge].size(); size_idx++)
		{

			if (specific_size>=0 && size_idx != specific_size)
				continue;

			int mobility;
			for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
			{

				if (specific_mobility>=0 && mobility != specific_mobility)
					continue;

				partition_models[charge][size_idx][mobility] = new PartitionModel;
				partition_models[charge][size_idx][mobility]->set_feature_set_type(feature_set_type);
	
				num_models_read+=partition_models[charge][size_idx][mobility]->read_partition_model(model_name_prefix,
					config,charge,size_idx,mobility);
				partition_models[charge][size_idx][mobility]->set_partition_name(name,charge,size_idx,mobility);
			}
		}
	}

	if (! silent_ind)
		cout << "Read " << num_models_read << " fragment rank models..." << endl;
	return true;
}


void PeakRankModel::write_peak_rank_model(char *name, char *out_dir)
{
	const string model_name_prefix = (out_dir? string(out_dir) : config->get_resource_dir())
									  + "/" + string(name);
	const string main_model_file = model_name_prefix + "_rank_model.txt";
	

	ofstream main_stream(main_model_file.c_str());

	if (! main_stream.good())
	{
		cout << "Error: couldn't open file for writing: " << main_model_file << endl;
		exit(1);
	}

	// name
	main_stream << name << " " << this->feature_set_type << endl;

	// aa labels
	main_stream << model_aa_labels.size();
	int a;
	for (a=0; a<model_aa_labels.size(); a++)
		main_stream << " " << model_aa_labels[a];
	main_stream << endl;

	// min max detected masses
	int charge;
	main_stream << setprecision(5) << max_detected_mass << endl;
	main_stream << charge_min_mass_coefficients.size();
	for (charge=0; charge<charge_min_mass_coefficients.size(); charge++)
		main_stream << " " << charge_min_mass_coefficients[charge];
	main_stream << endl;

	// size thresholds
	main_stream << size_thresholds.size() << endl;
	for (charge=0; charge<size_thresholds.size(); charge++)
	{
		main_stream << charge << " " << size_thresholds[charge].size();
		int size_idx;
		for (size_idx=0; size_idx<size_thresholds[charge].size(); size_idx++)
			main_stream << " " << setprecision(5) << size_thresholds[charge][size_idx];
		main_stream << endl;
	}

	// write the parition models
	int models_written = 0;
	for (charge=0; charge<partition_models.size(); charge++)
	{
		int size_idx;
		for (size_idx=0; size_idx<partition_models[charge].size(); size_idx++)
		{
			int mobility;
			for (mobility=MOBILE; mobility<=NONMOBILE; mobility++)
			{
				if (partition_models[charge][size_idx][mobility])
				{
					int n=partition_models[charge][size_idx][mobility]->write_partition_model(model_name_prefix);
					models_written += n;
				}
			}
		}
	}

	cout << "Wrote rank model: " << name << endl;
	cout << "A total of " << models_written << " fragment models were written..." << endl;
}


void TrainingPeptide::create_training_peptide(const PeakRankModel& rm, 
											  const AnnotatedSpectrum& as)
{
	Config *config = as.get_config();
	amino_acids = as.get_peptide().get_amino_acids();
	length = amino_acids.size();
	charge = as.get_charge();
	mobility = get_proton_mobility(as.get_peptide(),charge);
	pm_with_19 = as.get_true_mass_with_19();

	const mass_t min_detected_mass = rm.calc_min_detected_mass(as.get_true_mass_with_19(),charge);
	const mass_t max_detected_mass = rm.get_max_detected_mass();
	const vector<Breakage>& breakages = as.get_breakages();

	const bool verbose=false;


	// find all participating fragment types
	frag_idxs.clear();
	int b;
	for (b=1; b<breakages.size()-1; b++)
	{
		int f;
		for (f=0; f<breakages[b].fragments.size(); f++)
		{
			const int frag_idx = breakages[b].fragments[f].frag_type_idx;
			if (breakages[b].fragments[f].intensity>0.01 &&
				this->get_frag_idx_pos(frag_idx)<0)
				frag_idxs.push_back(frag_idx);
		}
	}

	intens.resize(frag_idxs.size());

//	cout << "Frags: " << ranks.size() << endl;

	if (verbose)
		cout << as.get_peptide().as_string(config) << endl;
	
	int f;
	for (f=0; f<frag_idxs.size(); f++)
	{
		const int frag_idx = frag_idxs[f];
		const FragmentType& frag = config->get_fragment(frag_idx);
		vector<mass_t>& frag_intens = intens[f];

		frag_intens.resize(length,0);
		int num_inten_peaks=0;

		int i;
		for (i=0; i<length; i++)
		{
			const int pos = breakages[i].get_position_of_frag_idx(frag_idx);
			if (pos>=0)
			{
				frag_intens[i]=breakages[i].fragments[pos].intensity;
				num_inten_peaks++;
			}
		}


		frag_intens[0]=num_inten_peaks;

		for (i=1; i<frag_intens.size(); i++)
		{
			const mass_t exp_mass = frag.calc_expected_mass(breakages[i].mass,pm_with_19);
			if (exp_mass<min_detected_mass || exp_mass>max_detected_mass)
			{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -