⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 peakrankmodel.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 4 页
字号:
				frag_intens[i]=-999.0;
			}
	
		}

		if (verbose)
		{
			cout << frag.label << "\t";
			for (i=0; i<intens[f].size(); i++)
				cout << " " << fixed << setprecision(3) << intens[f][i];
			cout << endl;
		}
	}

	if (verbose)
		cout << endl;
}

void TrainingPeptide::get_ranks_for_frag_idx(int frag_idx, vector<int>& ranks) const
{
	const int pos = get_frag_idx_pos(frag_idx);
	if (pos<0)
	{
		cout << "Error: ranks not collect for frag " << frag_idx << endl;
		exit(1);
	}
	

	find_ranks(intens[pos],ranks);
}

// made up score, higher means more basic amino acids
float TrainingPeptide::get_basicity_score() const
{
	float basicity_score=0;
	int i;

	for (i=0; i<this->amino_acids.size(); i++)
	{
		if (amino_acids[i] == Arg)
			basicity_score+=1.0;
		if (amino_acids[i] == Lys)
			basicity_score+=0.55;
		if (amino_acids[i] == His)
			basicity_score+=0.3;
	}
	return basicity_score;
}


void TrainingPeptide::write_to_stream(ofstream& ofs) const
{
	if (frag_idxs.size() != intens.size())
	{
		cout << "Error: frag_idxs not same size ss intens!" << endl;
		exit(1);
	}

	ofs << this->charge << " " << this->mobility << " " << fixed << setprecision(3) << this->pm_with_19 << endl;
	ofs << this->amino_acids.size();
	int i;
	for (i=0; i<amino_acids.size(); i++)
		ofs << " " << amino_acids[i];
	ofs << endl;
	ofs << this->frag_idxs.size();
	for (i=0; i<frag_idxs.size(); i++)
		ofs << " " << frag_idxs[i];
	ofs << endl;
	for (i=0; i<intens.size(); i++)
	{
		ofs << intens[i].size();
		int j;
		for (j=0; j<intens[i].size(); j++)
			ofs << " " << intens[i][j];
			
		ofs << endl;
	}
}

bool TrainingPeptide::read_from_stream(ifstream& ifs)
{
	char buff[2048];
	ifs.getline(buff,512);

	istringstream is(buff);

	is >> charge >> mobility >> pm_with_19;
	if (charge<=0 || charge >100 || mobility<MOBILE || mobility>NONMOBILE ||
		pm_with_19<50 || pm_with_19>1000000	)
	{
		return false;
	}
	
	length=0;
	ifs.getline(buff,1024);
	is.clear();
	is.str(buff);
	is >> length;
	if (length<=0 || length>1000)
	{
		return false;
	}
	int i;
	this->amino_acids.resize(length,-1);
	for (i=0; i<length; i++)
	{
		is >> amino_acids[i];
		if (amino_acids[i]<0 || amino_acids[i]>1000)
		{
			return false;
		}
	}

	int num_frags=0;
	ifs.getline(buff,2048);
	is.clear();
	is.str(buff);
	is >> num_frags;
	if (num_frags<1 || num_frags>100)
	{
		return false;
	}
	
	this->frag_idxs.resize(num_frags,-1);
	for (i=0; i<num_frags; i++)
	{
		is >> frag_idxs[i];
		if (frag_idxs[i]<0 || frag_idxs[i]>1000)
		{
			return false;
		}
	}
	intens.resize(num_frags);
	
	for (i=0; i<num_frags; i++)
	{
		ifs.getline(buff,2048);
		istringstream is(buff);

		int size=0;
		is >> size;
		intens[i].resize(size,0);
		int j;
		for (j=0; j<size; j++)
			is >> intens[i][j];
	}
	return true;
}

void  TrainingPeptide::print(Config *config, ostream& ofs) const
{
	const vector<string>& aa2label = config->get_aa2label();
	const vector<mass_t>& aa2mass  = config->get_aa2mass();
	int i;
	mass_t m=n_mass;

	ofs << this->best_n_removed << " " << this->best_c_removed << " " << this->n_mass << " (" << pm_with_19 << ") ";
	for (i=0; i<amino_acids.size(); i++)
	{
		ofs << aa2label[amino_acids[i]];
		m+=aa2mass[amino_acids[i]];
	}
	ofs << " " << m << endl;
}


void read_data_into_training_peptides(const FileManager& fm, 
									  Config *config, 
									  PeakRankModel& rm,
									  vector<TrainingPeptide>& tps)
{
	FileSet fs;
	fs.select_all_files(fm);

	while (1)
	{
		SingleSpectrumFile *ssf;
		AnnotatedSpectrum as;
		if (! fs.get_next_spectrum(fm,config,&as,&ssf))
			break;

		as.annotate_spectrum(as.get_true_mass_with_19());

		TrainingPeptide tp;
		tp.create_training_peptide(rm,as);
		if (tp.frag_idxs.size()==0)
			continue;
		tps.push_back(tp);

	}

}


void convert_list_to_trianing_peptide_file(char *list, char *tp_file,
										   char *model_name, char *ptm_line)
{
	PeakRankModel rm;
	RegularRankModel model;
	FileManager fm;
	vector<TrainingPeptide> all_tps;

	if (! model_name)
	{
		model.read_model("LTQ_LOW_TRYP");
	}
	else
		model.read_model(model_name);

	Config *config= model.get_config();

	if (! ptm_line)
	{
		config->apply_selected_PTMs("C+57:M+16:Q-17");
	}
	else
		config->apply_selected_PTMs(ptm_line);


	rm.set_mass_detection_defaults();
	fm.init_from_list_file(config,list);

	read_data_into_training_peptides(fm,config,rm,all_tps);
	write_training_peptides_to_file(tp_file,all_tps);
}


void read_training_peptides_from_file(char *file, vector<TrainingPeptide>& all_tps,
									  int num_tp)
{

	ifstream ifs(file);
	char buff[64];

	if (! ifs.is_open())
	{
		cout << "Error: couldn't open: " << file << endl;
		exit(1);
	}

	int total_num_tp;
	ifs.getline(buff,64);
	sscanf(buff,"%d",&total_num_tp);

	if (num_tp<0 || num_tp>total_num_tp)
		num_tp = total_num_tp;

	
	const int start_idx=all_tps.size();
	all_tps.resize(start_idx+num_tp);

	int num_errors=0;
	int i;
	for (i=0; i<num_tp; i++)
		if (! all_tps[start_idx+i].read_from_stream(ifs))
			num_errors++;
	
	if (num_errors>0)
	{
		cout << "Warning: encountered " << num_errors << " errors reading " << num_tp << " training peptides..." << endl;
	}
	ifs.close();
}


void write_training_peptides_to_file(char *file, const vector<TrainingPeptide>& all_tps)
{
	ofstream ofs(file);
	ofs << all_tps.size() << endl;
	int i;
	for (i=0; i<all_tps.size(); i++)
	{
		all_tps[i].write_to_stream(ofs);
	}
	ofs.close();
}



void select_training_peptides(const vector<TrainingPeptide>& all_tps, 
							  vector<int>& selected_idxs,
							  int charge, 
							  int mobility, 
							  int min_length, 
							  int max_length, 
							  mass_t min_pm_with_19, 
							  mass_t max_pm_with_19)
{
	selected_idxs.clear();

	int i;
	for (i=0; i<all_tps.size(); i++)
	{
		const TrainingPeptide& tp = all_tps[i];
		if (charge>0 && charge != tp.charge)
			continue;
		
		if (mobility>0 && mobility != tp.mobility)
			continue;

		if (tp.length<min_length || tp.length>max_length ||
			tp.pm_with_19<min_pm_with_19 || tp.pm_with_19>max_pm_with_19)
			continue;

		selected_idxs.push_back(i);
	}
}




// for tps of a given charge
void size_mobility_stats(const vector<TrainingPeptide>& all_tps)
{
	vector< vector<int> > counts; // size / mobiliy (0 = all);
	int i;
	
	counts.resize(100);
	for (i=0; i<counts.size(); i++)
		counts[i].resize(4,0);

	for (i=0; i<all_tps.size(); i++)
	{
		int size_idx = (int)(all_tps[i].pm_with_19 * 0.01);

		if (all_tps[i].mobility<MOBILE || all_tps[i].mobility>NONMOBILE)
		{
			cout << "Error: bad mobility value!" << endl;
			exit(1);
		}

		if (size_idx<3 || size_idx>97)
		{
			cout << "Error: bad size_idx!" << endl;
			exit(1);
		}

		counts[size_idx][0]++;
		counts[size_idx][all_tps[i].mobility]++;
	}

	for (i=0; i<99; i++)
	{
		if (counts[i][0]==0)
			continue;

		cout << i*100;
		int j;
		for (j=0; j<4; j++)
		{
			cout << "\t" << counts[i][j];
			counts[99][j]+=counts[i][j];
		}
		cout << endl;	
	}
	
	cout << "---------------------------------------" << endl;
	
	for (i=0; i<4; i++)
		cout << "\t" << counts[99][i];
	cout << endl << endl;
}

// for tps of a given charge
void aa_composition_stats(const vector<TrainingPeptide>& all_tps, Config *config)
{
	const int num_aas = config->get_max_session_aa_idx()+1;

	vector< vector<int> > counts; // size / mobiliy (0 = all);
	
	int i;
	
	counts.resize(num_aas+1);
	for (i=0; i<=num_aas; i++)
		counts[i].resize(4,0);

	for (i=0; i<all_tps.size(); i++)
	{
		int size_idx = (int)(all_tps[i].pm_with_19 * 0.01);

		if (all_tps[i].mobility<MOBILE || all_tps[i].mobility>NONMOBILE)
		{
			cout << "Error: bad mobility value!" << endl;
			exit(1);
		}

		if (size_idx<3 || size_idx>97)
		{
			cout << "Error: bad size_idx!" << endl;
			exit(1);
		}

		int j;
		for (j=0; j<all_tps[i].amino_acids.size(); j++)
		{
			counts[all_tps[i].amino_acids[j]][0]++;
			counts[all_tps[i].amino_acids[j]][all_tps[i].mobility]++;
		}
	}

	for (i=0; i<num_aas; i++)
	{
		int j;
		for (j=0; j<4; j++)
			counts[num_aas][j]+=counts[i][j];
	}

	for (i=0; i<num_aas; i++)
	{
		if (counts[i][0]==0)
			continue;

		cout << config->get_aa2label()[i];
		int j;
		for (j=0; j<4; j++)
		{
			cout << "\t" << counts[i][j] << "\t" << fixed << setprecision(3) << 
				counts[i][j]/(double)counts[num_aas][j];
		}
		cout << endl;	
	}
	
	cout << "--------------------------------------------------------" << endl;
	
	for (i=0; i<4; i++)
		cout << "\t" << counts[num_aas][i] << "\t";
	cout << endl << endl;
}


void fragment_detection_stats(const vector<TrainingPeptide>& all_tps, Config *config)
{
	const int num_frags = config->get_all_fragments().size();

	vector< vector<int> > counts; // size / mobiliy (0 = all);
	vector< vector<int> > totals;
	
	int i;
	
	counts.resize(num_frags);
	for (i=0; i<=num_frags; i++)
		counts[i].resize(4,0);

	totals = counts;

	for (i=0; i<all_tps.size(); i++)
	{
		int size_idx = (int)(all_tps[i].pm_with_19 * 0.01);

		if (all_tps[i].mobility<MOBILE || all_tps[i].mobility>NONMOBILE)
		{
			cout << "Error: bad mobility value!" << endl;
			exit(1);
		}

		if (size_idx<3 || size_idx>97)
		{
			cout << "Error: bad size_idx!" << endl;
			exit(1);
		}

		const TrainingPeptide& tp = all_tps[i];
		int f;
		for (f=0; f<tp.frag_idxs.size(); f++)
		{
			const int frag_idx = tp.frag_idxs[f];
			int a;
			for (a=0; a<tp.intens[f].size(); a++)
			{
				if (tp.intens[f][a]>=0)
				{
					totals[frag_idx][0]++;
					totals[frag_idx][tp.mobility]++;
				}

				if (tp.intens[f][a]>0)
				{
					counts[frag_idx][0]++;
					counts[frag_idx][tp.mobility]++;
				}
			}
		}
	}

	for (i=0; i<num_frags; i++)
	{
	
		if (totals[i][0]==0)
			continue;

		cout << config->get_all_fragments()[i].label;
		int j;
		for (j=0; j<4; j++)
		{
			cout << "\t" << totals[i][j] << "\t" << fixed << setprecision(3) << 
				counts[i][j]/(double)totals[i][j];
		}
		cout << endl;	
	}
	
	cout << "--------------------------------------------------------" << endl;
}



void generate_size_reports()
{
	PeakRankModel rm;
	RegularRankModel model;
	vector<FileManager> fms;

	model.read_model("LTQ_LOW_TRYP"); 
	Config *config= model.get_config();

	config->apply_selected_PTMs("C+57:M+16:Q-17");

	rm.set_mass_detection_defaults();

	int charge;
	for (charge=1; charge<=3; charge++)
	{

		vector<TrainingPeptide> tps;

		tps.clear();

		char shew_file[256],hek_file[256];

		sprintf(shew_file,"C:\\Work\\msms5\\NewScore\\tps\\Shew_98_%d_unique_tps.txt",charge);
		sprintf(hek_file,"C:\\Work\\msms5\\NewScore\\tps\\HEK_98_%d_unique_tps.txt",charge);

		read_training_peptides_from_file(shew_file,tps);
		read_training_peptides_from_file(hek_file,tps);

		cout << "Cahrge " << charge << "  Read " << tps.size() << " peptides..." << endl;
		size_mobility_stats(tps);
		cout << endl;
	}

}




⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -