⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rescoredb.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	}
}


/***************************************************************************************
This function touches up inspect search results by rescoring the sequences returned by
inspect. The function produces a new inspect results file with the scores (and delta scores)
replaced.
****************************************************************************************/
void DeNovoRankScorer::rescore_inspect_results(char *spectra_file, 
											   char *inspect_res, 
											   char *new_res_file) const
{
	Config *config = model->get_config();

	ifstream org_res(inspect_res);

	if (!  org_res.is_open() || ! org_res.good())
	{
		cout << "Error: couldn't open original inspect results file for reading:" << inspect_res << endl;
		exit(1);
	}

	ofstream new_res(new_res_file);
	if (! new_res.is_open() || ! new_res.good())
	{
			cout << "Error: couldn't open new inspect results file for writing:" << new_res << endl;
		exit(1);
	}

	char line_buff[1024];
	org_res.getline(line_buff,1024);

	bool read_line  = true;
	vector<string> field_names;
	if (line_buff[0] != '#')
	{
		read_line = false;
	}
	else
	{
		string header = string(line_buff);
		split_string(header,field_names);

		int i;
		for (i=0; i<field_names.size(); i++)
			cout << i << "\t" << field_names[i] << endl;
	}


	vector<ScanCandidateSet> cand_sets;
	vector<int> scan_mapping;
	cand_sets.clear();
	scan_mapping.resize(100000,-1);
	
	while (! org_res.eof())
	{
		vector<string> fields;

		if (read_line)
		{
			org_res.getline(line_buff,1024);
			if (org_res.gcount() < 5)
				continue;
		}
		else
		{
			read_line = true;
		}

		split_string(line_buff,fields);
		InspectResultsLine res;

		res.parse_from_fields(config,fields);

		if (cand_sets.size()==0 || ! cand_sets[cand_sets.size()-1].add_new_line(res))
		{
			ScanCandidateSet new_set;
			new_set.add_new_line(res);
			
			if (new_set.scan>=scan_mapping.size())
				scan_mapping.resize(2*scan_mapping.size(),-1);

			scan_mapping[new_set.scan]=cand_sets.size();
			cand_sets.push_back(new_set);
		}
	}
	org_res.close();

	cout << "Read results for " << cand_sets.size() << " scans..." << endl;

	FileManager fm;
	FileSet     fs;
	fm.init_from_file(config,spectra_file);
	fs.select_all_files(fm);
	const vector<SingleSpectrumFile *>& all_ssfs = fs.get_ssf_pointers();

	cout << "Read " <<  all_ssfs.size() << " spectra headers..." << endl;

	BasicSpecReader bsr;
	QCPeak *peaks = new QCPeak[5000];

	vector<bool> spectrum_indicators;
	spectrum_indicators.resize(cand_sets.size(),false);

	int num_found =0;
	int i;
	for (i=0; i<all_ssfs.size(); i++)
	{
		SingleSpectrumFile *ssf = all_ssfs[i];
		
		const int scan_number = ssf->get_scan();
		if (scan_mapping[scan_number]<0)
			continue;

		const int num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);

		spectrum_indicators[scan_mapping[scan_number]]=true;
		num_found++;

		ScanCandidateSet& cand_set = cand_sets[scan_mapping[scan_number]];
		
		vector<PeptideSolution> peptide_sols;
		peptide_sols.resize(cand_set.results.size());

		int j;
		for (j=0; j<cand_set.results.size(); j++)
		{
			InspectResultsLine& inspect_res = cand_set.results[j];
			PeptideSolution& sol = peptide_sols[j];

			sol.pep = inspect_res.pep;
			sol.pm_with_19 = sol.pep.get_mass_with_19();
			sol.charge = inspect_res.Charge;
			sol.reaches_n_terminal = true;
			sol.reaches_c_terminal = true;
		}

		vector<score_pair> scores;
		score_complete_sequences(peptide_sols,ssf,peaks,num_peaks,scores);

		for (j=0; j<scores.size(); j++)
			cand_set.results[j].Score = scores[j].score;

		cand_set.recalbirate_scores(config);

		vector<string> pep_strings;
		pep_strings.resize(scores.size());
		int max_len =0;
		for (j=0; j<cand_set.results.size(); j++)
		{
			pep_strings[j]=cand_set.results[j].pep.as_string(config);
			if (pep_strings[j].length()>max_len)
				max_len = pep_strings[j].length();
		}

		if (1)
		{
			cand_set.output_to_stream(new_res,10);
		}
		else
		{
			for (j=0; j<cand_set.results.size(); j++)
			{
				cout << cand_set.scan << " " << cand_set.results[j].Charge << "\t";

				cout << cand_set.results[j].Protein.substr(0,3) << " " << pep_strings[j];
				if (pep_strings[j].length()<max_len)
				{
					int k;
					for (k=pep_strings[j].length(); k<max_len; k++)
						cout << " ";
				}
				cout << "\t" << cand_set.results[j].MQScore << "\t" << cand_set.results[j].Score << "\t" <<
				cand_set.results[j].DeltaScore << "\t" << cand_set.results[j].DeltaScoreOther << endl;
			}
			cout << endl;
		}
	}

	if (num_found<cand_sets.size())
	{
		cout << "Warning: found only " << num_found << "/" << cand_sets.size() << " of the scans scored by InsPecT!" << endl;
	}
	else
	{
		cout << "All scored scans found in spectrum file." << endl;
	}


	delete [] peaks;
}



/***************************************************************************************
This function touches up inspect search results by rescoring the sequences returned by
inspect. The function produces a new inspect results file with the scores (and delta scores)
replaced.
****************************************************************************************/
void DeNovoRankScorer::recalibrate_inspect_delta_scores(char *spectra_file, 
											   char *inspect_res, 
											   char *new_res_file) const
{
	Config *config = model->get_config();

	ifstream org_res(inspect_res);

	if (!  org_res.is_open() || ! org_res.good())
	{
		cout << "Error: couldn't open original inspect results file for reading:" << inspect_res << endl;
		exit(1);
	}

	ofstream new_res(new_res_file);
	if (! new_res.is_open() || ! new_res.good())
	{
			cout << "Error: couldn't open new inspect results file for writing:" << new_res << endl;
		exit(1);
	}

	char line_buff[1024];
	org_res.getline(line_buff,1024);

	bool read_line  = true;
	vector<string> field_names;
	if (line_buff[0] != '#')
	{
		read_line = false;
	}
	else
	{
		string header = string(line_buff);
		split_string(header,field_names);

		int i;
		for (i=0; i<field_names.size(); i++)
			cout << i << "\t" << field_names[i] << endl;
	}


	vector<ScanCandidateSet> cand_sets;
	vector<int> scan_mapping;
	cand_sets.clear();
	scan_mapping.resize(100000,-1);
	
	while (! org_res.eof())
	{
		vector<string> fields;

		if (read_line)
		{
			org_res.getline(line_buff,1024);
			if (org_res.gcount() < 5)
				continue;
		}
		else
		{
			read_line = true;
		}

		split_string(line_buff,fields);
		InspectResultsLine res;

		res.parse_from_fields(config,fields);

		if (cand_sets.size()==0 || ! cand_sets[cand_sets.size()-1].add_new_line(res))
		{
			ScanCandidateSet new_set;
			new_set.add_new_line(res);
			
			if (new_set.scan>=scan_mapping.size())
				scan_mapping.resize(2*scan_mapping.size(),-1);

			scan_mapping[new_set.scan]=cand_sets.size();
			cand_sets.push_back(new_set);
		}
	}
	org_res.close();

	cout << "Read results for " << cand_sets.size() << " scans..." << endl;

	FileManager fm;
	FileSet     fs;
	fm.init_from_file(config,spectra_file);
	fs.select_all_files(fm);
	const vector<SingleSpectrumFile *>& all_ssfs = fs.get_ssf_pointers();

	cout << "Read " <<  all_ssfs.size() << " spectra headers..." << endl;

	BasicSpecReader bsr;
	QCPeak *peaks = new QCPeak[5000];

	vector<bool> spectrum_indicators;
	spectrum_indicators.resize(cand_sets.size(),false);

	int num_found =0;
	int i;
	for (i=0; i<all_ssfs.size(); i++)
	{
		SingleSpectrumFile *ssf = all_ssfs[i];
		
		const int scan_number = ssf->get_scan();
		if (scan_mapping[scan_number]<0)
			continue;

		const int num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);

		spectrum_indicators[scan_mapping[scan_number]]=true;
		num_found++;

		ScanCandidateSet& cand_set = cand_sets[scan_mapping[scan_number]];
		
		cand_set.recalbirate_scores(config);

		cand_set.output_to_stream(new_res,10);
	}

	if (num_found<cand_sets.size())
	{
		cout << "Warning: found only " << num_found << "/" << cand_sets.size() << " of the scans scored by InsPecT!" << endl;
	}
	else
	{
		cout << "All scored scans found in spectrum file." << endl;
	}


	delete [] peaks;
}



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -