⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rescoredb.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "DeNovoRankScore.h"
#include "auxfun.h"


struct InspectResultsLine {
	bool parse_from_fields(Config *config,
						   const vector<string>& fields);

	string	SpectrumFile;
	int		scan;
	string	Annotation;
	string	Protein;
	int		Charge;
	float	MQScore;
	float	Score;
	int		Length;
	float	TotalPRMScore;
	float	MedianPRMScore;
	float	FractionY;
	float	FractionB;
	float	Intensity;
	int		NTT;
	float	p_value;
	float	F_Score;
	float	DeltaScore;
	float	DeltaScoreOther;
	int		RecordNumber;
	int		DBFilePos;
	int		SpecFilePos;

	int		orgRank;
	int		aaBefore;
	int		aaAfter;
	Peptide	pep;
};

struct ScanCandidateSet {

	bool add_new_line(const InspectResultsLine& res); // adds only if has the same file_idx, scan as others

	void recalbirate_scores(Config *config); // resorts and fills in the delta score fields 
											// according tho the value of the Score field

	void output_to_stream(ostream& os, int num_lines=-1	) const;

	int scan;
	vector<InspectResultsLine> results;
};



bool ScanCandidateSet::add_new_line(const InspectResultsLine& res)
{
	if (results.size() == 0)
	{
		scan = res.scan;
		results.push_back(res);
		results[0].orgRank = 0;
		return true;
	}

	if ( scan != res.scan)
		return false;

	results.push_back(res);
	results[results.size()-1].orgRank = results.size()-1;
	return true;
}


/***********************************************************************
Returns true if one of the mass lists is contained in the other (up to 
the tolerance level);
************************************************************************/
bool compare_cut_lists(const mass_t tolerance,
					   const vector<mass_t>& a_masses,
					   const vector<mass_t>& b_masses)
{
	const int num_a_cuts = a_masses.size();
	const int num_b_cuts = b_masses.size();
	int num_same=0;
	int a=0, b=0;
	
	const int min_needed = (int)(0.8 * (num_a_cuts<num_b_cuts ? num_a_cuts : num_b_cuts));

	while (a<num_a_cuts && b<num_b_cuts)
	{
		const mass_t a_mass = a_masses[a];
		const mass_t b_mass = b_masses[b];
		if (fabs(a_mass-b_mass)<tolerance)
		{
			num_same++;
			a++;
			b++;
			continue;
		}
		if (a_mass<b_mass)
		{
			a++;
			continue;
		}
		b++;
	}
	if (num_same >= min_needed)
		return true;

	// check shifted
	const mass_t shift = a_masses[num_a_cuts-1] - b_masses[num_b_cuts-1];
	a=0;
	b=0;
	num_same=0;
	while (a<num_a_cuts && b<num_b_cuts)
	{
		const mass_t a_mass = a_masses[a];
		const mass_t b_mass = b_masses[b]+shift;
		if (fabs(a_mass-b_mass)<tolerance)
		{
			num_same++;
			a++;
			b++;
			continue;
		}
		if (a_mass<b_mass)
		{
			a++;
			continue;
		}
		b++;
	}
	if (num_same >= min_needed)
		return true;

	
	return false;
}


/************************************************************************
Reorders the set's solutions according to the scores.
Updates the Delta score and Delta score other according to the new score.
*************************************************************************/
void ScanCandidateSet::recalbirate_scores(Config *config)
{
	vector<score_pair> pairs; 
	
	pairs.resize(results.size());
	int i;
	for (i=0; i<pairs.size(); i++)
	{
		pairs[i].idx=i;
		pairs[i].score = results[i].Score;
	}

	sort(pairs.begin(),pairs.end());

	vector<InspectResultsLine> sorted_results;
	sorted_results.resize(results.size());
	for (i=0; i<pairs.size(); i++)
		sorted_results[i]=results[pairs[i].idx];

	results=sorted_results;
	const int num_results = results.size();

	if (num_results==1)
	{
		results[0].DeltaScore=0;
		results[0].DeltaScoreOther=0;
		return;
	}

	vector< vector<mass_t> > exp_cut_masses;
	exp_cut_masses.resize(num_results);
	for (i=0; i<num_results; i++)
		results[i].pep.calc_expected_breakage_masses(config,exp_cut_masses[i]);
	
	results[0].DeltaScore = results[0].Score - results[1].Score;
	results[0].DeltaScoreOther = results[0].DeltaScore;
	for (i=1; i<num_results; i++)
	{
		results[i].DeltaScore = results[i].Score - results[0].Score;
		results[i].DeltaScoreOther = results[i].DeltaScore; // default value
	}

	const mass_t tolerance = config->get_tolerance();
	vector<bool> similar_cuts;
	similar_cuts.resize(num_results,true);
	for (i=1; i<num_results; i++)
		similar_cuts[i]=compare_cut_lists(tolerance,exp_cut_masses[0],exp_cut_masses[i]);
	
	// check if we need to correct the delta other
	if (similar_cuts[1] && num_results>2)
	{
		int j;
		for (j=2; j<num_results-1; j++)
			if (! similar_cuts[j])
				break;
		results[0].DeltaScoreOther = results[0].Score - results[j].Score;
	}
	
	// don't change the delta other of the lower ranks even if they are similar to the top scoring one
	// only the top score is what should be considerd

/*	for (i=1; i<num_results; i++)
	{
		if (similar_cuts[i])
		{
			int j;
			for (j=1; j<num_results-1; j++)
			{
				if (j==i)
					continue;
				if (! compare_cut_lists(tolerance,exp_cut_masses[i],exp_cut_masses[j]))
					break;
			}
			results[i].DeltaScoreOther = results[i].Score - results[j].Score;
		}
	}*/
}



bool InspectResultsLine::parse_from_fields(Config *config,
										   const vector<string>& fields)
{
	if (fields.size() != 20)
	{
		cout<< "Error: inspect results line has " << fields.size() << ", expecting 20" << endl;
		exit(1);
	}

	SpectrumFile = fields[0];

	if (sscanf(fields[1].c_str(),"%d",&scan) != 1 ||
		scan<0 || scan>100000000)
		error("scan");

	Annotation = fields[2];
	Protein	   = fields[3];

	if (sscanf(fields[4].c_str(),"%d",&Charge) != 1 ||
		Charge<0 || Charge>20)
		error("Charge");

	if (sscanf(fields[5].c_str(),"%f",&MQScore) != 1 ||
		Score<NEG_INF || Score>POS_INF)
		error("MQScore");
	
	if (sscanf(fields[6].c_str(),"%d",&Length) != 1 ||
		Length<1 || Length>POS_INF)
		error("Length");
	
	if (sscanf(fields[7].c_str(),"%f",&TotalPRMScore) != 1 ||
		TotalPRMScore<NEG_INF || TotalPRMScore>POS_INF)
		error("TotalPRMScore");

	if (sscanf(fields[8].c_str(),"%f",&MedianPRMScore) != 1 ||
		MedianPRMScore<NEG_INF || MedianPRMScore>POS_INF)
		error("MedianPRMScore");

	if (sscanf(fields[9].c_str(),"%f",&FractionY) != 1 ||
		FractionY<0 || FractionY>1000)
		error("FractionY");

	if (sscanf(fields[10].c_str(),"%f",&FractionB) != 1 ||
		FractionB<0 || FractionB>1000)
		error("FractionB");

	if (sscanf(fields[11].c_str(),"%f",&Intensity) != 1 ||
		Intensity<0)
		error("Intensity");

	if (sscanf(fields[12].c_str(),"%d",&NTT) != 1 ||
		NTT<0 || NTT>3)
		error("NTT");

	if (sscanf(fields[13].c_str(),"%f",&p_value) != 1)
		error("p_value");

	if (sscanf(fields[14].c_str(),"%f",&F_Score) != 1)
		error("F_Score");

	if (sscanf(fields[15].c_str(),"%f",&DeltaScore) != 1)
		error("DeltaScore");

	if (sscanf(fields[16].c_str(),"%f",&DeltaScoreOther) != 1)
		error("DeltaScoreOther");

	if (sscanf(fields[17].c_str(),"%d",&RecordNumber) != 1)
		error("RecordNumber");

	if (sscanf(fields[18].c_str(),"%d",&DBFilePos) != 1)
		error("DBFilePos");

	if (sscanf(fields[19].c_str(),"%d",&SpecFilePos) != 1)
		error("SpecFilePos");

	Score = MQScore;

	const vector<int>& char2aa = config->get_char2aa();
	const int ann_length = Annotation.length();

	if ((Annotation[1] != '.') || (Annotation[ann_length-2] != '.'))
	{
		cout << "Error: bad annotation format: " << Annotation << endl;
		cout << "Expecting X.XXXXXXXXX.X" << endl;
		cout << "Ann1   : " << Annotation[1] << endl;
		cout << "Ann n-2: " << Annotation[ann_length-2] << endl;
		exit(1);
	}

//	cout << "|" << Annotation << "|" << endl;
	aaBefore = char2aa[Annotation[0]];
	aaAfter	 = char2aa[Annotation[ann_length-1]];

	pep.parse_from_string(config,Annotation.substr(2,ann_length-4));
	
	return true;
}

void ScanCandidateSet::output_to_stream(ostream& os, int num_lines) const
{
	int i;
	for (i=0; i<this->results.size(); i++)
	{
		if (i==num_lines)
			break;

		os << results[i].SpectrumFile << "\t" << results[i].scan << "\t" << results[i].Annotation << "\t" << results[i].Protein << "\t";
		os << results[i].Charge << "\t" << results[i].Score << "\t" << results[i].Length << "\t";
		os << results[i].TotalPRMScore <<"\t" << results[i].MedianPRMScore << "\t" << results[i].FractionY << "\t";
		os << results[i].FractionB << "\t" << results[i].Intensity << "\t" << results[i].NTT << "\t";
		os << results[i].p_value << "\t" << results[i].F_Score << "\t" << results[i].DeltaScore << "\t";
		os << results[i].DeltaScoreOther << "\t" << results[i].RecordNumber << "\t" << results[i].DBFilePos << "\t";
		os << results[i].SpecFilePos << endl;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -