📄 rescoredb.cpp
字号:
#include "DeNovoRankScore.h"
#include "auxfun.h"
struct InspectResultsLine {
bool parse_from_fields(Config *config,
const vector<string>& fields);
string SpectrumFile;
int scan;
string Annotation;
string Protein;
int Charge;
float MQScore;
float Score;
int Length;
float TotalPRMScore;
float MedianPRMScore;
float FractionY;
float FractionB;
float Intensity;
int NTT;
float p_value;
float F_Score;
float DeltaScore;
float DeltaScoreOther;
int RecordNumber;
int DBFilePos;
int SpecFilePos;
int orgRank;
int aaBefore;
int aaAfter;
Peptide pep;
};
struct ScanCandidateSet {
bool add_new_line(const InspectResultsLine& res); // adds only if has the same file_idx, scan as others
void recalbirate_scores(Config *config); // resorts and fills in the delta score fields
// according tho the value of the Score field
void output_to_stream(ostream& os, int num_lines=-1 ) const;
int scan;
vector<InspectResultsLine> results;
};
bool ScanCandidateSet::add_new_line(const InspectResultsLine& res)
{
if (results.size() == 0)
{
scan = res.scan;
results.push_back(res);
results[0].orgRank = 0;
return true;
}
if ( scan != res.scan)
return false;
results.push_back(res);
results[results.size()-1].orgRank = results.size()-1;
return true;
}
/***********************************************************************
Returns true if one of the mass lists is contained in the other (up to
the tolerance level);
************************************************************************/
bool compare_cut_lists(const mass_t tolerance,
const vector<mass_t>& a_masses,
const vector<mass_t>& b_masses)
{
const int num_a_cuts = a_masses.size();
const int num_b_cuts = b_masses.size();
int num_same=0;
int a=0, b=0;
const int min_needed = (int)(0.8 * (num_a_cuts<num_b_cuts ? num_a_cuts : num_b_cuts));
while (a<num_a_cuts && b<num_b_cuts)
{
const mass_t a_mass = a_masses[a];
const mass_t b_mass = b_masses[b];
if (fabs(a_mass-b_mass)<tolerance)
{
num_same++;
a++;
b++;
continue;
}
if (a_mass<b_mass)
{
a++;
continue;
}
b++;
}
if (num_same >= min_needed)
return true;
// check shifted
const mass_t shift = a_masses[num_a_cuts-1] - b_masses[num_b_cuts-1];
a=0;
b=0;
num_same=0;
while (a<num_a_cuts && b<num_b_cuts)
{
const mass_t a_mass = a_masses[a];
const mass_t b_mass = b_masses[b]+shift;
if (fabs(a_mass-b_mass)<tolerance)
{
num_same++;
a++;
b++;
continue;
}
if (a_mass<b_mass)
{
a++;
continue;
}
b++;
}
if (num_same >= min_needed)
return true;
return false;
}
/************************************************************************
Reorders the set's solutions according to the scores.
Updates the Delta score and Delta score other according to the new score.
*************************************************************************/
void ScanCandidateSet::recalbirate_scores(Config *config)
{
vector<score_pair> pairs;
pairs.resize(results.size());
int i;
for (i=0; i<pairs.size(); i++)
{
pairs[i].idx=i;
pairs[i].score = results[i].Score;
}
sort(pairs.begin(),pairs.end());
vector<InspectResultsLine> sorted_results;
sorted_results.resize(results.size());
for (i=0; i<pairs.size(); i++)
sorted_results[i]=results[pairs[i].idx];
results=sorted_results;
const int num_results = results.size();
if (num_results==1)
{
results[0].DeltaScore=0;
results[0].DeltaScoreOther=0;
return;
}
vector< vector<mass_t> > exp_cut_masses;
exp_cut_masses.resize(num_results);
for (i=0; i<num_results; i++)
results[i].pep.calc_expected_breakage_masses(config,exp_cut_masses[i]);
results[0].DeltaScore = results[0].Score - results[1].Score;
results[0].DeltaScoreOther = results[0].DeltaScore;
for (i=1; i<num_results; i++)
{
results[i].DeltaScore = results[i].Score - results[0].Score;
results[i].DeltaScoreOther = results[i].DeltaScore; // default value
}
const mass_t tolerance = config->get_tolerance();
vector<bool> similar_cuts;
similar_cuts.resize(num_results,true);
for (i=1; i<num_results; i++)
similar_cuts[i]=compare_cut_lists(tolerance,exp_cut_masses[0],exp_cut_masses[i]);
// check if we need to correct the delta other
if (similar_cuts[1] && num_results>2)
{
int j;
for (j=2; j<num_results-1; j++)
if (! similar_cuts[j])
break;
results[0].DeltaScoreOther = results[0].Score - results[j].Score;
}
// don't change the delta other of the lower ranks even if they are similar to the top scoring one
// only the top score is what should be considerd
/* for (i=1; i<num_results; i++)
{
if (similar_cuts[i])
{
int j;
for (j=1; j<num_results-1; j++)
{
if (j==i)
continue;
if (! compare_cut_lists(tolerance,exp_cut_masses[i],exp_cut_masses[j]))
break;
}
results[i].DeltaScoreOther = results[i].Score - results[j].Score;
}
}*/
}
bool InspectResultsLine::parse_from_fields(Config *config,
const vector<string>& fields)
{
if (fields.size() != 20)
{
cout<< "Error: inspect results line has " << fields.size() << ", expecting 20" << endl;
exit(1);
}
SpectrumFile = fields[0];
if (sscanf(fields[1].c_str(),"%d",&scan) != 1 ||
scan<0 || scan>100000000)
error("scan");
Annotation = fields[2];
Protein = fields[3];
if (sscanf(fields[4].c_str(),"%d",&Charge) != 1 ||
Charge<0 || Charge>20)
error("Charge");
if (sscanf(fields[5].c_str(),"%f",&MQScore) != 1 ||
Score<NEG_INF || Score>POS_INF)
error("MQScore");
if (sscanf(fields[6].c_str(),"%d",&Length) != 1 ||
Length<1 || Length>POS_INF)
error("Length");
if (sscanf(fields[7].c_str(),"%f",&TotalPRMScore) != 1 ||
TotalPRMScore<NEG_INF || TotalPRMScore>POS_INF)
error("TotalPRMScore");
if (sscanf(fields[8].c_str(),"%f",&MedianPRMScore) != 1 ||
MedianPRMScore<NEG_INF || MedianPRMScore>POS_INF)
error("MedianPRMScore");
if (sscanf(fields[9].c_str(),"%f",&FractionY) != 1 ||
FractionY<0 || FractionY>1000)
error("FractionY");
if (sscanf(fields[10].c_str(),"%f",&FractionB) != 1 ||
FractionB<0 || FractionB>1000)
error("FractionB");
if (sscanf(fields[11].c_str(),"%f",&Intensity) != 1 ||
Intensity<0)
error("Intensity");
if (sscanf(fields[12].c_str(),"%d",&NTT) != 1 ||
NTT<0 || NTT>3)
error("NTT");
if (sscanf(fields[13].c_str(),"%f",&p_value) != 1)
error("p_value");
if (sscanf(fields[14].c_str(),"%f",&F_Score) != 1)
error("F_Score");
if (sscanf(fields[15].c_str(),"%f",&DeltaScore) != 1)
error("DeltaScore");
if (sscanf(fields[16].c_str(),"%f",&DeltaScoreOther) != 1)
error("DeltaScoreOther");
if (sscanf(fields[17].c_str(),"%d",&RecordNumber) != 1)
error("RecordNumber");
if (sscanf(fields[18].c_str(),"%d",&DBFilePos) != 1)
error("DBFilePos");
if (sscanf(fields[19].c_str(),"%d",&SpecFilePos) != 1)
error("SpecFilePos");
Score = MQScore;
const vector<int>& char2aa = config->get_char2aa();
const int ann_length = Annotation.length();
if ((Annotation[1] != '.') || (Annotation[ann_length-2] != '.'))
{
cout << "Error: bad annotation format: " << Annotation << endl;
cout << "Expecting X.XXXXXXXXX.X" << endl;
cout << "Ann1 : " << Annotation[1] << endl;
cout << "Ann n-2: " << Annotation[ann_length-2] << endl;
exit(1);
}
// cout << "|" << Annotation << "|" << endl;
aaBefore = char2aa[Annotation[0]];
aaAfter = char2aa[Annotation[ann_length-1]];
pep.parse_from_string(config,Annotation.substr(2,ann_length-4));
return true;
}
void ScanCandidateSet::output_to_stream(ostream& os, int num_lines) const
{
int i;
for (i=0; i<this->results.size(); i++)
{
if (i==num_lines)
break;
os << results[i].SpectrumFile << "\t" << results[i].scan << "\t" << results[i].Annotation << "\t" << results[i].Protein << "\t";
os << results[i].Charge << "\t" << results[i].Score << "\t" << results[i].Length << "\t";
os << results[i].TotalPRMScore <<"\t" << results[i].MedianPRMScore << "\t" << results[i].FractionY << "\t";
os << results[i].FractionB << "\t" << results[i].Intensity << "\t" << results[i].NTT << "\t";
os << results[i].p_value << "\t" << results[i].F_Score << "\t" << results[i].DeltaScore << "\t";
os << results[i].DeltaScoreOther << "\t" << results[i].RecordNumber << "\t" << results[i].DBFilePos << "\t";
os << results[i].SpecFilePos << endl;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -