📄 rescoredb.cpp
字号:
}
}
/***************************************************************************************
This function touches up inspect search results by rescoring the sequences returned by
inspect. The function produces a new inspect results file with the scores (and delta scores)
replaced.
****************************************************************************************/
void DeNovoRankScorer::rescore_inspect_results(char *spectra_file,
char *inspect_res,
char *new_res_file) const
{
Config *config = model->get_config();
ifstream org_res(inspect_res);
if (! org_res.is_open() || ! org_res.good())
{
cout << "Error: couldn't open original inspect results file for reading:" << inspect_res << endl;
exit(1);
}
ofstream new_res(new_res_file);
if (! new_res.is_open() || ! new_res.good())
{
cout << "Error: couldn't open new inspect results file for writing:" << new_res << endl;
exit(1);
}
char line_buff[1024];
org_res.getline(line_buff,1024);
bool read_line = true;
vector<string> field_names;
if (line_buff[0] != '#')
{
read_line = false;
}
else
{
string header = string(line_buff);
split_string(header,field_names);
int i;
for (i=0; i<field_names.size(); i++)
cout << i << "\t" << field_names[i] << endl;
}
vector<ScanCandidateSet> cand_sets;
vector<int> scan_mapping;
cand_sets.clear();
scan_mapping.resize(100000,-1);
while (! org_res.eof())
{
vector<string> fields;
if (read_line)
{
org_res.getline(line_buff,1024);
if (org_res.gcount() < 5)
continue;
}
else
{
read_line = true;
}
split_string(line_buff,fields);
InspectResultsLine res;
res.parse_from_fields(config,fields);
if (cand_sets.size()==0 || ! cand_sets[cand_sets.size()-1].add_new_line(res))
{
ScanCandidateSet new_set;
new_set.add_new_line(res);
if (new_set.scan>=scan_mapping.size())
scan_mapping.resize(2*scan_mapping.size(),-1);
scan_mapping[new_set.scan]=cand_sets.size();
cand_sets.push_back(new_set);
}
}
org_res.close();
cout << "Read results for " << cand_sets.size() << " scans..." << endl;
FileManager fm;
FileSet fs;
fm.init_from_file(config,spectra_file);
fs.select_all_files(fm);
const vector<SingleSpectrumFile *>& all_ssfs = fs.get_ssf_pointers();
cout << "Read " << all_ssfs.size() << " spectra headers..." << endl;
BasicSpecReader bsr;
QCPeak *peaks = new QCPeak[5000];
vector<bool> spectrum_indicators;
spectrum_indicators.resize(cand_sets.size(),false);
int num_found =0;
int i;
for (i=0; i<all_ssfs.size(); i++)
{
SingleSpectrumFile *ssf = all_ssfs[i];
const int scan_number = ssf->get_scan();
if (scan_mapping[scan_number]<0)
continue;
const int num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);
spectrum_indicators[scan_mapping[scan_number]]=true;
num_found++;
ScanCandidateSet& cand_set = cand_sets[scan_mapping[scan_number]];
vector<PeptideSolution> peptide_sols;
peptide_sols.resize(cand_set.results.size());
int j;
for (j=0; j<cand_set.results.size(); j++)
{
InspectResultsLine& inspect_res = cand_set.results[j];
PeptideSolution& sol = peptide_sols[j];
sol.pep = inspect_res.pep;
sol.pm_with_19 = sol.pep.get_mass_with_19();
sol.charge = inspect_res.Charge;
sol.reaches_n_terminal = true;
sol.reaches_c_terminal = true;
}
vector<score_pair> scores;
score_complete_sequences(peptide_sols,ssf,peaks,num_peaks,scores);
for (j=0; j<scores.size(); j++)
cand_set.results[j].Score = scores[j].score;
cand_set.recalbirate_scores(config);
vector<string> pep_strings;
pep_strings.resize(scores.size());
int max_len =0;
for (j=0; j<cand_set.results.size(); j++)
{
pep_strings[j]=cand_set.results[j].pep.as_string(config);
if (pep_strings[j].length()>max_len)
max_len = pep_strings[j].length();
}
if (1)
{
cand_set.output_to_stream(new_res,10);
}
else
{
for (j=0; j<cand_set.results.size(); j++)
{
cout << cand_set.scan << " " << cand_set.results[j].Charge << "\t";
cout << cand_set.results[j].Protein.substr(0,3) << " " << pep_strings[j];
if (pep_strings[j].length()<max_len)
{
int k;
for (k=pep_strings[j].length(); k<max_len; k++)
cout << " ";
}
cout << "\t" << cand_set.results[j].MQScore << "\t" << cand_set.results[j].Score << "\t" <<
cand_set.results[j].DeltaScore << "\t" << cand_set.results[j].DeltaScoreOther << endl;
}
cout << endl;
}
}
if (num_found<cand_sets.size())
{
cout << "Warning: found only " << num_found << "/" << cand_sets.size() << " of the scans scored by InsPecT!" << endl;
}
else
{
cout << "All scored scans found in spectrum file." << endl;
}
delete [] peaks;
}
/***************************************************************************************
This function touches up inspect search results by rescoring the sequences returned by
inspect. The function produces a new inspect results file with the scores (and delta scores)
replaced.
****************************************************************************************/
void DeNovoRankScorer::recalibrate_inspect_delta_scores(char *spectra_file,
char *inspect_res,
char *new_res_file) const
{
Config *config = model->get_config();
ifstream org_res(inspect_res);
if (! org_res.is_open() || ! org_res.good())
{
cout << "Error: couldn't open original inspect results file for reading:" << inspect_res << endl;
exit(1);
}
ofstream new_res(new_res_file);
if (! new_res.is_open() || ! new_res.good())
{
cout << "Error: couldn't open new inspect results file for writing:" << new_res << endl;
exit(1);
}
char line_buff[1024];
org_res.getline(line_buff,1024);
bool read_line = true;
vector<string> field_names;
if (line_buff[0] != '#')
{
read_line = false;
}
else
{
string header = string(line_buff);
split_string(header,field_names);
int i;
for (i=0; i<field_names.size(); i++)
cout << i << "\t" << field_names[i] << endl;
}
vector<ScanCandidateSet> cand_sets;
vector<int> scan_mapping;
cand_sets.clear();
scan_mapping.resize(100000,-1);
while (! org_res.eof())
{
vector<string> fields;
if (read_line)
{
org_res.getline(line_buff,1024);
if (org_res.gcount() < 5)
continue;
}
else
{
read_line = true;
}
split_string(line_buff,fields);
InspectResultsLine res;
res.parse_from_fields(config,fields);
if (cand_sets.size()==0 || ! cand_sets[cand_sets.size()-1].add_new_line(res))
{
ScanCandidateSet new_set;
new_set.add_new_line(res);
if (new_set.scan>=scan_mapping.size())
scan_mapping.resize(2*scan_mapping.size(),-1);
scan_mapping[new_set.scan]=cand_sets.size();
cand_sets.push_back(new_set);
}
}
org_res.close();
cout << "Read results for " << cand_sets.size() << " scans..." << endl;
FileManager fm;
FileSet fs;
fm.init_from_file(config,spectra_file);
fs.select_all_files(fm);
const vector<SingleSpectrumFile *>& all_ssfs = fs.get_ssf_pointers();
cout << "Read " << all_ssfs.size() << " spectra headers..." << endl;
BasicSpecReader bsr;
QCPeak *peaks = new QCPeak[5000];
vector<bool> spectrum_indicators;
spectrum_indicators.resize(cand_sets.size(),false);
int num_found =0;
int i;
for (i=0; i<all_ssfs.size(); i++)
{
SingleSpectrumFile *ssf = all_ssfs[i];
const int scan_number = ssf->get_scan();
if (scan_mapping[scan_number]<0)
continue;
const int num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);
spectrum_indicators[scan_mapping[scan_number]]=true;
num_found++;
ScanCandidateSet& cand_set = cand_sets[scan_mapping[scan_number]];
cand_set.recalbirate_scores(config);
cand_set.output_to_stream(new_res,10);
}
if (num_found<cand_sets.size())
{
cout << "Warning: found only " << num_found << "/" << cand_sets.size() << " of the scans scored by InsPecT!" << endl;
}
else
{
cout << "All scored scans found in spectrum file." << endl;
}
delete [] peaks;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -