📄 pmc_rank.cpp
字号:
sam.add_real_feature(r_idx++,stats.mean_offset_h2o_pairs/(1.0+stats.num_h2o_loss_frag_pairs));
}
else
r_idx+=2;
if (stats.mean_offset_c2_h2o_pairs<POS_INF)
{
sam.add_real_feature(r_idx++,stats.mean_offset_c2_h2o_pairs);
sam.add_real_feature(r_idx++,stats.mean_offset_c2_h2o_pairs/(1.0+stats.num_h2o_loss_c2_frag_pairs));
}
else
r_idx+=2;
// individual offsets
for (j=0; j<5 && j<stats.offset_pairs_ordered_by_inten.size(); j++)
sam.add_real_feature(r_idx+j,stats.offset_pairs_ordered_by_inten[j]);
r_idx+=5;
for (j=0; j<5 && j<stats.c2_offset_pairs_ordered_by_inten.size(); j++)
sam.add_real_feature(r_idx+j,stats.c2_offset_pairs_ordered_by_inten[j]);
r_idx+=5;
// add the +0 +1 +2 strict counts
sam.add_real_feature(r_idx++,stats.num_strict_pairs0);
sam.add_real_feature(r_idx++,stats.inten_strict_pairs0 * inten_norm);
sam.add_real_feature(r_idx++,stats.num_strict_pairs1);
sam.add_real_feature(r_idx++,stats.inten_strict_pairs1 * inten_norm);
sam.add_real_feature(r_idx++,stats.num_strict_pairs2);
sam.add_real_feature(r_idx++,stats.inten_strict_pairs2 * inten_norm);
sam.add_real_feature(r_idx++,stats.c2_num_strict_pairs0);
sam.add_real_feature(r_idx++,stats.c2_inten_strict_pairs0 * inten_norm);
sam.add_real_feature(r_idx++,stats.c2_num_strict_pairs1);
sam.add_real_feature(r_idx++,stats.c2_inten_strict_pairs1 * inten_norm);
sam.add_real_feature(r_idx++,stats.c2_num_strict_pairs2);
sam.add_real_feature(r_idx++,stats.c2_inten_strict_pairs2 * inten_norm);
// add comparative features to -2 -1 +1 +2 Da away
for (j=0; j<idx_offsets.size(); j++)
{
const int other_idx = i + idx_offsets[j];
if (other_idx<0 || other_idx>= samples.size())
{
r_idx+=12;
continue;
}
const PMCRankStats& other = curr_spec_rank_pmc_tables[charge][other_idx];
sam.add_real_feature(r_idx++,stats.num_frag_pairs - other.num_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_strong_frag_pairs - other.num_strong_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_c2_frag_pairs - other.num_c2_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_strong_c2_frag_pairs - other.num_strong_c2_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_h2o_loss_frag_pairs - other.num_h2o_loss_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_h2o_loss_c2_frag_pairs - other.num_h2o_loss_c2_frag_pairs);
sam.add_real_feature(r_idx++,(stats.inten_frag_pairs - other.inten_frag_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(stats.inten_strong_pairs - other.inten_strong_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(stats.inten_c2_pairs - other.inten_c2_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(stats.inten_c2_strong_pairs - other.inten_c2_strong_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(stats.inten_h2o_loss_frag_pairs - other.inten_h2o_loss_frag_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(stats.itnen_h2o_loss_c2_frag_pairs - other.itnen_h2o_loss_c2_frag_pairs) * inten_norm);
}
const int plus_idx = i + idx_skip;
const int minus_idx = i-idx_skip;
if (plus_idx<samples.size() && minus_idx>0)
{
const PMCRankStats& plus = curr_spec_rank_pmc_tables[charge][plus_idx];
const PMCRankStats& minus = curr_spec_rank_pmc_tables[charge][minus_idx];
sam.add_real_feature(r_idx++,plus.num_frag_pairs - minus.num_frag_pairs);
sam.add_real_feature(r_idx++,plus.num_strong_frag_pairs - minus.num_strong_frag_pairs);
sam.add_real_feature(r_idx++,plus.num_c2_frag_pairs - minus.num_c2_frag_pairs);
sam.add_real_feature(r_idx++,plus.num_strong_c2_frag_pairs - minus.num_strong_c2_frag_pairs);
sam.add_real_feature(r_idx++,plus.num_h2o_loss_frag_pairs - minus.num_h2o_loss_frag_pairs);
sam.add_real_feature(r_idx++,plus.num_h2o_loss_c2_frag_pairs - minus.num_h2o_loss_c2_frag_pairs);
sam.add_real_feature(r_idx++,(plus.inten_frag_pairs - minus.inten_frag_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(plus.inten_strong_pairs - minus.inten_strong_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(plus.inten_c2_pairs - minus.inten_c2_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(plus.inten_c2_strong_pairs - minus.inten_c2_strong_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(plus.inten_h2o_loss_frag_pairs - minus.inten_h2o_loss_frag_pairs) * inten_norm);
sam.add_real_feature(r_idx++,(plus.itnen_h2o_loss_c2_frag_pairs - minus.itnen_h2o_loss_c2_frag_pairs) * inten_norm);
}
}
}
void init_PMC_feature_names(vector<string>& names)
{
names.clear();
int i;
names.push_back("OFFSET FROM MEASURED M/Z");
names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS <=2");
names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS <=4");
names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS >4");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG PAIRS <3");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG PAIRS >=3");
names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS <=2");
names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS <=4");
names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS >4");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG C2 PAIRS <3");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG C2 PAIRS >=3");
names.push_back("# PAIRS");
names.push_back("# STRONG PAIRS");
names.push_back("# C2 PAIRS");
names.push_back("# STRONG C2 PAIRS");
names.push_back("# H2O PAIRS");
names.push_back("# C2 H2O PAIRS");
names.push_back("INTEN PAIRS");
names.push_back("INTEN STRONG PAIRS");
names.push_back("INTEN C2 PAIRS");
names.push_back("INTEN STRONG C2 PAIRS");
names.push_back("INTEN H2O PAIRS");
names.push_back("INTEN C2 H2O PAIRS");
for (i=2; i<7; i++)
{
char name[64];
sprintf(name,"AVG OFFSET TOP (STRONG %d)",i);
names.push_back(name);
}
for (i=2; i<7; i++)
{
char name[64];
sprintf(name,"AVG OFFSET TOP C2 (STRONG %d)",i);
names.push_back(name);
}
names.push_back("MEAN OFFSET PAIRS");
names.push_back("WEIGHTED MEAN OFFSET PAIRS");
names.push_back("MEAN OFFSET STRONG PAIRS");
names.push_back("WEIGHTED MEAN OFFSET STRONG PAIRS");
names.push_back("MEAN OFFSET C2 PAIRS");
names.push_back("WEIGHTED MEAN OFFSET C2 PAIRS");
names.push_back("MEAN OFFSET STRONG C2 PAIRS");
names.push_back("WEIGHTED MEAN OFFSET STRONG C2 PAIRS");
names.push_back("MEAN OFFSET H2O PAIRS");
names.push_back("WEIGHTED MEAN OFFSET H2O PAIRS");
names.push_back("MEAN OFFSET C2 H2O PAIRS");
names.push_back("WEIGHTED MEAN OFFSET C2 H2O PAIRS");
for (i=0; i<5; i++)
{
char name[64];
sprintf(name,"PAIR OFFSET (STRONG %d)",i+1);
names.push_back(name);
}
for (i=0; i<5; i++)
{
char name[64];
sprintf(name,"C2 PAIR OFFSET (STRONG %d)",i+1);
names.push_back(name);
}
names.push_back("NUM STRICT 0");
names.push_back("INTEN STRICT 0");
names.push_back("NUM STRICT 1");
names.push_back("INTEN STRICT 1");
names.push_back("NUM STRICT 2");
names.push_back("INTEN STRICT 2");
names.push_back("NUM C2 STRICT 0");
names.push_back("INTEN C2 STRICT 0");
names.push_back("NUM C2 STRICT 1");
names.push_back("INTEN C2 STRICT 1");
names.push_back("NUM C2 STRICT 2");
names.push_back("INTEN C2 STRICT 2");
// diff features with -2 -1 +1 +2
const string dis_labels[]={"-2","-1","+1","+2"};
for (i=0; i<4; i++)
{
const string prefix = "DIFF WITH "+dis_labels[i]+" ";
names.push_back(prefix+"# PAIRS");
names.push_back(prefix+"# STRONG PAIRS");
names.push_back(prefix+"# C2 PAIRS");
names.push_back(prefix+"# STRONG C2 PAIRS");
names.push_back(prefix+"# H2O PAIRS");
names.push_back(prefix+"# C2 H2O PAIRS");
names.push_back(prefix+"INTEN PAIRS");
names.push_back(prefix+"INTEN STRONG PAIRS");
names.push_back(prefix+"INTEN C2 PAIRS");
names.push_back(prefix+"INTEN STRONG C2 PAIRS");
names.push_back(prefix+"INTEN H2O PAIRS");
names.push_back(prefix+"INTEN C2 H2O PAIRS");
}
names.push_back("DIFF +1/-1 # PAIRS");
names.push_back("DIFF +1/-1 # STRONG PAIRS");
names.push_back("DIFF +1/-1 # C2 PAIRS");
names.push_back("DIFF +1/-1 # STRONG C2 PAIRS");
names.push_back("DIFF +1/-1 # H2O PAIRS");
names.push_back("DIFF +1/-1 # C2 H2O PAIRS");
names.push_back("DIFF +1/-1 INTEN PAIRS");
names.push_back("DIFF +1/-1 INTEN STRONG PAIRS");
names.push_back("DIFF +1/-1 INTEN C2 PAIRS");
names.push_back("DIFF +1/-1 INTEN STRONG C2 PAIRS");
names.push_back("DIFF +1/-1 INTEN H2O PAIRS");
names.push_back("DIFF +1/-1 INTEN C2 H2O PAIRS");
cout << "Initialized: " << names.size() << " real feature names..." << endl;
}
void PMCSQS_Scorer::output_pmc_rank_results(const FileManager& fm,
int charge,
const vector<SingleSpectrumFile *>& test_ssfs)
{
BasicSpecReader bsr;
static QCPeak peaks[5000];
vector<int> org_offset_counts, new_offset_counts;
org_offset_counts.resize(201,0);
new_offset_counts.resize(201,0);
vector<mass_t> org_offsets;
vector<mass_t> corr_offsets;
org_offsets.clear();
corr_offsets.clear();
int i;
for (i=0; i<test_ssfs.size(); i++)
{
SingleSpectrumFile* ssf = test_ssfs[i];
BasicSpectrum bs;
bs.num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);
bs.peaks = peaks;
bs.ssf = ssf;
init_for_current_spec(config,bs);
calculate_curr_spec_pmc_values(bs, bin_increment);
PmcSqsChargeRes res;
find_best_mz_values_from_rank_model(bs, charge, res);
ssf->peptide.calc_mass(config);
mass_t true_mz = (ssf->peptide.get_mass() + 18.01 + charge)/charge;
org_offsets.push_back(true_mz - ssf->m_over_z);
corr_offsets.push_back(true_mz - res.mz1);
}
mass_t m_org,sd_org,m_corr,sd_corr;
calc_mean_sd(org_offsets,&m_org, &sd_org);
calc_mean_sd(corr_offsets,&m_corr,&sd_corr);
cout << "CHARGE: " << charge << endl;
cout << "ORG: mean " << m_org << " " << sd_org << endl;
cout << "CORR: mean " << m_corr << " " << sd_corr << endl;
for (i=0; i<org_offsets.size(); i++)
{
int org_idx = 100 + int(org_offsets[i] * 20);
if (org_idx<0)
org_idx = 0;
if (org_idx>200)
org_idx=200;
org_offset_counts[org_idx]++;
int new_idx = 100 + int(corr_offsets[i] * 20);
if (new_idx<0)
new_idx = 0;
if (new_idx>200)
new_idx=200;
new_offset_counts[new_idx]++;
}
int cum_org=0;
int cum_new=0;
for (i=0; i<=200; i++)
{
if (org_offset_counts[i]==0 && new_offset_counts[i]==0)
continue;
cum_org+=org_offset_counts[i];
cum_new+=new_offset_counts[i];
cout << fixed << setprecision(3) << i*0.05 - 5.0 << "\t" <<
org_offset_counts[i]/(float)org_offsets.size() << "\t" <<
new_offset_counts[i]/(float)corr_offsets.size() << "\t" <<
cum_org/(float)org_offsets.size() << "\t"<<
cum_new/(float)corr_offsets.size() << endl;
}
}
void PMCSQS_Scorer::find_best_mz_values_from_rank_model(const BasicSpectrum& bs,
int charge, PmcSqsChargeRes& res)
{
static vector<RankBoostSample> spec_samples;
static vector<float> rank_scores;
fill_RankBoost_smaples_with_PMC(bs, charge, spec_samples);
if (rank_scores.size() != spec_samples.size())
rank_scores.resize(spec_samples.size(),NEG_INF);
int best_idx=-1;
float best_score=NEG_INF;
int i;
for (i=0; i<spec_samples.size(); i++)
{
const mass_t pm_with_19 = bs.ssf->m_over_z * charge - (charge + 1);
const int size_idx = get_rank_model_size_idx(charge, pm_with_19);
if (charge>= pmc_rank_models.size() ||
size_idx>= pmc_rank_models[charge].size() ||
! pmc_rank_models[charge][size_idx])
continue;
rank_scores[i]=pmc_rank_models[charge][size_idx]->calc_rank_score(spec_samples[i]);
curr_spec_rank_pmc_tables[charge][i].rank_score = rank_scores[i];
if (rank_scores[i]>best_score)
{
best_score=rank_scores[i];
best_idx = i;
}
}
// no suitable models were found for this spectrum
if (best_idx<0)
{
res.mz1 = bs.ssf->m_over_z;
res.score1 = 10.0;
res.mz2 = NEG_INF;
res.score2 = NEG_INF;
return;
}
res.mz1 = curr_spec_rank_pmc_tables[charge][best_idx].m_over_z;
res.score1 = best_score;
// look for additional m/z
int second_best_idx=-1;
float second_best_score=NEG_INF;
const mass_t mz_diff = curr_spec_rank_pmc_tables[charge][1].m_over_z -
curr_spec_rank_pmc_tables[charge][0].m_over_z;
const int idx_diff = (int)(0.45/(charge * mz_diff));
for (i=0; i<spec_samples.size(); i++)
{
if (fabs(i-best_idx)<idx_diff)
continue;
if (rank_scores[i]>second_best_score)
{
second_best_score=rank_scores[i];
second_best_idx = i;
}
}
if (second_best_idx>=0)
{
res.mz2 = curr_spec_rank_pmc_tables[charge][second_best_idx].m_over_z;
res.score2 = second_best_score;
}
else
{
res.mz2 = NEG_INF;
res.score2 = NEG_INF;
}
// const int size_idx = this->get_rank_model_size_idx(charge,res.mz1*charge-charge+1);
// res.mz1 -= pmc_charge_mz_biases[charge][size_idx];
// res.mz2 -= pmc_charge_mz_biases[charge][size_idx];
// cout << charge << " ]\t" << res.mz1 << "\t" << res.prob1 << "\t" << res.mz2 << "\t" << res.prob2 << endl;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -