📄 pmc_rank.cpp
字号:
if (back_idx>=0 && peaks[forward_idx].mass + peaks[back_idx].mass > min2)
{
if (! (strict_iso_inds[back_idx] || strict_iso_inds[forward_idx]))
continue;
const mass_t offset = fabs(peaks[forward_idx].mass + peaks[back_idx].mass - single_charge_pair_sum);
const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
pairs2.push_back(offset_pair(offset,inten_sum));
}
}
c2_pairs0.clear();
forward_idx = -1;
back_idx = num_peaks-1;
while (forward_idx<back_idx)
{
forward_idx++;
if (strict_iso_inds[forward_idx])
continue;
mass_t sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
while (back_idx>=0 && sum>max_double_sum)
{
back_idx--;
if (back_idx<0)
break;
sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
}
if (back_idx>=0 && sum > min_double_sum)
{
if (strict_iso_inds[back_idx])
continue;
const mass_t offset = fabs(sum - double_charge_pair_sum);
const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
c2_pairs0.push_back(offset_pair(offset,inten_sum));
}
}
c2_pairs1.clear();
const mass_t maxc21 = max_double_sum + 1.0;
const mass_t minc21 = min_double_sum + 1.0;
forward_idx = -1;
back_idx = num_peaks-1;
while (forward_idx<back_idx)
{
forward_idx++;
mass_t sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
while (back_idx>=0 && sum>maxc21)
{
back_idx--;
if (back_idx<0)
break;
sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
}
if (back_idx>=0 && sum > minc21)
{
if (! (strict_iso_inds[back_idx] || strict_iso_inds[forward_idx]) )
continue;
const mass_t offset = fabs(sum - double_charge_pair_sum);
const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
c2_pairs1.push_back(offset_pair(offset,inten_sum));
}
}
c2_pairs2.clear();
const mass_t maxc22 = max_double_sum + 2.0;
const mass_t minc22 = min_double_sum + 2.0;
forward_idx = -1;
back_idx = num_peaks-1;
while (forward_idx<back_idx)
{
forward_idx++;
mass_t sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
while (back_idx>=0 && sum>maxc22)
{
back_idx--;
if (back_idx<0)
break;
sum = 2*peaks[forward_idx].mass + peaks[back_idx].mass;
}
if (back_idx>=0 && sum > minc22)
{
if (! (strict_iso_inds[back_idx] || strict_iso_inds[forward_idx]) )
continue;
const mass_t offset = fabs(sum - double_charge_pair_sum);
const float inten_sum = peaks[forward_idx].intensity + peaks[back_idx].intensity;
c2_pairs2.push_back(offset_pair(offset,inten_sum));
}
}
// sum the intensities of the first 4 pairs of each list (after sorting with cmp_offset_pair_inten)
stats.inten_strict_pairs0=0;
stats.num_strict_pairs0 = pairs0.size();
sort(pairs0.begin(),pairs0.end(),cmp_offset_pair_inten);
for (i=0; i<4 && i<pairs0.size(); i++)
{
// stats.offset_strict_pairs0.push_back(pairs0[i].offset);
stats.inten_strict_pairs0+=pairs0[i].inten_sum;
}
stats.inten_strict_pairs1=0;
stats.num_strict_pairs1 = pairs1.size();
sort(pairs1.begin(),pairs1.end(),cmp_offset_pair_inten);
for (i=0; i<4 && i<pairs1.size(); i++)
{
// stats.offset_strict_pairs1.push_back(pairs1[i].offset);
stats.inten_strict_pairs1+=pairs1[i].inten_sum;
}
stats.inten_strict_pairs2=0;
stats.num_strict_pairs2 = pairs2.size();
sort(pairs2.begin(),pairs2.end(),cmp_offset_pair_inten);
for (i=0; i<4 && i<pairs2.size(); i++)
{
// stats.offset_strict_pairs2.push_back(pairs2[i].offset);
stats.inten_strict_pairs2+=pairs2[i].inten_sum;
}
stats.c2_inten_strict_pairs0=0;
stats.c2_num_strict_pairs0 = c2_pairs0.size();
sort(c2_pairs0.begin(),c2_pairs0.end(),cmp_offset_pair_inten);
for (i=0; i<4 && i<c2_pairs0.size(); i++)
{
// stats.c2_offset_strict_pairs0.push_back(c2_pairs0[i].offset);
stats.c2_inten_strict_pairs0+=c2_pairs0[i].inten_sum;
}
stats.c2_inten_strict_pairs1=0;
stats.c2_num_strict_pairs1 = c2_pairs1.size();
sort(c2_pairs1.begin(),c2_pairs1.end(),cmp_offset_pair_inten);
for (i=0; i<4 && i<c2_pairs1.size(); i++)
{
// stats.c2_offset_strict_pairs1.push_back(c2_pairs1[i].offset);
stats.c2_inten_strict_pairs1+=c2_pairs1[i].inten_sum;
}
stats.c2_inten_strict_pairs2=0;
stats.c2_num_strict_pairs2 = c2_pairs2.size();
sort(c2_pairs2.begin(), c2_pairs2.end(),cmp_offset_pair_inten);
for (i=0; i<4 && i<c2_pairs2.size(); i++)
{
// stats.c2_offset_strict_pairs2.push_back(c2_pairs2[i].offset);
stats.c2_inten_strict_pairs2+=c2_pairs2[i].inten_sum;
}
}
/**************************************************************************
Fills pmc_stats_vec with one PMCRankStats entry per candidate pair sum.

The candidate pair sums are single_charge_pair_sum + delta, where delta
starts at minus_range and grows by increment for every bin. For each
candidate, calc_pmc_rank_stats_for_mass() fills the entry and the entry's
m_over_z is then set to (pair_sum + charge - 2.0005) / charge.

charge                  - charge used to convert the pair sum to m/z
single_charge_pair_sum  - the sum of b+y or c+z
minus_range, plus_range - first delta and the upper end of the delta range
increment               - step between consecutive deltas (expected > 0)
config                  - supplies the tolerance (scaled by 0.55 here)
bs                      - spectrum whose peaks / num_peaks are forwarded
strong_inds, iso_levels,
iso_inds                - forwarded unchanged to calc_pmc_rank_stats_for_mass
pmc_stats_vec           - output; resized to the number of bins if needed
***************************************************************************/
void fill_rank_PMC_stats(int charge,
                         const mass_t single_charge_pair_sum, // the sum of b+y or c+z
                         mass_t minus_range,
                         mass_t plus_range,
                         mass_t increment,
                         Config *config,
                         const BasicSpectrum& bs,
                         const vector<bool>& strong_inds,
                         const vector<float>& iso_levels,
                         const vector<bool>& iso_inds,
                         vector<PMCRankStats>& pmc_stats_vec)
{
    const mass_t tolerance = config->get_tolerance()*0.55;

    // The small epsilon keeps a quotient such as 9.9999998 (what 1.0/0.1
    // yields when the increment is not exactly representable) from being
    // truncated to 9. The same guard is used when the bin skip is computed
    // in fill_RankBoost_smaples_with_PMC, so both now agree on bins per Da.
    const int num_bins_per_Da = static_cast<int>((1.0/increment)+0.00001);
    const int num_bins = static_cast<int>((plus_range-minus_range)*num_bins_per_Da)+1;

    if (static_cast<int>(pmc_stats_vec.size()) != num_bins)
        pmc_stats_vec.resize(num_bins);

    mass_t delta = minus_range;
    for (int i=0; i<num_bins; i++)
    {
        calc_pmc_rank_stats_for_mass(bs.peaks, bs.num_peaks, single_charge_pair_sum+delta,
                                     tolerance, iso_levels, strong_inds, iso_inds, pmc_stats_vec[i]);

        // NOTE(review): 2.0005 is presumably the mass correction that turns the
        // fragment pair sum back into a precursor mass - confirm against caller.
        pmc_stats_vec[i].m_over_z = (single_charge_pair_sum+delta+charge-2.0005)/charge;

        delta += increment;
    }
}
void PMCRankStats::clear()
{
m_over_z=0;
rank_score = NEG_INF;
num_frag_pairs=0;
num_strong_frag_pairs=0;
num_c2_frag_pairs=0;
num_strong_c2_frag_pairs=0;
num_h2o_loss_frag_pairs=0;
inten_frag_pairs=0;
inten_strong_pairs=0;
inten_c2_pairs=0;
inten_c2_strong_pairs=0;
inten_h2o_loss_frag_pairs=0;
itnen_h2o_loss_c2_frag_pairs=0;
mean_offset_pairs=0;
mean_offset_strong_pairs=0;
mean_offset_c2_pairs=0;
mean_offset_c2_strong_pairs=0;
mean_offset_h2o_pairs=0;
mean_offset_c2_h2o_pairs=0;
ind_pairs_with_min_tol=false;
ind_strong_pairs_with_min_tol=false;
ind_c2_pairs_with_min_tol=false;
ind_c2_strong_pairs_with_min_tol=false;
log_dis_from_pairs_min_tol=0;
log_dis_from_strong_pairs_min_tol=0;
log_dis_from_c2_pairs_min_tol=0;
log_dis_from_c2_strong_pairs_min_tol=0;
offset_pairs_ordered_by_inten.clear();
strong_offset_pairs_ordered_by_inten.clear();
c2_offset_pairs_ordered_by_inten.clear();
num_strict_pairs0=0; inten_strict_pairs0=0;
num_strict_pairs1=0; inten_strict_pairs1=0;
num_strict_pairs2=0; inten_strict_pairs2=0;
}
/**************************************************************************
Fills in the RankBoost feature data
***************************************************************************/
void PMCSQS_Scorer::fill_RankBoost_smaples_with_PMC(
const BasicSpectrum& bs,
int charge,
vector<RankBoostSample>& samples) const
{
const int num_samples = curr_spec_rank_pmc_tables[charge].size();
const int idx_skip = int((1.0/bin_increment)+0.00001);
vector<int> idx_offsets;
int i;
idx_offsets.clear();
idx_offsets.push_back(-2*idx_skip);
idx_offsets.push_back(-1*idx_skip);
idx_offsets.push_back(idx_skip);
idx_offsets.push_back(2*idx_skip);
if (samples.size() != num_samples)
samples.resize(num_samples);
for (i=0; i<num_samples; i++)
{
const PMCRankStats& stats = curr_spec_rank_pmc_tables[charge][i];
RankBoostSample& sam = samples[i];
const float inten_norm = 1.0/(curr_spec_total_intensity+1.0);
int r_idx=0;
const mass_t mz_offset = (stats.m_over_z - bs.ssf->m_over_z);
sam.clear();
sam.add_real_feature(r_idx++,mz_offset);
if (stats.num_frag_pairs<=2)
{
sam.add_real_feature(r_idx,mz_offset);
}
else if (stats.num_frag_pairs<4)
{
sam.add_real_feature(r_idx+1,mz_offset);
}
else
sam.add_real_feature(r_idx+2,mz_offset);
r_idx+=3;
if (stats.num_strong_frag_pairs<3)
{
sam.add_real_feature(r_idx,mz_offset);
}
else
sam.add_real_feature(r_idx+1,mz_offset);
r_idx+=2;
if (stats.num_c2_frag_pairs<=2)
{
sam.add_real_feature(r_idx,mz_offset);
}
else if (stats.num_c2_frag_pairs<4)
{
sam.add_real_feature(r_idx+1,mz_offset);
}
else
sam.add_real_feature(r_idx+2,mz_offset);
r_idx+=3;
if (stats.num_strong_c2_frag_pairs<3)
{
sam.add_real_feature(r_idx,mz_offset);
}
else
sam.add_real_feature(r_idx+1,mz_offset);
r_idx+=2;
/* names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS <=2");
names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS <=5");
names.push_back("OFFSET FROM MEASURED M/Z, NUM PAIRS >5");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG PAIRS <4");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG PAIRS >4");
names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS <=2");
names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS <=5");
names.push_back("OFFSET FROM MEASURED M/Z, NUM C2 PAIRS >5");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG C2 PAIRS <4");
names.push_back("OFFSET FROM MEASURED M/Z, NUM STRONG C2 PAIRS >4");*/
sam.add_real_feature(r_idx++,stats.num_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_strong_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_c2_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_strong_c2_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_h2o_loss_frag_pairs);
sam.add_real_feature(r_idx++,stats.num_h2o_loss_c2_frag_pairs);
sam.add_real_feature(r_idx++,stats.inten_frag_pairs * inten_norm);
sam.add_real_feature(r_idx++,stats.inten_strong_pairs * inten_norm);
sam.add_real_feature(r_idx++,stats.inten_c2_pairs * inten_norm);
sam.add_real_feature(r_idx++,stats.inten_c2_strong_pairs * inten_norm);
sam.add_real_feature(r_idx++,stats.inten_h2o_loss_frag_pairs * inten_norm);
sam.add_real_feature(r_idx++,stats.itnen_h2o_loss_c2_frag_pairs * inten_norm);
// averages of top k offsets
float avg=0;
int j;
for (j =0; j<7 && j<stats.offset_pairs_ordered_by_inten.size(); j++)
{
avg += fabs(stats.offset_pairs_ordered_by_inten[j]);
if (j>=2)
sam.add_real_feature(r_idx+j-2,avg/(float)j);
}
r_idx+=5;
avg=0;
for (j =0; j<7 && j<stats.c2_offset_pairs_ordered_by_inten.size(); j++)
{
avg += fabs(stats.c2_offset_pairs_ordered_by_inten[j]);
if (j>=2)
sam.add_real_feature(r_idx+j-2,avg/(float)j);
}
r_idx+=5;
// offset data
if (stats.mean_offset_pairs<POS_INF)
{
sam.add_real_feature(r_idx++,stats.mean_offset_pairs);
sam.add_real_feature(r_idx++,stats.mean_offset_pairs/(1.0+stats.num_frag_pairs));
}
else
r_idx+=2;
if (stats.mean_offset_strong_pairs<POS_INF)
{
sam.add_real_feature(r_idx++,stats.mean_offset_strong_pairs);
sam.add_real_feature(r_idx++,stats.mean_offset_strong_pairs/(1.0+stats.num_strong_frag_pairs));
}
else
r_idx+=2;
if (stats.mean_offset_c2_pairs<POS_INF)
{
sam.add_real_feature(r_idx++,stats.mean_offset_c2_pairs);
sam.add_real_feature(r_idx++,stats.mean_offset_c2_pairs/(1.0+stats.num_c2_frag_pairs));
}
else
r_idx+=2;
if (stats.mean_offset_c2_strong_pairs<POS_INF)
{
sam.add_real_feature(r_idx++,stats.mean_offset_c2_strong_pairs);
sam.add_real_feature(r_idx++,stats.mean_offset_c2_strong_pairs/(1.0+stats.num_strong_c2_frag_pairs));
}
else
r_idx+=2;
if (stats.mean_offset_h2o_pairs<POS_INF)
{
sam.add_real_feature(r_idx++,stats.mean_offset_h2o_pairs);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -