📄 denovopartmodel.cpp
字号:
if (oris.size()>0)
{
const float one_over = 1.0/(float)oris.size();
rbs.add_real_feature(f_idx++,(float)num_with1*one_over);
rbs.add_real_feature(f_idx++,(float)num_with2*one_over);
rbs.add_real_feature(f_idx++,(float)num_with_alot*one_over);
rbs.add_real_feature(f_idx++,num_dual_ori*one_over);
}
else
f_idx+=4;
rbs.add_real_feature(f_idx++,(float)switches);
/* feature_names.push_back("PRM #breakages with 1 frag detected");
feature_names.push_back("PRM #breakages with 2 frag detected");
feature_names.push_back("PRM #breakages with > 5 frags detected");
feature_names.push_back("PRM #breakages with dual orientation frags");
feature_names.push_back("PRM #orientation switches");*/
}
void DeNovoPartitionModel::fill_pmcsqs_features(
const PeptideSolution& sol,
const vector<PmcSqsChargeRes>& res,
const PMCSQS_Scorer *pmc_model,
RankBoostSample& rbs) const
{
int f_idx = pmc_start_idx;
const mass_t& mz1 = res[sol.charge].mz1;
const mass_t& mz2 = res[sol.charge].mz2;
const mass_t pm1_with_19 = (mz1>0 ? mz1 * sol.charge - (sol.charge-1)*MASS_PROTON : NEG_INF);
const mass_t pm2_with_19 = (mz2>0 ? mz2 * sol.charge - (sol.charge-1)*MASS_PROTON : NEG_INF);
const float prob_charge = res[sol.charge].min_comp_prob;
if (mz1>0)
{
rbs.add_real_feature(f_idx,res[sol.charge].sqs_prob);
rbs.add_real_feature(f_idx+1,prob_charge);
if (prob_charge>0.95)
{
rbs.add_real_feature(f_idx+2,pm1_with_19-sol.pep.get_mass_with_19());
}
else
rbs.add_real_feature(f_idx+3,pm1_with_19-sol.pep.get_mass_with_19());
rbs.add_real_feature(f_idx+4,res[sol.charge].score1);
}
f_idx+=5;
if (mz2>0)
{
rbs.add_real_feature(f_idx++,res[sol.charge].score2);
rbs.add_real_feature(f_idx++,pm2_with_19-sol.pep.get_mass_with_19());
}
else
f_idx+=2;
float max_prob=-1;
int c;
for (c=1; c<res.size(); c++)
{
if (c==sol.charge)
continue;
if (res[c].min_comp_prob>max_prob)
max_prob=res[c].min_comp_prob;
}
if (max_prob>=0)
rbs.add_real_feature(f_idx,max_prob);
f_idx++;
const vector< vector< PMCRankStats > >& curr_stats = pmc_model->get_curr_spec_rank_pmc_tables();
const vector< PMCRankStats >& charge_stats = curr_stats[sol.charge];
int i;
int mz_idx =0;
while (mz_idx<charge_stats.size() && charge_stats[mz_idx].m_over_z>mz1)
mz_idx++;
if (mz_idx>0 && mz_idx<charge_stats.size() &&
charge_stats[mz_idx].m_over_z-mz1>mz1-charge_stats[mz_idx-1].m_over_z)
mz_idx--;
int best_idx=0;
for (i=0; i<charge_stats.size(); i++)
if (charge_stats[i].rank_score>charge_stats[best_idx].rank_score)
best_idx = i;
const float score_diff = charge_stats[best_idx].rank_score - charge_stats[mz_idx].rank_score;
if (prob_charge>0.95)
{
rbs.add_real_feature(f_idx,score_diff);
}
else if (prob_charge>0.7)
{
rbs.add_real_feature(f_idx+1,score_diff);
}
else
{
rbs.add_real_feature(f_idx+2,score_diff);
}
f_idx+=3;
}
void DeNovoPartitionModel::fill_composition_features(
const PeptideSolution& sol,
Config *config,
PeptideCompAssigner *comp_assigner,
const SeqPath& path,
RankBoostSample& rbs) const
{
PeptideCompStats comp_stats;
comp_assigner->fill_peptide_stats(sol.pep,comp_stats);
const int num_aas = sol.pep.get_num_aas();
const vector<int> sol_aas = sol.pep.get_amino_acids();
int f_idx = comp_start_idx;
if (sol.reaches_n_terminal)
rbs.add_real_feature(f_idx,(float)comp_stats.start_comp[3]);
f_idx++;
if (sol.reaches_c_terminal)
rbs.add_real_feature(f_idx,(float)comp_stats.end_comp[3]);
f_idx++;
const int *len3_counts = comp_stats.cat_counts[3];
if (1)
{
rbs.add_real_feature(f_idx++,(float)(len3_counts[19]+len3_counts[20]));
rbs.add_real_feature(f_idx++,(float)(len3_counts[15]+len3_counts[16]+len3_counts[17]+len3_counts[18]));
rbs.add_real_feature(f_idx++,(float)(len3_counts[7]+len3_counts[8]+len3_counts[9]+len3_counts[10]+
len3_counts[11]+len3_counts[12]+len3_counts[13]+len3_counts[14]));
rbs.add_real_feature(f_idx++,(float)(len3_counts[3]+len3_counts[4]+len3_counts[5]+len3_counts[6]));
rbs.add_real_feature(f_idx++,(float)(len3_counts[1]+len3_counts[2]));
}
else
f_idx+=5;
int avg = 0;
int cat;
for (cat=1; cat<=MAX_COMP_CAT; cat++)
{
avg+=cat*len3_counts[cat];
if (len3_counts[cat]>0)
break;
}
int min_cat = cat++;
for ( ; cat<=MAX_COMP_CAT; cat++)
avg+=cat*len3_counts[cat];
if (1)
rbs.add_real_feature(f_idx,min_cat);
f_idx++;
if (num_aas>0)
rbs.add_real_feature(f_idx,avg/(float)num_aas);
f_idx++;
vector<score_pair> pairs;
if (path.positions.size()>3)
{
const int start_idx = 1;
const int end_idx = path.positions.size()-2;
int i;
for (i=start_idx; i<end_idx; i++)
{
const PathPos& pos = path.positions[i];
if (pos.node_idx>0)
{
score_pair p;
p.idx = i;
p.score = pos.node_score;
pairs.push_back(p);
}
}
sort(pairs.begin(),pairs.end());
for (i=0; i<pairs.size() && i<4; i++)
{
const int idx = pairs[i].idx;
int prev_idx;
for (prev_idx = idx-1; prev_idx>=0; prev_idx--)
if (path.positions[prev_idx].edge_idx>=0)
break;
int prev_cat = -1;
if (prev_idx>=0 && idx- prev_idx<=3)
{
prev_cat = comp_assigner->get_aa_category(idx-prev_idx,&sol_aas[prev_idx],
(prev_idx == 0 && sol.reaches_n_terminal), false);
}
int next_cat = -1;
if (path.positions[idx].edge_idx>=0)
{
int next_idx;
for (next_idx = idx+1; next_idx<=num_aas; next_idx++)
if (path.positions[next_idx].node_idx>0)
break;
if (next_idx<=num_aas && next_idx-idx<=3)
{
next_cat = comp_assigner->get_aa_category(next_idx-idx,&sol_aas[idx],
false, (next_idx == num_aas && sol.reaches_c_terminal));
}
}
int span_aas[2]={path.positions[idx-1].aa,path.positions[idx].aa};
int span_cat = comp_assigner->get_aa_category(2,span_aas, false, false);
rbs.add_real_feature(f_idx+i*3,(float)prev_cat);
rbs.add_real_feature(f_idx+i*3+1,(float)next_cat);
rbs.add_real_feature(f_idx+i*3+2,(float)span_cat);
}
}
f_idx+=12;
const vector<int>& org_aas = config->get_org_aa();
vector<int> aa_counts;
int max_val=2;
if (sol_aas.size()>14)
max_val=3;
if (sol_aas.size()>22)
max_val=4;
aa_counts.resize(Val+1,0);
int i;
for (i=0; i<sol_aas.size(); i++)
aa_counts[org_aas[sol_aas[i]]]++;
aa_counts[Leu]+=aa_counts[Ile];
int c=0;
for (i=Ala; i<=Val; i++)
{
if (i==Ile)
continue;
if (aa_counts[i]>0)
{
rbs.add_real_feature(f_idx+c,(aa_counts[i]>max_val ? max_val : aa_counts[i]));
}
c++;
}
f_idx+=c;
const int max_idx = sol_aas.size()-1;
int num_W=0;
int num_Q=0;
int num_N=0;
int num_XG=0;
for (i=0; i<max_idx; i++)
{
if (path.positions[i].edge_idx>=0)
continue;
const int aa1=sol_aas[i];
const int aa2=sol_aas[i+1];
if (aa2 == Gly && (aa1 == Glu || aa1 == Gly || aa1 == Ala))
num_XG++;
if (aa1 == Gly && aa2 == Gly)
{
num_N++;
continue;
}
if ((aa1 == Ala && aa2 == Gly) ||
(aa1 == Gly && aa2 == Ala))
{
num_Q++;
continue;
}
if ((aa1 == Glu && aa2 == Gly) ||
(aa1 == Gly && aa2 == Glu) ||
(aa1 == Ala && aa2 == Asp) ||
(aa1 == Asp && aa2 == Ala) ||
(aa1 == Val && aa2 == Ser) ||
(aa1 == Ser && aa2 == Val))
{
num_W++;
continue;
}
}
const int num_problematic = (num_W+num_Q+num_N);
if (num_problematic>0)
rbs.add_real_feature(f_idx,(float)num_problematic);
f_idx++;
if (num_W>0)
rbs.add_real_feature(f_idx,(float)num_W);
f_idx++;
if (num_Q>0)
rbs.add_real_feature(f_idx,(float)num_Q);
f_idx++;
if (num_N>0)
rbs.add_real_feature(f_idx,(float)num_N);
f_idx++;
if (num_XG>0)
rbs.add_real_feature(f_idx,(float)num_XG);
f_idx++;
/* feature_names.push_back("PEP COMP #double EG,GE,AD,DA,VS,SV");
feature_names.push_back("PEP COMP #double ");
feature_names.push_back("PEP COMP #double GG");
feature_names.push_back("PEP COMP #double GA");
feature_names.push_back("PEP COMP #double AG");
feature_names.push_back("PEP COMP #double SL");*/
}
void DeNovoPartitionModel::fill_peak_offset_features(
Config *config,
const PeptideSolution& sol,
const vector< vector<mass_t> >& masses,
const vector< vector<intensity_t> >& intens,
RankBoostSample& rbs) const
{
int f_idx = peak_offset_start_idx;
vector<mass_t> break_masses;
sol.pep.calc_expected_breakage_masses(config,break_masses);
const int max_break = break_masses.size()-1;
const mass_t pm_with_19 = sol.pm_with_19;
int f;
for (f=0; f<ppp_frag_type_idxs.size() && f<2; f++)
{
const int frag_idx = ppp_frag_type_idxs[f];
const vector<intensity_t>& frag_intens = intens[frag_idx];
const vector<mass_t>& frag_masses = masses[frag_idx];
const FragmentType& fragment = config->get_fragment(frag_idx);
vector<mass_t> exp_peak_masses;
vector<int> inten_ranks, rank_positions;
exp_peak_masses.resize(break_masses.size(),NEG_INF);
find_inten_ranks(frag_intens, inten_ranks);
rank_positions.resize(inten_ranks.size(),NEG_INF);
int c;
for (c=1; c<inten_ranks.size(); c++)
if (frag_intens[c]>0)
rank_positions[inten_ranks[c]]=c;
// self offset features
mass_t max_self_off=0;
mass_t avg_self_off=0;
int num_frags_detected=0;
int b;
for (b=1; b<=max_break; b++)
{
exp_peak_masses[b]=fragment.calc_expected_mass(break_masses[b],pm_with_19);
if (frag_intens[b]<=0)
continue;
const float& frag_mass = frag_masses[b];
const float& peak_mass = exp_peak_masses[b];
mass_t offset=fabs(frag_masses[b]-exp_peak_masses[b]);
if (offset>3.0)
{
cout << "Error: bad peak offset calculations!" << endl;
exit(1);
}
avg_self_off+=offset;
if (offset>max_self_off)
max_self_off=offset;
num_frags_detected++;
}
rbs.add_real_feature(f_idx++,num_frags_detected);
if (num_frags_detected==0)
{
f_idx+=7;
continue;
}
rbs.add_real_feature(f_idx++,max_self_off);
if (num_frags_detected>0)
rbs.add_real_feature(f_idx,(avg_self_off/num_frags_detected));
f_idx++;
if (num_frags_detected<5)
{
f_idx+=4;
continue;
}
// between peak offsets
int num_gaps=0;
mass_t max_consec_offset=0;
mass_t avg_consec_offset=0;
for (b=1; b<exp_peak_masses.size(); b++)
if (frag_intens[b]>0)
break;
int prev=b++;
bool in_gap=false;
int num_gap_offsets=0;
for ( ; b<exp_peak_masses.size(); b++)
if (frag_intens[b]<=0)
{
if (! in_gap)
{
num_gaps++;
in_gap=true;
}
}
else
{
in_gap=false;
const mass_t offset=fabs(exp_peak_masses[b] -
exp_peak_masses[prev] -
frag_masses[b] +
frag_masses[prev]);
if (fabs(offset)>3.0)
{
cout << "Exp " << b << " " << exp_peak_masses[b] << endl;
cout << "Exp " << prev << " " << exp_peak_masses[prev] << endl;
cout << "Frag " << b << " " << frag_masses[b] << endl;
cout << "Frag " << prev << " " << frag_masses[prev] << endl;
exit(0);
}
if (offset>max_consec_offset)
max_consec_offset=offset;
avg_consec_offset+=offset;
num_gap_offsets++;
}
rbs.add_real_feature(f_idx++,max_consec_offset);
if (num_gap_offsets>0)
rbs.add_real_feature(f_idx,avg_consec_offset/num_gap_offsets);
f_idx++;
// Peak grab features
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -