📄 denovopartmodel.cpp
字号:
vector<mass_t> offset_diffs;
offset_diffs.clear();
for (b=1; b<max_break-2; b++)
{
if (frag_intens[b]<=0)
continue;
if (frag_intens[b+1]>0 && frag_intens[b+2]>0)
{
const mass_t offset1 = fabs(exp_peak_masses[b+2]-exp_peak_masses[b]-frag_masses[b+2]+frag_masses[b]);
const mass_t offset2 = fabs(exp_peak_masses[b+1]-exp_peak_masses[b]-frag_masses[b+1]+frag_masses[b]);
const mass_t offset3 = fabs(exp_peak_masses[b+2]-exp_peak_masses[b+1]-frag_masses[b+2]+frag_masses[b+1]);
offset_diffs.push_back(fabs(offset1-offset2-offset3));
}
if (frag_intens[b+1]>0 && frag_intens[b+2]<=0 && frag_intens[b+3]>0)
{
const mass_t offset1 = fabs(exp_peak_masses[b+3]-exp_peak_masses[b]-frag_masses[b+3]+frag_masses[b]);
const mass_t offset2 = fabs(exp_peak_masses[b+1]-exp_peak_masses[b]-frag_masses[b+1]+frag_masses[b]);
const mass_t offset3 = fabs(exp_peak_masses[b+3]-exp_peak_masses[b+1]-frag_masses[b+3]+frag_masses[b+1]);
offset_diffs.push_back(fabs(offset1-offset2-offset3));
}
if (frag_intens[b+1]<=0 && frag_intens[b+2]>0 && frag_intens[b+3]>0)
{
const mass_t offset1 = fabs(exp_peak_masses[b+3]-exp_peak_masses[b]-frag_masses[b+3]+frag_masses[b]);
const mass_t offset2 = fabs(exp_peak_masses[b+2]-exp_peak_masses[b]-frag_masses[b+2]+frag_masses[b]);
const mass_t offset3 = fabs(exp_peak_masses[b+3]-exp_peak_masses[b+2]-frag_masses[b+3]+frag_masses[b+2]);
offset_diffs.push_back(fabs(offset1-offset2-offset3));
}
}
sort(offset_diffs.begin(),offset_diffs.end());
int i,counter=0;
for (i=offset_diffs.size()-1; i>=0; i--)
{
if (offset_diffs[i]==0)
break;
rbs.add_real_feature(f_idx+counter,offset_diffs[i]);
if (++counter==3)
break;
}
f_idx+=3;
}
}
void DeNovoPartitionModel::fill_ann_peak_features(const PeptideSolution& sol,
const vector< vector<mass_t> >& masses,
const vector< vector<intensity_t> >& intens,
const AnnotatedSpectrum& as,
RankBoostSample& rbs) const
{
int f_idx = ann_peak_start_idx;
const intensity_t total_inten = as.get_total_original_intensity();
intensity_t ann_inten=0;
int num_ann_inten=0;
vector<int> frag_counts;
frag_counts.resize(intens.size(),0);
int f;
for (f=0; f<intens.size(); f++)
{
const int num_intens = intens[f].size();
int c;
for (c=1; c<num_intens; c++)
{
const intensity_t& frag_inten = intens[f][c];
if (frag_inten>0)
{
ann_inten += frag_inten;
frag_counts[f]++;
}
}
num_ann_inten+=frag_counts[f];
}
int length = sol.pep.get_num_aas();
if (length<7)
length=7;
rbs.add_real_feature(f_idx++,sol.pm_with_19-as.get_org_pm_with_19());
rbs.add_real_feature(f_idx++,length);
if (total_inten>0)
rbs.add_real_feature(f_idx,(float)ann_inten/total_inten);
f_idx++;
if (as.get_num_peaks()>0)
rbs.add_real_feature(f_idx,(float)num_ann_inten/as.get_num_peaks());
f_idx++;
const vector<Peak>& peaks = as.get_peaks();
const vector< vector<PeakAnnotation> >& peak_anns = as.get_peak_annotations();
int count25=0;
int count_half=0;
int count_top_third=0;
int count_mid_third=0;
int count_last_third=0;
const int half_num_peaks = (peaks.size()/2);
const int third_num_peaks = (peaks.size()/3);
const int two_third_num_peaks = third_num_peaks*2;
int i;
for (i=0; i<peaks.size(); i++)
{
if (peak_anns[i].size()==0)
continue;
const int rank = peaks[i].rank;
if (rank>=two_third_num_peaks)
{
count_last_third++;
continue;
}
if (rank>=third_num_peaks)
{
count_mid_third++;
}
else
count_top_third++;
if (rank<half_num_peaks)
count_half++;
if (rank<25)
count25++;
}
rbs.add_real_feature(f_idx++,(float)count25);
rbs.add_real_feature(f_idx++,(float)count_half);
rbs.add_real_feature(f_idx++,(float)(count_top_third-count_mid_third));
rbs.add_real_feature(f_idx++,(float)(count_top_third-count_last_third));
rbs.add_real_feature(f_idx++,(float)(count_mid_third-count_last_third));
const int num_frags = as.get_config()->get_all_fragments().size();
int max_f = 7;
if (num_frags<max_f)
max_f = num_frags;
if (intens.size()<max_f)
max_f=intens.size();
for (f=0; f<max_f; f++)
{
rbs.add_real_feature(f_idx+f,(float)frag_counts[f]);
}
f_idx+=7;
}
void DeNovoPartitionModel::fill_inten_balance_features(Config *config,
const PeptideSolution& sol,
const SeqPath& path,
RankBoostSample& rbs) const
{
int f_idx = inten_balance_start_idx;
const vector<int>& amino_acids = sol.pep.get_amino_acids();
const vector<PathPos>& positions = path.positions;
int n_idx = 0;
while (n_idx<positions.size() &&
( ! positions[n_idx].breakage || positions[n_idx].breakage->fragments.size() == 0))
n_idx++;
int c_idx = positions.size()-1;
while (c_idx>0 &&
( ! positions[c_idx].breakage || positions[c_idx].breakage->fragments.size() == 0))
c_idx--;
const int nc_diff = c_idx-n_idx;
rbs.add_real_feature(f_idx++,(float)nc_diff);
if (nc_diff<4)
return;
vector<int> pos_idxs;
const int mid_idx = (n_idx + c_idx)/2;
int i;
for (i=mid_idx-2; i<=mid_idx+2; i++)
if (positions[i].node_idx>0)
pos_idxs.push_back(i);
intensity_t pre_inten=0;
intensity_t suf_inten=0;
for (i=0; i<pos_idxs.size(); i++)
{
Breakage *breakage = positions[pos_idxs[i]].breakage;
int j;
for (j=0; j<breakage->fragments.size(); j++)
{
const BreakageFragment& bf = breakage->fragments[j];
const FragmentType& frag = config->get_fragment(bf.frag_type_idx);
if (frag.orientation == PREFIX)
{
pre_inten += bf.intensity;
}
else
suf_inten += bf.intensity;
}
}
const intensity_t sum_inten = pre_inten + suf_inten;
if (sum_inten<=0)
return;
const float pre_ratio = pre_inten/sum_inten;
intensity_t all_pre_inten=0;
intensity_t all_suf_inten=0;
for (i=0; i<positions.size(); i++)
{
Breakage *breakage = positions[i].breakage;
if (! breakage)
continue;
int j;
for (j=0; j<breakage->fragments.size(); j++)
{
const BreakageFragment& bf = breakage->fragments[j];
const FragmentType& frag = config->get_fragment(bf.frag_type_idx);
if (frag.orientation == PREFIX)
{
all_pre_inten += bf.intensity;
}
else
all_suf_inten += bf.intensity;
}
}
const intensity_t all_sum_inten = all_pre_inten + all_suf_inten;
if (all_sum_inten<=0)
return;
const float all_pre_ratio = all_pre_inten/all_sum_inten;
// special N C side aa indicators
int num_nH=0, num_cH=0;
int num_nK=0, num_cK=0;
int num_nR=0, num_cR=0;
for (i=0; i<=mid_idx; i++)
{
if (amino_acids[i] == His)
num_nH++;
if (amino_acids[i] == Lys)
num_nK++;
if (amino_acids[i] == Arg)
num_nR++;
}
// uses regular amino acid codes
if (sol.most_basic_aa_removed_from_n>0)
{
if (sol.most_basic_aa_removed_from_n == His)
{
num_nH++;
}
else if (sol.most_basic_aa_removed_from_n == Lys)
{
num_nK++;
}
else if (sol.most_basic_aa_removed_from_n == Arg)
num_nR++;
}
for (i=mid_idx; i<positions.size()-1; i++)
{
if (amino_acids[i] == His)
num_cH++;
if (amino_acids[i] == Lys)
num_cK++;
if (amino_acids[i] == Arg)
num_cR++;
}
// uses regular amino acid codes
if (sol.most_basic_aa_removed_from_c>0)
{
if (sol.most_basic_aa_removed_from_c == His)
{
num_cH++;
} else if (sol.most_basic_aa_removed_from_c == Lys)
{
num_cK++;
}
else if (sol.most_basic_aa_removed_from_c == Arg)
num_cR++;
}
const int RKH_n_combo_idx = calc_RKH_combo_idx(num_nR,num_nK,num_nH);
const int RKH_c_combo_idx = calc_RKH_combo_idx(num_cR,num_cK,num_cH);
const float RKH_liniar_pair_idx = RKH_pair_matrix[RKH_n_combo_idx][RKH_c_combo_idx];
rbs.add_real_feature(f_idx++,(float)RKH_n_combo_idx);
rbs.add_real_feature(f_idx++,(float)RKH_c_combo_idx);
rbs.add_real_feature(f_idx++,RKH_liniar_pair_idx);
if (RKH_liniar_pair_idx<=-4)
{
rbs.add_real_feature(f_idx,pre_ratio);
rbs.add_real_feature(f_idx+5,all_pre_ratio);
}
else if (RKH_liniar_pair_idx<=-2)
{
rbs.add_real_feature(f_idx+1,pre_ratio);
rbs.add_real_feature(f_idx+6,all_pre_ratio);
}
else if (RKH_liniar_pair_idx<=1)
{
rbs.add_real_feature(f_idx+2,pre_ratio);
rbs.add_real_feature(f_idx+7,all_pre_ratio);
}
else if (RKH_liniar_pair_idx<=3)
{
rbs.add_real_feature(f_idx+3,pre_ratio);
rbs.add_real_feature(f_idx+8,all_pre_ratio);
}
else
{
rbs.add_real_feature(f_idx+4,pre_ratio);
rbs.add_real_feature(f_idx+9,all_pre_ratio);
}
f_idx+=10;
/* feature_names.push_back("INTEN BAL c_idx - n_idx");
feature_names.push_back("INTEN BAL RHK N");
feature_names.push_back("INTEN BAL RHK C");
feature_names.push_back("INTEN BAL RHK pair");
feature_names.push_back("INTEN BAL prefix prop, pair -4,-5");
feature_names.push_back("INTEN BAL prefix prop, pair -2,-3");
feature_names.push_back("INTEN BAL prefix prop, pair -1,0,+1");
feature_names.push_back("INTEN BAL prefix prop, pair +2,+3");
feature_names.push_back("INTEN BAL prefix prop, pair +4,+5");*/
}
void DeNovoPartitionModel::fill_tryp_terminal_features(const PeptideSolution& sol,
const SeqPath& path,
RankBoostSample& rbs) const
{
int f_idx = tryp_terminal_start_idx;
const Peptide& pep = sol.pep;
const int num_aas = pep.get_num_aas();
const vector<int>& amino_acids = pep.get_amino_acids();
int first_aa = amino_acids[0];
int last_aa = amino_acids[num_aas-1];
int aa_before = pep.get_aa_before();
int aa_after = pep.get_aa_after();
int num_bad_tryp=0;
int num_tryp=0;
if (aa_before<Ala || aa_before == Lys || aa_before == Arg || ! sol.reaches_n_terminal)
num_tryp++;
if (sol.type>=0)
{
if (last_aa == Lys || last_aa == Arg || aa_after <Ala)
num_tryp++;
}
else
{
if (last_aa == Lys || last_aa == Arg || ! sol.reaches_c_terminal)
num_tryp++;
}
// count missed tryptic (not including Pro)
int num_missed_tryp=0;
int i;
for (i=0; i<amino_acids.size()-1; i++)
if ((amino_acids[i] == Arg || amino_acids[i] == Lys) && amino_acids[i+1] != Pro)
num_missed_tryp++;
if (! sol.reaches_c_terminal && (last_aa == Lys || last_aa == Arg))
num_missed_tryp++;
rbs.add_real_feature(f_idx++,num_tryp);
rbs.add_real_feature(f_idx++,num_missed_tryp);
if (sol.reaches_c_terminal)
rbs.add_real_feature(f_idx,last_aa);
f_idx++;
if (path.positions.size()>2)
{
PathPos digest_pos = path.positions[path.positions.size()-2];
if (digest_pos.node_idx>0)
{
const int num_frags = digest_pos.breakage->fragments.size();
if (last_aa == Arg)
{
rbs.add_real_feature(f_idx,num_frags);
}
else if (last_aa == Lys)
{
rbs.add_real_feature(f_idx+1,num_frags);
}
else
rbs.add_real_feature(f_idx+2,num_frags);
}
}
f_idx += 3;
if (sol.reaches_n_terminal)
{
if (last_aa == Arg)
{
rbs.add_real_feature(f_idx,first_aa);
}
else if (last_aa == Lys)
{
rbs.add_real_feature(f_idx+1,first_aa);
}
else
rbs.add_real_feature(f_idx+2,first_aa);
}
f_idx+=3;
}
void DeNovoPartitionModel::fill_PTM_peak_features(
Config *config,
const PeptideSolution& sol,
const vector< vector<mass_t> >& masses,
const vector< vector<intensity_t> >& intens,
const AnnotatedSpectrum& as,
RankBoostSample& rbs) const
{
static const int m16_aa_idx = config->get_aa_from_label("M+16");
static const int b_frag_idx = config->get_frag_idx_from_label("b");
static const int y_frag_idx = config->get_frag_idx_from_label("y");
static const mass_t tolerance = (config->get_tolerance()>0.2 ?
config->get_tolerance() * 0.5 : config->get_tolerance());
int f_idx = PTM_peak_start_idx;
// find the most prominant occerences of M+16
if (m16_aa_idx>0)
{
const vector<int>& amino_acids = sol.pep.get_amino_acids();
vector<score_pair> pairs;
pairs.clear();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -