📄 denovopartmodel.cpp
字号:
{
int cut_idx;
for (cut_idx=1; cut_idx<prediction_ranks.size(); cut_idx++)
if (prediction_ranks[cut_idx]==pred_rank)
break;
if (cut_idx==prediction_ranks.size())
break;
const int obs_rank = inten_ranks[cut_idx];
if (frag_intens[cut_idx]>0)
score_offsets[pred_rank]=predicted_scores[cut_idx]-sorted_predicted_scores[obs_rank];
// cout << pred_rank << "\t" << cut_idx << "\t" << score_offsets[pred_rank] << endl;
}
for (i=0; i<10 && i<num_obs_frags; i++)
rbs.add_real_feature(f_idx+i,score_offsets[i]);
f_idx+=10;
}
// ppp.print_ranks_vs_intens(intens);
vector<score_pair> pairs;
if (num_ppp_frags>1)
{
const int frag1_idx = ppp_frag_type_idxs[0];
const int frag2_idx = ppp_frag_type_idxs[1];
int i;
int num_comp_pairs=0;
const int min_size = (intens[frag1_idx].size()>intens[frag2_idx].size() ? intens[frag2_idx].size() :
intens[frag1_idx].size());
for (i=1; i<min_size; i++)
{
const float inten1 = intens[frag1_idx][i];
const float inten2 = intens[frag2_idx][i];
const int pred1 = ppp_prediction_ranks[0][i];
const int pred2 = ppp_prediction_ranks[1][i];
if (inten1>0 && inten2>0)
num_comp_pairs++;
if (inten1>=0 && inten2>=0 && pred1<POS_INF && pred2<POS_INF)
pairs.push_back(score_pair(i,pred1+pred2));
}
rbs.add_real_feature(f_idx, (float)num_comp_pairs);
sort(pairs.begin(),pairs.end());
int counter=0;
for (i=pairs.size()-1; i>=0; i--)
{
if (counter++==7)
break;
const int cut_idx = pairs[i].idx;
int stat=0;
if (intens[frag1_idx][cut_idx]>0)
stat++;
if (intens[frag2_idx][cut_idx]>0)
stat++;
rbs.add_real_feature(f_idx+counter, (float)stat);
}
}
f_idx += 8;
}
void DeNovoPartitionModel::fill_prm_features(const PeptideSolution& sol,
const SeqPath& path,
int model_type,
RankBoostSample& rbs) const
{
int f_idx = prm_start_idx;
const PrmGraph *prm = path.prm_ptr;
const Config *config = prm->get_config();
const vector<PathPos>& positions = path.positions;
const int num_aas = path.get_num_aa();
vector<int> amino_acids;
path.get_amino_acids(amino_acids);
if (! prm)
{
cout << "Error: SeqPath has not prm ptr!" << endl;
exit(1);
}
int i;
const vector<mass_t>& aa2mass = config->get_aa2mass();
mass_t pep_mass = 0;
for (i=0; i<num_aas; i++)
pep_mass += aa2mass[amino_acids[i]];
const mass_t delta = (positions[positions.size()-1].mass - positions[0].mass) - pep_mass;
if (fabs(delta)>20)
{
int i;
for (i=0; i<positions.size(); i++)
{
cout << "Error: mismatch between peptide and Seqpath: " << i << "\t" <<
positions[i].node_idx << " " << positions[i].mass << endl;
}
exit(0);
}
vector<float> breakage_scores;
breakage_scores.clear();
for (i=0; i<positions.size(); i++)
{
const int node_idx = positions[i].node_idx;
if (node_idx>=0)
{
const Node& node = prm->get_node(node_idx);
if (node.type != NODE_N_TERM && node.type != NODE_C_TERM)
breakage_scores.push_back(positions[i].breakage->score);
}
}
float min_consec_two_breaks = POS_INF;
float min_consec_three_breaks = POS_INF;
float max_consec_two_breaks = NEG_INF;
float max_consec_three_breaks = NEG_INF;
float total_breakage_score = NEG_INF;
if (breakage_scores.size()>1)
{
total_breakage_score=0;
for (i=0; i<breakage_scores.size()-1; i++)
{
const float sum_score = breakage_scores[i]+breakage_scores[i+1];
if (sum_score<min_consec_two_breaks)
min_consec_two_breaks = sum_score;
if (sum_score>max_consec_two_breaks)
max_consec_two_breaks = sum_score;
if (i>0)
{
const float sum_three= sum_score + breakage_scores[i-1];
if (sum_three<min_consec_three_breaks)
min_consec_three_breaks = sum_three;
if (sum_three>max_consec_three_breaks)
max_consec_three_breaks = sum_three;
}
total_breakage_score+=breakage_scores[i];
}
total_breakage_score+=breakage_scores[i];
}
int aa_start=0;
if (positions[0].node_idx==0 && positions[0].edge_idx<0)
while (aa_start<positions.size() && positions[aa_start].node_idx<0)
aa_start++;
int aa_end = positions.size()-2;
while (aa_end>aa_start && positions[aa_end].node_idx<0)
aa_end--;
int eff_num_aas = (aa_end-aa_start);
if (sol.reaches_c_terminal)
eff_num_aas++;
if (eff_num_aas<6)
eff_num_aas=6;
const int num_breakage_scores = breakage_scores.size();
const int delta_num_breakages = eff_num_aas-breakage_scores.size();
// int num_missing_edges=0;
// for (i=0; i<positions.size()-1; i++)
// if (positions[i].node_idx>=0 && positions[i].edge_idx<0)
// num_missing_edges++;
sort(breakage_scores.begin(),breakage_scores.end());
// add features
int combo_idx=0;
if (sol.reaches_n_terminal && ! sol.reaches_c_terminal)
{
combo_idx=1;
}
else if (! sol.reaches_n_terminal && sol.reaches_c_terminal)
{
combo_idx=2;
}
else if (! sol.reaches_n_terminal && ! sol.reaches_c_terminal)
combo_idx = 3;
const int idx_shift = ( (model_type == 1 || model_type == 3) ? combo_idx * 6 : 0);
int sfidx = f_idx + idx_shift;
rbs.add_real_feature(sfidx, delta);
if (total_breakage_score>NEG_INF)
rbs.add_real_feature(sfidx+1, total_breakage_score);
if (num_breakage_scores>0 && total_breakage_score>NEG_INF)
rbs.add_real_feature(sfidx+2, total_breakage_score/num_breakage_scores);
if (eff_num_aas>0 && total_breakage_score>NEG_INF)
rbs.add_real_feature(sfidx+3, total_breakage_score/eff_num_aas);
rbs.add_real_feature(sfidx+4, path.path_score);
if (eff_num_aas>0)
rbs.add_real_feature(sfidx+5, path.path_score/eff_num_aas);
f_idx+= ((model_type == 1 || model_type == 3) ? 24 : 6);
if (model_type == 1 || model_type == 3)
{
int path_rank = path.org_rank;
if (model_type == 1 && path_rank > 200)
path_rank = 200;
if (model_type == 3 && path_rank> 75)
path_rank = 75;
rbs.add_real_feature(f_idx++, total_breakage_score);
rbs.add_real_feature(f_idx++, path.path_score);
rbs.add_real_feature(f_idx++, path_rank);
rbs.add_real_feature(f_idx++, path.multi_path_score);
if (path.delta_score>=0)
{
rbs.add_real_feature(f_idx,path.delta_score);
if (path.delta_score<=1.5)
{
rbs.add_real_feature(f_idx+1,path_rank);
}
else if (path.delta_score<=7.5)
{
rbs.add_real_feature(f_idx+2,path_rank);
}
else if (path.delta_score<=15.0)
{
rbs.add_real_feature(f_idx+3,path_rank);
}
else
rbs.add_real_feature(f_idx+4,path_rank);
}
f_idx+=5;
if (model_type == 3)
{
if (path.tag_percent_top_5>0)
rbs.add_real_feature(f_idx,path.tag_percent_top_5);
if (path.tag_percent_top_20>0)
rbs.add_real_feature(f_idx+1,path.tag_percent_top_20);
if (path.tag_percent_all>0)
rbs.add_real_feature(f_idx+2,path.tag_percent_all);
if (path.tag_percent_top_5>0)
{
rbs.add_real_feature(f_idx+3,path.org_rank);
}
else if (path.tag_percent_top_20>0)
{
rbs.add_real_feature(f_idx+4,path.org_rank);
}
else if (path.tag_percent_all>0)
{
rbs.add_real_feature(f_idx+5,path.org_rank);
}
if (path.multi_path_rank<POS_INF)
rbs.add_real_feature(f_idx+5,path.multi_path_rank);
f_idx+=7;
}
}
rbs.add_real_feature(f_idx++,delta_num_breakages);
rbs.add_real_feature(f_idx++,path.num_forbidden_nodes);
rbs.add_real_feature(f_idx++,num_breakage_scores);
if (num_breakage_scores>0)
rbs.add_real_feature(f_idx,breakage_scores[0]);
f_idx++;
if (num_breakage_scores>2)
rbs.add_real_feature(f_idx,breakage_scores[1]);
f_idx++;
if (num_breakage_scores>4)
rbs.add_real_feature(f_idx,breakage_scores[2]);
f_idx++;
if (min_consec_three_breaks<POS_INF)
rbs.add_real_feature(f_idx,min_consec_three_breaks);
f_idx++;
if (max_consec_three_breaks>NEG_INF)
rbs.add_real_feature(f_idx,max_consec_three_breaks);
f_idx++;
if (min_consec_two_breaks<POS_INF)
rbs.add_real_feature(f_idx,min_consec_two_breaks);
f_idx++;
if (max_consec_two_breaks>NEG_INF)
rbs.add_real_feature(f_idx,max_consec_two_breaks);
f_idx++;
int num1=0,num2=0,num3=0,num4=0,num5=0;
for (i=0; i<breakage_scores.size(); i++)
{
const float& score = breakage_scores[i];
if (score<-10)
{
num1++;
}
else if (score<0)
{
num2++;
}
else if (score<8)
{
num3++;
}
else if (score<15)
{
num4++;
}
else
num5++;
}
rbs.add_real_feature(f_idx++,num1);
rbs.add_real_feature(f_idx++,num2);
rbs.add_real_feature(f_idx++,num3);
rbs.add_real_feature(f_idx++,num4);
rbs.add_real_feature(f_idx++,num5);
if (num_breakage_scores>0)
{
rbs.add_real_feature(f_idx++,(float)num1/(float)num_breakage_scores);
rbs.add_real_feature(f_idx++,(float)(num1+num2)/(float)num_breakage_scores);
rbs.add_real_feature(f_idx++,(float)(num3+num4+num5)/(float)num_breakage_scores);
rbs.add_real_feature(f_idx++,(float)(num4+num5)/(float)num_breakage_scores);
}
else
f_idx+=4;
if (sol.reaches_n_terminal && positions[0].edge_idx>=0 && num_breakage_scores>2)
{
int i=1;
while (positions[i].node_idx<0)
i++;
rbs.add_real_feature(f_idx,positions[i].node_score);
}
f_idx++;
if (sol.reaches_c_terminal && num_breakage_scores>2)
{
int i=positions.size()-2;
while (positions[i].node_idx<0)
i++;
if (positions[i].edge_idx>=0)
{
rbs.add_real_feature(f_idx,positions[i].node_score);
}
}
f_idx++;
static vector<int> frag_idx_oris;
if (frag_idx_oris.size() == 0)
{
const vector<FragmentType>& all_frags = config->get_all_fragments();
frag_idx_oris.resize(all_frags.size(),NEG_INF);
int i;
for (i=0; i<all_frags.size(); i++)
frag_idx_oris[i]=all_frags[i].orientation;
}
int num_dual_ori=0;
int num_with1=0, num_with2=0, num_with_alot=0;
vector<int> oris;
for (i=0; i<positions.size(); i++)
{
const Breakage *breakage = positions[i].breakage;
if (! breakage)
continue;
const int num_frags = breakage->fragments.size();
if (num_frags ==1)
{
num_with1++;
}
else if (num_frags == 2)
{
num_with2++;
}
else if (num_frags > 5)
{
num_with_alot++;
}
int num_pre=0,num_suf=0;
int j;
for (j=0; j<breakage->fragments.size(); j++)
{
const int ori = frag_idx_oris[breakage->fragments[j].frag_type_idx];
if (ori==PREFIX)
{
num_pre++;
}
else
num_suf++;
}
if (num_pre == 0 || num_suf - num_pre > 5)
{
oris.push_back(SUFFIX);
}
else if (num_suf == 0 || num_pre - num_suf>5)
{
oris.push_back(PREFIX);
}
else
oris.push_back(99);
if (num_pre>0 && num_suf>0)
num_dual_ori++;
}
int prev=99;
int switches=0;
for (i=0; i<oris.size(); i++)
{
if (oris[i] != 99)
{
if (prev != 99 && prev != oris[i])
switches++;
prev=oris[i];
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -