📄 fragmentselection.cpp
字号:
const int charge = as.get_charge();
const int size_idx = as.get_size_idx();
const vector<Breakage>& breakages = as.get_breakages();
const mass_t min_mass = as.get_min_peak_mass() ;
const mass_t max_mass = as.get_max_peak_mass() ;
vector< vector<int> > frag_ind;
frag_ind.clear();
frag_ind.resize(num_regions[size_idx]);
for (j=0; j<num_regions[size_idx]; j++)
frag_ind[j].resize(all_fragments.size(),0);
for (j=0; j<breakages.size(); j++)
{
const Breakage& breakage = breakages[j];
int f;
if (very_verbose)
cout << " " << breakage.fragments.size();
// cout << j << " (" << breakage.breakage_region << ") ";
for (f=0; f<breakage.fragments.size(); f++)
{
if (breakage.fragments[f].mass>0)
{
// cout << all_fragments[breakage.fragments[f].frag_type_idx].frag_label
// << " ";
frag_probs[size_idx][breakage.region_idx]
[breakage.fragments[f].frag_type_idx]++;
frag_ind[breakage.region_idx][breakage.fragments[f].frag_type_idx]=1;
}
}
for (f=0; f<all_fragments.size(); f++)
{
mass_t exp_mass = all_fragments[f].calc_expected_mass(breakage.mass,as.get_true_mass_with_19());
if (exp_mass >= min_mass && exp_mass <= max_mass)
in_range_counts[size_idx][breakage.region_idx][f]++;
}
}
if (very_verbose)
cout << endl;
int f;
for (f=0; f<all_fragments.size(); f++)
{
int r;
for (r=0; r<num_regions[size_idx]; r++)
spec_counts[size_idx][r][f]+= frag_ind[r][f];
}
avg_rand += (as.get_num_peaks() * config->get_tolerance() *2.0) / (max_mass - min_mass) ;
}
const double num_spectra = fs.get_total_spectra();
avg_rand /= num_spectra;
int s;
for (s=0; s<frag_probs.size(); s++)
{
int r;
for (r=0; r<frag_probs[s].size(); r++)
{
int f;
for (f=0; f<frag_probs[s][r].size(); f++)
if (frag_probs[s][r][f]>0)
{
// cout << frag_probs[c][s][r][f] << " " <<
// in_range_counts[c][s][r][f] << endl;
frag_probs[s][r][f] /= in_range_counts[s][r][f];
}
}
}
}
struct frag_per {
bool operator< (const frag_per& other) const
{
return (percent > other.percent);
}
int frag_idx;
double percent;
int in_range_count;
};
/********************************************************************
Determines which fragments should belong in a regional fragment set.
These are the actual fragments that get scored when a breakage falls in
that regions.
*********************************************************************/
void select_regional_fragments(const FileManager& fm, Config *config, int charge,
bool verbose)
{
vector< vector< vector<double> > > frag_probs, in_range_counts;
vector< vector< vector<int> > > spec_counts;
double avg_rand;
int num_files_used;
collect_probs_of_known_fragments(fm, config, frag_probs, in_range_counts, spec_counts,
avg_rand, num_files_used, charge, verbose);
const double min_num_in_range = 0.05 * num_files_used;
int size_idx;
for (size_idx=0; size_idx < frag_probs.size(); size_idx++)
{
int region_idx;
for (region_idx=0; region_idx<frag_probs[size_idx].size(); region_idx++)
{
if (verbose)
{
cout << "Charge " << charge << ", Size " << size_idx <<", Region " <<
region_idx << endl;
cout << "Random " << fixed << setprecision(4) << avg_rand << endl;
}
const vector<int>& frag_type_idxs = config->get_regional_fragment_type_idxs(charge,
size_idx,region_idx);
vector<score_t> probs;
probs.resize(frag_type_idxs.size(),0);
int f;
cout << "FRAG_TYPES::: " << frag_type_idxs.size() << endl;
for (f=0; f<frag_type_idxs.size(); f++)
{
const int frag_type_idx = frag_type_idxs[f];
if (in_range_counts[size_idx][region_idx][frag_type_idx]>=min_num_in_range)
{
probs[f] = frag_probs[size_idx][region_idx][frag_type_idx];
}
}
config->sort_accoriding_to_fragment_probs(probs,charge,size_idx,region_idx);
config->set_regional_random_probability(charge,size_idx,region_idx,(float)avg_rand);
if (verbose)
{
const vector<score_t>& frag_probs = config->get_regional_fragments(charge,size_idx,region_idx).get_frag_probs();
for (f=0; f<frag_type_idxs.size(); f++)
{
cout << setw(12) << left << config->get_fragment(frag_type_idxs[f]).label <<
setprecision(5) << setw(10) << config->get_fragment(frag_type_idxs[f]).offset <<
" " << fixed << setprecision(5) << frag_probs[f] <<
" " << setprecision(0) << in_range_counts[size_idx][region_idx][frag_type_idxs[f]]
<< " ( " << spec_counts[size_idx][region_idx][frag_type_idxs[f]] << ")"<< endl;
}
cout << endl;
}
}
}
}
struct combo_pair {
bool operator< (const combo_pair& other) const
{
return count>other.count;
}
int count, f_idx1, f_idx2;
};
// chooses for each regional model the pair of fragments that identify
// the largest number of cuts not already found using strong fragments
void select_frag_combos(const FileManager& fm,
Config *config,
int charge,
int max_num_combos)
{
const vector< vector< RegionalFragments> > & regional_fragment_sets =
config->get_regional_fragment_sets()[charge];
const vector<FragmentType>& all_fragments = config->get_all_fragments();
const int num_all_frags = all_fragments.size();
if (charge<1)
{
cout << "Error: charge must be > 1" << endl;
exit(1);
}
int j;
for (j=0; j<regional_fragment_sets.size(); j++)
{
int k;
for (k=0; k<regional_fragment_sets[j].size(); k++)
config->clear_combos(charge,j,k);
}
int c; // combo counter
for (c=0; c<max_num_combos && c<6; c++)
{
FileSet fs;
fs.select_files(fm,0,10000,-1,1,charge);
vector< vector< vector< vector< int > > > > counts;
counts.resize(regional_fragment_sets.size());
int j;
for (j=0; j<regional_fragment_sets.size(); j++)
{
int k;
counts[j].resize(regional_fragment_sets[j].size());
for (k=0; k<regional_fragment_sets[j].size(); k++)
{
int f;
counts[j][k].resize(num_all_frags);
for (f=0; f<num_all_frags; f++)
counts[j][k][f].resize(num_all_frags,0);
}
}
cout << endl << "Round " << c+1 << endl << endl;
// check breakage instances
while(1)
{
int j;
AnnotatedSpectrum as;
if (! fs.get_next_spectrum(fm,config,&as))
break;
as.annotate_spectrum(as.get_true_mass_with_19());
const int size_idx = as.get_size_idx();
const vector<Breakage>& breakages = as.get_breakages();
const vector<Peak>& peaks = as.get_peaks();
const vector<int>& strong_peak_idxs = as.get_strong_peak_idxs();
vector<int> strong_ind;
strong_ind.resize(peaks.size(),0);
for (j=0; j<strong_peak_idxs.size(); j++)
strong_ind[strong_peak_idxs[j]]=1;
for (j=0; j<breakages.size(); j++)
{
const Breakage& breakage = breakages[j];
if (breakage.num_frags_detected<2)
continue;
// check if breakage is found by a strong frag
const int region_idx = breakage.region_idx;
const RegionalFragments& rf = regional_fragment_sets[size_idx][region_idx];
int f;
for (f=0; f<breakage.fragments.size(); f++)
if (rf.is_a_strong_frag_type(breakage.fragments[f].frag_type_idx) &&
strong_ind[breakage.fragments[f].peak_idx])
break;
if (f<breakage.fragments.size())
continue;
// check if breakage is found by an existing combo
const vector<FragmentCombo>& combos = rf.get_frag_type_combos();
bool found_by_combo = false;
if (combos.size()>0)
{
for (f=0; f<combos.size(); f++)
{
int i;
for (i=0; i<combos[f].frag_inten_idxs.size(); i++)
if (breakage.get_position_of_frag_idx(combos[f].frag_inten_idxs[i])<0)
break;
if (i<combos[f].frag_inten_idxs.size())
continue;
if (combos[f].frag_no_inten_idxs.size() == 0)
{
found_by_combo = true;
break;
}
for (i=0; i<combos[f].frag_no_inten_idxs.size(); i++)
if (breakage.get_position_of_frag_idx(combos[f].frag_no_inten_idxs[i])>0)
break;
if (i<combos[f].frag_inten_idxs.size())
continue;
found_by_combo = true;
break;
}
}
if (found_by_combo)
continue;
// add to count for every pair
for (f=0; f<breakage.fragments.size()-1; f++)
{
int g;
for (g=f+1; g<breakage.fragments.size(); g++)
{
int f_idx = breakage.fragments[f].frag_type_idx;
int g_idx = breakage.fragments[g].frag_type_idx;
if (! rf.is_a_strong_frag_type(f_idx) &&
! rf.is_a_strong_frag_type(g_idx) )
continue;
if (f_idx < g_idx)
{
counts[size_idx][region_idx][f_idx][g_idx]++;
// cout << size_idx << " " << region_idx << " " << f_idx << " " << g_idx <<
// " >> " << counts[size_idx][region_idx][f_idx][g_idx] << endl;
}
else
{
counts[size_idx][region_idx][g_idx][f_idx]++;
// cout << size_idx << " " << region_idx << " " << g_idx << " " << f_idx <<
// " >> " << counts[size_idx][region_idx][g_idx][f_idx] << endl;
}
}
}
}
}
for (j=0; j<regional_fragment_sets.size(); j++)
{
int k;
for (k=0; k<regional_fragment_sets[j].size(); k++)
{
vector<combo_pair> pairs;
pairs.clear();
int f,g;
for (f=0; f<num_all_frags-1; f++)
for (g=f+1; g<num_all_frags; g++)
if ( counts[j][k][f][g]>0)
{
combo_pair p;
p.count = counts[j][k][f][g];
p.f_idx1 = f;
p.f_idx2 = g;
pairs.push_back(p);
}
sort(pairs.begin(),pairs.end());
if (pairs.size() == 0)
continue;
const RegionalFragments& rf = regional_fragment_sets[j][k];
FragmentCombo combo;
if (rf.is_a_strong_frag_type(pairs[0].f_idx1))
{
combo.frag_inten_idxs.push_back(pairs[0].f_idx1);
combo.frag_inten_idxs.push_back(pairs[0].f_idx2);
}
else
{
combo.frag_inten_idxs.push_back(pairs[0].f_idx2);
combo.frag_inten_idxs.push_back(pairs[0].f_idx1);
}
config->get_non_const_regional_fragments(charge,j,k).add_combo(combo);
/* int i;
for (i=0; i<pairs.size(); i++)
cout << pairs[i].count << " " << config->get_fragment(pairs[i].f_idx1).label
<< " " << config->get_fragment(pairs[i].f_idx2).label << endl;
cout << endl;*/
}
}
}
}
struct f_pair {
bool operator< (const f_pair& other) const
{
return (prob>other.prob);
}
int idx;
score_t prob;
};
void explore_fragment_set(FileManager& fm, Config *config)
{
FileSet fs;
vector<FragmentType> all_frags = config->get_all_fragments();
const int num_frags = all_frags.size();
const mass_t tolerance = 0.0075;
vector<double> exp_counts, obs_counts, spec_counts;
exp_counts.resize(num_frags,0);
obs_counts.resize(num_frags,0);
spec_counts.resize(num_frags,0);
fs.select_all_files(fm);
while (1)
{
Spectrum s;
if (! fs.get_next_spectrum(fm,config,&s))
break;
vector<mass_t> break_masses;
s.get_peptide().calc_expected_breakage_masses(config,break_masses);
const int num_peaks = s.get_num_peaks();
vector<int> used_peak_ind;
vector<int> frag_ind;
used_peak_ind.resize(num_peaks,0);
frag_ind.resize(num_frags,0);
const mass_t min_mass = s.get_min_peak_mass() - tolerance;
const mass_t max_mass = s.get_max_peak_mass() + tolerance;
const mass_t true_mass_with_19 = s.get_true_mass_with_19();
// annotate peaks according to their order
int i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -