📄 discretepeakmodel.cpp
字号:
#include "DiscretePeakModel.h"
void DiscretePeakModel::clone_charge_model(int source_charge, int target_charge)
{
config.clone_regional_fragment_sets(source_charge, target_charge);
if (regional_models.size()<=target_charge)
regional_models.resize(target_charge+1);
regional_models[target_charge] = regional_models[source_charge];
}
/********************************************************************
Learns all different table models, and writes them to the resource
directory.
*********************************************************************/
void DiscretePeakModel::train_score_model(const char *name, const FileManager& fm,
int charge, int size_idx, int region_idx)
{
int c;
for (c=0; c<=fm.get_max_charge(); c++)
{
if (charge>0 && charge != c)
continue;
if (fm.get_num_spectra(c) < 1)
continue;
learn_parent_dependencies(fm,c);
learn_q_rand(fm,c);
print_report(c,0,1);
}
convert_probs_to_scores();
// write score model
write_score_model(name);
// trains and writes edge models
// edge_model.train_all_edge_models(fm,this);
// this->write_model();
// this->read_model(name);
// edge_model.read_edge_models(&config,(char *)config.get_model_name().c_str());
// trains and writes tag models
// tag_model.train_models(fm,this);
}
// learns the probabilities of randomly observing peaks
void DiscretePeakModel::learn_q_rand(const FileManager& fm, int charge)
{
int i;
FileSet fs;
const vector<FragmentType>& all_fragments = config.get_all_fragments();
const int max_frag_type_idx = config.get_all_fragments().size();
fs.select_files(fm,0,2500,-1,-1,charge);
vector<double> noise_probs;
vector<double> num_files_with_rank_level; // to nake unbaised calculation of q_rand
noise_probs.resize(level_thresholds.size(),0.001);
num_files_with_rank_level.resize(level_thresholds.size()+1,0);
for (i=0; i<fs.get_total_spectra(); i++)
{
AnnotatedSpectrum as;
fs.get_next_spectrum(fm,&config,&as);
this->init_model_for_scoring_spectrum(&as);
int spec_rank = this->get_lowest_level_in_spectrum(&as);
int j;
for (j=0; j<=spec_rank; j++)
num_files_with_rank_level[j]++;
as.annotate_spectrum(as.get_true_mass_with_19());
mass_t max_peak_mass = as.get_max_peak_mass();
if (max_peak_mass > as.get_org_pm_with_19())
max_peak_mass = as.get_org_pm_with_19();
mass_t peak_area = max_peak_mass - as.get_min_peak_mass();
// add annotations to the peak statistics only from the breakages 1 to n-1
double bin_prob = config.get_pm_tolerance()*2 / peak_area;
int b;
vector<Breakage>& breakages = as.get_non_const_breakages();
const vector<Peak>& peaks = as.get_peaks();
vector<bool> used_peaks;
used_peaks.resize(peaks.size(),false);
for (b=1; b<breakages.size()-1; b++)
{
this->set_breakage_peak_levels(&breakages[b]);
int f;
for (f=0; f<breakages[b].fragments.size(); f++)
{
const int f_idx = breakages[b].fragments[f].frag_type_idx;
const int p_idx = breakages[b].fragments[f].peak_idx;
used_peaks[p_idx]=true;
}
}
// add unused peaks to noise statistics
const vector< vector<PeakAnnotation> >& peak_annotations = as.get_peak_annotations();
int p_idx;
for (p_idx =0; p_idx<peaks.size(); p_idx++)
{
if (! used_peaks[p_idx] && peak_annotations[p_idx].size() == 0)
{
noise_probs[get_peak_level(p_idx)]+=bin_prob;
}
}
}
double inten_prob = 0;
for (i=1; i<noise_probs.size(); i++)
{
if (num_files_with_rank_level[i]>0)
{
noise_probs[i] /= num_files_with_rank_level[i];
inten_prob += noise_probs[i];
}
}
noise_probs[0] = 1.0 - inten_prob;
// correct frag_probs to acocunt for noise (some of the annotated peaks might be noise)
// calculate q values
q_rand = noise_probs;
}
void DiscretePeakModel::print_frag_probs(const vector<int>& frag_type_idxs,
int charge, int size, int region, ostream& os) const
{
const vector<FragmentType>& all_fragments = config.get_all_fragments();
int f;
int len=15;
os << "Rank " << "q_rand ";
for (f=0; f<frag_type_idxs.size(); f++)
{
int f_idx = frag_type_idxs[f];
int sl = (len - all_fragments[f_idx].label.length())/2;
os << setw(sl) << left << " " << setw(len -sl) << left << all_fragments[f_idx].label;
}
os << endl;
os << " ";
for (f=0; f<frag_type_idxs.size(); f++)
os << " prob score ";
os<< endl;
// regional_models[2][0][1].independent_frag_tables[0].
// const vector< vector<double> >& f_probs = regional_models[charge][size][region].independent_frag_tables;
vector<double> totals;
totals.resize(frag_type_idxs.size(),0);
double total_rand=0;
int r;
for (r=0; r<level_thresholds.size(); r++)
{
if (r==0)
{
os << setw(8) << left << "no peak";
}
else if (level_thresholds[r]-level_thresholds[r-1]==1)
{
os << setw(8) << left << level_thresholds[r];
}
else
{
os << setw(3) << left << level_thresholds[r-1]+1 << "-" << setw(3) << right << level_thresholds[r] << " ";
}
os << " " << setprecision(5) << left << q_rand[r] << " " ;
total_rand+=q_rand[r];
for (f=0; f<frag_type_idxs.size(); f++)
{
int f_idx = frag_type_idxs[f];
score_t frag_prob = regional_models[charge][size][region].independent_frag_tables[f_idx].get_score_prob_from_idx(r);
os << setprecision(4) << frag_prob << " ";
os << setw(5) << setprecision(2) << log(frag_prob / q_rand[r]) << " ";
totals[f]+= frag_prob;
}
os << endl;
if (r == 0)
os << endl;
}
os << endl;
os << "Toatal: ";
os << setw(10) << left << setprecision(4) << total_rand;
for (f=0; f<frag_type_idxs.size(); f++)
{
os << setw(len) << left << setprecision(4) << totals[f];
}
os << endl;
}
void DiscretePeakModel::print_report(int charge, int size, int region,
ostream& os) const
{
const vector<int> frag_type_idxs = config.get_regional_fragment_type_idxs(charge,size,region);
int i;
for (i=0; i<frag_type_idxs.size(); i+=4)
{
vector<int> f_idxs;
f_idxs.clear();
int f;
for (f=i; f<i+4 && f<frag_type_idxs.size(); f++)
f_idxs.push_back(frag_type_idxs[f]);
// this->print_frag_probs(f_idxs, charge, size, region, os);
os << endl << endl;
}
}
struct idx_dkl {
bool operator< ( const idx_dkl& other) const
{
return dkl_sum > other.dkl_sum;
}
int idx;
double dkl_sum;
};
/*************************************************************************
// creates models allowing each fragment to have upto two parents
// (chooses the ones that give the best difference in probability
// in terms of DKL). Parent fragments must appear before the current
// fragment in the order of the regional fragment set
*************************************************************************/
void DiscretePeakModel::learn_parent_dependencies(const FileManager& fm, int charge)
{
FileSet fs;
const vector< vector< vector< RegionalFragments > > >& regional_fragments = config.get_regional_fragment_sets();
fs.select_files(fm,0,1E6,-1,-1,charge);
// init regional models
if (regional_models.size() < charge+1)
regional_models.resize(charge+1);
// This holds all the possible tables (all combination of up to 2 parents)
// for each fragments in each region
vector< vector< vector< vector< FragProbTable > > > > all_tables; // size,region,frag,table
int size_idx;
regional_models[charge].resize(regional_fragments[charge].size());
all_tables.resize(regional_fragments[charge].size());
for (size_idx=0; size_idx<regional_fragments[charge].size(); size_idx++)
{
int region_idx;
regional_models[charge][size_idx].resize(regional_fragments[charge][size_idx].size());
all_tables[size_idx].resize(regional_fragments[charge][size_idx].size());
cout << "SIZE " << size_idx << endl;
for (region_idx=0; region_idx<regional_fragments[charge][size_idx].size(); region_idx++)
{
const vector<int>& frag_type_idxs = regional_fragments[charge][size_idx][region_idx].get_frag_type_idxs();
regional_models[charge][size_idx][region_idx].frag_type_idxs = frag_type_idxs;
regional_models[charge][size_idx][region_idx].independent_frag_tables.resize(frag_type_idxs.size());
regional_models[charge][size_idx][region_idx].single_breakage_tables.resize(frag_type_idxs.size());
all_tables[size_idx][region_idx].resize(frag_type_idxs.size());
cout << " REGION " << region_idx << endl;
// create all possible tables for a given fragments (set parents according
// to order in frag_type_idxs)
int f;
for (f=0; f<frag_type_idxs.size(); f++)
{
all_tables[size_idx][region_idx][f].clear();
cout << " FRAG " << config.get_fragment(frag_type_idxs[f]).label << endl;
// first add table with no parents
FragProbTable fpt;
vector<int> fields,num_vals;
fields.resize(5,-1);
fields[0]=frag_type_idxs[f];
num_vals.resize(5,0);
num_vals[0]= num_peak_levels;
fpt.init_fields(&config,fields,num_vals);
fpt.init_counts();
all_tables[size_idx][region_idx][f].push_back(fpt);
// add all tables with one parent
if (f<1)
continue;
int p;
for (p=0; p<f; p++)
{
FragProbTable fpt;
vector<int> fields,num_vals;
fields.resize(5,-1);
fields[0]=frag_type_idxs[f];
fields[1]=frag_type_idxs[p];
num_vals.resize(5,0);
num_vals[0]= num_peak_levels;
num_vals[1] = 2 ; // binary values
num_vals[1] = num_peak_levels;
fpt.init_fields(&config,fields,num_vals);
fpt.init_counts();
all_tables[size_idx][region_idx][f].push_back(fpt);
}
// add all tables with two parents
// if (f<2)
continue;
int p1,p2;
for (p1=0; p1<f; p1++)
for (p2=0; p2<f; p2++)
{
if (p1 == p2)
continue;
FragProbTable fpt;
vector<int> fields,num_vals;
fields.resize(5,-1);
fields[0]=frag_type_idxs[f];
fields[1]=frag_type_idxs[p1];
fields[2]=frag_type_idxs[p2];
num_vals.resize(5,0);
num_vals[0]= num_peak_levels;
num_vals[1] = 2 ; // binary values
num_vals[1] = num_peak_levels;
num_vals[2] = 2 ; // binary values
if (p1>p2 && num_vals[1] == num_vals[2])
continue;
fpt.init_fields(&config,fields,num_vals);
fpt.init_counts();
all_tables[size_idx][region_idx][f].push_back(fpt);
}
}
}
}
// Add instances to all tables
int counter=0;
while(1)
{
AnnotatedSpectrum as;
if (! fs.get_next_spectrum(fm,&config,&as))
break;
// if (counter>=300)
// break;
as.annotate_spectrum(as.get_true_mass_with_19());
const int size_idx = as.get_size_idx();
init_model_for_scoring_spectrum(&as);
cout << counter++ << " " << as.get_file_name() << endl;
int b;
vector<Breakage>& breakages = as.get_non_const_breakages();
for (b=0; b<breakages.size(); b++)
{
set_breakage_peak_levels(&breakages[b]);
int region_idx = config.calc_region_idx(breakages[b].mass,
as.get_true_mass_with_19(), as.get_charge(), as.get_min_peak_mass(),as.get_max_peak_mass());
// add an instance to each table
cout << region_idx;
const vector<int>& frag_type_idxs = regional_models[charge][size_idx][region_idx].frag_type_idxs;
int f,t;
// only add instances if the expected peak position is within the min/max range
for (f=0; f<frag_type_idxs.size(); f++)
{
mass_t exp_frag_mass = config.get_fragment(frag_type_idxs[f]).calc_expected_mass(breakages[b].mass,as.get_true_mass_with_19());
if (exp_frag_mass < as.get_min_peak_mass() || exp_frag_mass > as.get_max_peak_mass())
continue;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -