📄 denovorankscore.cpp
字号:
score_pairs.clear();
if (peptide_sols.size()==0)
return;
int charge1=0,charge2=0;
mass_t mz1=0,mz2=0;
float prob1=0,prob2=0;
BasicSpectrum bs;
bs.peaks = peaks;
bs.num_peaks = num_peaks;
bs.ssf = ssf;
Config *config = model->get_config();
score_pairs.clear();
model->get_best_mz_charge(config,bs, &mz1, &charge1, &prob1,
&mz2, &charge2, &prob2, &pmc_sqs_res);
as.init_from_QCPeaks(config,peaks,num_peaks,ssf);
vector<ScalingFactor> same_scaling_factors;
same_scaling_factors.resize(10);
int i;
for (i=0; i<peptide_sols.size(); i++)
{
const PeptideSolution& sol = peptide_sols[i];
const Peptide& pep = sol.pep;
const mass_t mass_with_19 = pep.get_mass_with_19();
const int charge = sol.charge;
const int size_idx = (forced_size_idx>=0 ? forced_size_idx :
peak_model->get_size_group(charge,mass_with_19) );
DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
if (! part_model|| ! part_model->ind_was_initialized)
{
cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
exit(1);
}
if (same_scaling_factors[charge].score_scale == 1.0) // makes sure the same scale is used for all sequences with this charge
same_scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);
RankBoostSample rbs;
as.set_peptide(pep);
as.annotate_spectrum(mass_with_19);
fill_complete_peptide_rbs(sol, peaks, num_peaks, as, pmc_sqs_res, rbs, size_idx);
float score=part_model->boost_model.calc_rank_score(rbs);
score += same_scaling_factors[charge].score_shift;
score *= same_scaling_factors[charge].score_scale;
if (pep.get_num_aas()<8)
{
score -=1;
if (pep.get_num_aas()<7)
score -=1;
if (pep.get_num_aas()<6)
score -=5;
}
score_pairs.push_back(score_pair(i,score));
}
}
void DeNovoRankScorer::score_denovo_sequences(
const vector<SeqPath>& seq_paths,
SingleSpectrumFile *ssf,
QCPeak* peaks,
int num_peaks,
vector<score_pair>& score_pairs,
int forced_size_idx,
int max_idx_for_ranking) const
{
PeakRankModel *&peak_model = peak_prediction_models[model_type];
AnnotatedSpectrum as;
vector<PmcSqsChargeRes> pmc_sqs_res;
int charge1=0,charge2=0;
mass_t mz1=0,mz2=0;
float prob1=0,prob2=0;
BasicSpectrum bs;
bs.peaks = peaks;
bs.num_peaks = num_peaks;
bs.ssf = ssf;
Config *config = model->get_config();
score_pairs.clear();
model->get_best_mz_charge(config,bs, &mz1, &charge1, &prob1,
&mz2, &charge2, &prob2, &pmc_sqs_res);
const mass_t corr_mass_with_19 = mz1*charge1 - (charge1-1.0);
as.init_from_QCPeaks(config,peaks,num_peaks,ssf);
as.set_corrected_pm_with_19(corr_mass_with_19);
vector<ScalingFactor> scaling_factors;
scaling_factors.resize(10);
int i;
for (i=0; i<seq_paths.size(); i++)
{
if (max_idx_for_ranking>0 && i>=max_idx_for_ranking)
{
score_pairs.push_back(score_pair(i,-(float)i));
continue;
}
const SeqPath& path = seq_paths[i];
// create a peptide solution to represent the path
PeptideSolution sol;
vector<int> amino_acids;
path.get_amino_acids(amino_acids);
sol.charge = path.charge;
sol.pm_with_19 = path.pm_with_19;
sol.pep.set_peptide_aas(amino_acids);
sol.pep.set_n_gap(path.n_term_mass);
sol.reaches_n_terminal = (sol.pep.get_n_gap()<1.0);
sol.pep.calc_mass(config);
sol.reaches_c_terminal = (sol.pep.get_mass_with_19()>sol.pm_with_19 - 7);
sol.type = -1;
// choose pm according to if peptide reaches both ends
const mass_t mass_with_19 =( (sol.reaches_n_terminal && sol.reaches_c_terminal) ?
sol.pep.get_mass_with_19() : corr_mass_with_19);
const int charge = path.charge;
const int size_idx = (forced_size_idx>=0 ? forced_size_idx :
peak_model->get_size_group(charge,mass_with_19) );
RankBoostSample main_rbs; // holds the features that are common to all variants of the SeqPath
vector<RankBoostSample> combo_variant_rbs; // holds features that might depend on the identity of the
// missing amino acids on the n and c-terminal sides
as.set_peptide(sol.pep);
as.annotate_spectrum(mass_with_19);
fill_denovo_peptide_rbs_with_combos(sol, path, peaks, num_peaks, as, pmc_sqs_res, main_rbs, combo_variant_rbs, size_idx);
DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
if (! part_model || ! part_model->ind_was_initialized)
{
cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
exit(1);
}
if (scaling_factors[charge].score_scale == 1.0) // makes sure the same scale is used for all sequences with this charge
scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);
float max_score = NEG_INF;
int v_idx;
for (v_idx = 0; v_idx<combo_variant_rbs.size(); v_idx++)
{
RankBoostSample rbs = main_rbs;
RankBoostSample& var_rbs = combo_variant_rbs[v_idx];
int j;
for (j=0; j<var_rbs.real_active_idxs.size(); j++)
rbs.add_real_feature(var_rbs.real_active_idxs[j],var_rbs.real_active_values[j]);
const float score=part_model->boost_model.calc_rank_score(rbs);
if (score>max_score)
max_score=score;
}
max_score += scaling_factors[charge].score_shift;
max_score *= scaling_factors[charge].score_scale;
score_pairs.push_back(score_pair(i,max_score));
}
}
void DeNovoRankScorer::list_feature_differences(const vector<SeqPath>& seq_paths,
SingleSpectrumFile *ssf,
QCPeak* peaks,
int num_peaks) const
{
PeakRankModel *&peak_model = peak_prediction_models[model_type];
AnnotatedSpectrum as;
vector<PmcSqsChargeRes> pmc_sqs_res;
int charge1=0,charge2=0;
mass_t mz1=0,mz2=0;
float prob1=0,prob2=0;
BasicSpectrum bs;
bs.peaks = peaks;
bs.num_peaks = num_peaks;
bs.ssf = ssf;
Config *config = model->get_config();
model->get_best_mz_charge(config,bs, &mz1, &charge1, &prob1,
&mz2, &charge2, &prob2, &pmc_sqs_res);
const mass_t corr_mass_with_19 = mz1*charge1 - (charge1-1.0);
as.init_from_QCPeaks(config,peaks,num_peaks,ssf);
as.set_corrected_pm_with_19(corr_mass_with_19);
vector<ScalingFactor> scaling_factors;
scaling_factors.resize(10);
const mass_t mass_with_19 = seq_paths[0].pm_with_19;
const int charge = seq_paths[0].charge;
const int size_idx = peak_model->get_size_group(charge,mass_with_19);
DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
if (! part_model || ! part_model->ind_was_initialized)
{
cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
exit(1);
}
if (scaling_factors[charge].score_scale == 1.0) // makes sure the same scale is used for all sequences with this charge
scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);
vector<RankBoostSample> boost_samples;
int i;
for (i=0; i<seq_paths.size(); i++)
{
const SeqPath& path = seq_paths[i];
// create a peptide solution to represent the path
PeptideSolution sol;
vector<int> amino_acids;
path.get_amino_acids(amino_acids);
sol.charge = path.charge;
sol.pm_with_19 = path.pm_with_19;
sol.pep.set_peptide_aas(amino_acids);
sol.pep.set_n_gap(path.n_term_mass);
sol.reaches_n_terminal = (sol.pep.get_n_gap()<1.0);
sol.pep.calc_mass(config);
sol.reaches_c_terminal = (sol.pep.get_mass_with_19()>sol.pm_with_19 - 7);
sol.type = -1;
// choose pm according to if peptide reaches both ends
RankBoostSample main_rbs; // holds the features that are common to all variants of the SeqPath
vector<RankBoostSample> combo_variant_rbs; // holds features that might depend on the identity of the
// missing amino acids on the n and c-terminal sides
as.set_peptide(sol.pep);
as.annotate_spectrum(mass_with_19);
fill_denovo_peptide_rbs_with_combos(sol, path, peaks, num_peaks, as, pmc_sqs_res, main_rbs, combo_variant_rbs, size_idx);
float max_score = NEG_INF;
int v_idx;
for (v_idx = 0; v_idx<combo_variant_rbs.size(); v_idx++)
{
RankBoostSample rbs = main_rbs;
RankBoostSample& var_rbs = combo_variant_rbs[v_idx];
int j;
for (j=0; j<var_rbs.real_active_idxs.size(); j++)
rbs.add_real_feature(var_rbs.real_active_idxs[j],var_rbs.real_active_values[j]);
boost_samples.push_back(rbs);
}
}
part_model->boost_model.list_feature_vals_according_to_score(boost_samples);
}
void DeNovoRankScorer::score_tag_sequences(
const vector<SeqPath>& seq_paths,
SingleSpectrumFile *ssf,
QCPeak* peaks,
int num_peaks,
vector<score_pair>& score_pairs,
int forced_size_idx) const
{
PeakRankModel *&peak_model = peak_prediction_models[model_type];
AnnotatedSpectrum as;
vector<PmcSqsChargeRes> pmc_sqs_res;
int charge1=0,charge2=0;
mass_t mz1=0,mz2=0;
float prob1=0,prob2=0;
BasicSpectrum bs;
bs.peaks = peaks;
bs.num_peaks = num_peaks;
bs.ssf = ssf;
Config *config = model->get_config();
as.init_from_QCPeaks(config,peaks,num_peaks,ssf);
vector<ScalingFactor> scaling_factors;
scaling_factors.resize(10);
score_pairs.clear();
int i;
for (i=0; i<seq_paths.size(); i++)
{
const SeqPath& path = seq_paths[i];
// create a peptide solution to represent the path
PeptideSolution sol;
vector<int> amino_acids;
path.get_amino_acids(amino_acids);
sol.charge = path.charge;
sol.pm_with_19 = path.pm_with_19;
sol.pep.set_peptide_aas(amino_acids);
sol.pep.set_n_gap(path.n_term_mass);
sol.reaches_n_terminal = (sol.pep.get_n_gap()<1.0);
sol.pep.calc_mass(config);
sol.reaches_c_terminal = (sol.pep.get_mass_with_19()>sol.pm_with_19 - 7);
sol.type = -1;
// choose pm according to if peptide reaches both ends
const mass_t mass_with_19 =( (sol.reaches_n_terminal && sol.reaches_c_terminal) ?
sol.pep.get_mass_with_19() : path.pm_with_19);
const int charge = path.charge;
const int size_idx = (forced_size_idx>=0 ? forced_size_idx :
peak_model->get_size_group(charge,mass_with_19) );
RankBoostSample rbs; // holds the features that are common to all variants of the SeqPath
as.set_peptide(sol.pep);
as.annotate_spectrum(mass_with_19);
fill_tag_rbs(sol,path,peaks,num_peaks,as,rbs,size_idx);
DeNovoPartitionModel *part_model = dnv_part_models[charge][size_idx];
if (! part_model || ! part_model->ind_was_initialized)
{
cout << "Error: no de novo rank model for charge " << charge << " size " << size_idx << endl;
exit(1);
}
if (scaling_factors[charge].score_scale == 1.0) // makes sure the same scale is used for all sequences with this charge
scaling_factors[charge] = part_model->get_scaling_factor(mass_with_19);
float score = part_model->boost_model.calc_rank_score(rbs);
score += scaling_factors[charge].score_shift;
score *= scaling_factors[charge].score_scale;
score_pairs.push_back(score_pair(i,score));
}
}
void DeNovoRankScorer::read_denovo_rank_scorer_model(const char *path, string type_string, bool silent_ind)
{
ifstream ifs(path);
if (! ifs.good() || ! ifs.is_open())
{
cout << "Error: couldn't open file for reading: " << path << endl;
exit(1);
}
ifs >> dnv_model_name >> model_type;
if (model_type<0 || model_type>10)
{
cout << "Error: bad model type specified!" << endl;
exit(1);
}
string model_str, peak_model_str, comp_assign_str;
int max_charge;
ifs >> max_charge >> model_str >> peak_model_str >> comp_assign_str;
if (! model)
{
cout << "Error: must set score model first!" << endl;
exit(1);
}
if (! peak_prediction_models[model_type])
{
peak_prediction_models[model_type] = new PeakRankModel;
if (! peak_prediction_models[model_type]->read_peak_rank_model(model->get_config(),peak_model_str.c_str(),true))
{
cout << "Error: couldn't read peak model " << peak_model_str << endl;
exit(1);
}
}
if (! comp_assigner)
{
comp_assigner = new PeptideCompAssigner;
comp_assigner->read_and_init_from_tables(model->get_config(),comp_assign_str.c_str());
}
string score_model_name = model->get_model_name();
int num_models_read=0;
init_tables(silent_ind);
int c=0;
for (c=0; c<dnv_part_models.size(); c++)
{
int size_idx;
for (size_idx=0; size_idx<dnv_part_models[c].size(); size_idx++)
{
ostringstream oss;
oss << model->get_config()->get_resource_dir() << "/" <<score_model_name << "_" <<
type_string << "/" << dnv_model_name << "_" << c << "_" << size_idx << "_model.txt";
// cout << oss.str() << endl;
ifstream ifs(oss.str().c_str());
if (! ifs.is_open())
continue;
ifs.close();
dnv_part_models[c][size_idx]=new DeNovoPartitionModel;
if (dnv_part_models[c][size_idx]->read_denovo_part_model(oss.str().c_str(),model->get_config()))
{
if (! silent_ind)
cout << "Read de novo rank model " << c << " " << size_idx << endl;
num_models_read++;
}
}
}
if (! silent_ind)
cout << "Read " << num_models_read << " de novo rank models..." << endl;
}
void DeNovoRankScorer::write_denovo_rank_scorer_model(char *name)
{
if (! model || ! model->get_ind_pmcsqs_was_intialized())
{
cout << "Error: model not initialized!" << endl;
exit(1);
}
if (! peak_prediction_models[model_type] )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -