📄 peakrankmodel.cpp
字号:
frag_intens[i]=-999.0;
}
}
if (verbose)
{
cout << frag.label << "\t";
for (i=0; i<intens[f].size(); i++)
cout << " " << fixed << setprecision(3) << intens[f][i];
cout << endl;
}
}
if (verbose)
cout << endl;
}
void TrainingPeptide::get_ranks_for_frag_idx(int frag_idx, vector<int>& ranks) const
{
const int pos = get_frag_idx_pos(frag_idx);
if (pos<0)
{
cout << "Error: ranks not collect for frag " << frag_idx << endl;
exit(1);
}
find_ranks(intens[pos],ranks);
}
// made up score, higher means more basic amino acids
float TrainingPeptide::get_basicity_score() const
{
float basicity_score=0;
int i;
for (i=0; i<this->amino_acids.size(); i++)
{
if (amino_acids[i] == Arg)
basicity_score+=1.0;
if (amino_acids[i] == Lys)
basicity_score+=0.55;
if (amino_acids[i] == His)
basicity_score+=0.3;
}
return basicity_score;
}
void TrainingPeptide::write_to_stream(ofstream& ofs) const
{
if (frag_idxs.size() != intens.size())
{
cout << "Error: frag_idxs not same size ss intens!" << endl;
exit(1);
}
ofs << this->charge << " " << this->mobility << " " << fixed << setprecision(3) << this->pm_with_19 << endl;
ofs << this->amino_acids.size();
int i;
for (i=0; i<amino_acids.size(); i++)
ofs << " " << amino_acids[i];
ofs << endl;
ofs << this->frag_idxs.size();
for (i=0; i<frag_idxs.size(); i++)
ofs << " " << frag_idxs[i];
ofs << endl;
for (i=0; i<intens.size(); i++)
{
ofs << intens[i].size();
int j;
for (j=0; j<intens[i].size(); j++)
ofs << " " << intens[i][j];
ofs << endl;
}
}
bool TrainingPeptide::read_from_stream(ifstream& ifs)
{
char buff[2048];
ifs.getline(buff,512);
istringstream is(buff);
is >> charge >> mobility >> pm_with_19;
if (charge<=0 || charge >100 || mobility<MOBILE || mobility>NONMOBILE ||
pm_with_19<50 || pm_with_19>1000000 )
{
return false;
}
length=0;
ifs.getline(buff,1024);
is.clear();
is.str(buff);
is >> length;
if (length<=0 || length>1000)
{
return false;
}
int i;
this->amino_acids.resize(length,-1);
for (i=0; i<length; i++)
{
is >> amino_acids[i];
if (amino_acids[i]<0 || amino_acids[i]>1000)
{
return false;
}
}
int num_frags=0;
ifs.getline(buff,2048);
is.clear();
is.str(buff);
is >> num_frags;
if (num_frags<1 || num_frags>100)
{
return false;
}
this->frag_idxs.resize(num_frags,-1);
for (i=0; i<num_frags; i++)
{
is >> frag_idxs[i];
if (frag_idxs[i]<0 || frag_idxs[i]>1000)
{
return false;
}
}
intens.resize(num_frags);
for (i=0; i<num_frags; i++)
{
ifs.getline(buff,2048);
istringstream is(buff);
int size=0;
is >> size;
intens[i].resize(size,0);
int j;
for (j=0; j<size; j++)
is >> intens[i][j];
}
return true;
}
void TrainingPeptide::print(Config *config, ostream& ofs) const
{
const vector<string>& aa2label = config->get_aa2label();
const vector<mass_t>& aa2mass = config->get_aa2mass();
int i;
mass_t m=n_mass;
ofs << this->best_n_removed << " " << this->best_c_removed << " " << this->n_mass << " (" << pm_with_19 << ") ";
for (i=0; i<amino_acids.size(); i++)
{
ofs << aa2label[amino_acids[i]];
m+=aa2mass[amino_acids[i]];
}
ofs << " " << m << endl;
}
void read_data_into_training_peptides(const FileManager& fm,
Config *config,
PeakRankModel& rm,
vector<TrainingPeptide>& tps)
{
FileSet fs;
fs.select_all_files(fm);
while (1)
{
SingleSpectrumFile *ssf;
AnnotatedSpectrum as;
if (! fs.get_next_spectrum(fm,config,&as,&ssf))
break;
as.annotate_spectrum(as.get_true_mass_with_19());
TrainingPeptide tp;
tp.create_training_peptide(rm,as);
if (tp.frag_idxs.size()==0)
continue;
tps.push_back(tp);
}
}
void convert_list_to_trianing_peptide_file(char *list, char *tp_file,
char *model_name, char *ptm_line)
{
PeakRankModel rm;
RegularRankModel model;
FileManager fm;
vector<TrainingPeptide> all_tps;
if (! model_name)
{
model.read_model("LTQ_LOW_TRYP");
}
else
model.read_model(model_name);
Config *config= model.get_config();
if (! ptm_line)
{
config->apply_selected_PTMs("C+57:M+16:Q-17");
}
else
config->apply_selected_PTMs(ptm_line);
rm.set_mass_detection_defaults();
fm.init_from_list_file(config,list);
read_data_into_training_peptides(fm,config,rm,all_tps);
write_training_peptides_to_file(tp_file,all_tps);
}
void read_training_peptides_from_file(char *file, vector<TrainingPeptide>& all_tps,
int num_tp)
{
ifstream ifs(file);
char buff[64];
if (! ifs.is_open())
{
cout << "Error: couldn't open: " << file << endl;
exit(1);
}
int total_num_tp;
ifs.getline(buff,64);
sscanf(buff,"%d",&total_num_tp);
if (num_tp<0 || num_tp>total_num_tp)
num_tp = total_num_tp;
const int start_idx=all_tps.size();
all_tps.resize(start_idx+num_tp);
int num_errors=0;
int i;
for (i=0; i<num_tp; i++)
if (! all_tps[start_idx+i].read_from_stream(ifs))
num_errors++;
if (num_errors>0)
{
cout << "Warning: encountered " << num_errors << " errors reading " << num_tp << " training peptides..." << endl;
}
ifs.close();
}
void write_training_peptides_to_file(char *file, const vector<TrainingPeptide>& all_tps)
{
ofstream ofs(file);
ofs << all_tps.size() << endl;
int i;
for (i=0; i<all_tps.size(); i++)
{
all_tps[i].write_to_stream(ofs);
}
ofs.close();
}
void select_training_peptides(const vector<TrainingPeptide>& all_tps,
vector<int>& selected_idxs,
int charge,
int mobility,
int min_length,
int max_length,
mass_t min_pm_with_19,
mass_t max_pm_with_19)
{
selected_idxs.clear();
int i;
for (i=0; i<all_tps.size(); i++)
{
const TrainingPeptide& tp = all_tps[i];
if (charge>0 && charge != tp.charge)
continue;
if (mobility>0 && mobility != tp.mobility)
continue;
if (tp.length<min_length || tp.length>max_length ||
tp.pm_with_19<min_pm_with_19 || tp.pm_with_19>max_pm_with_19)
continue;
selected_idxs.push_back(i);
}
}
// for tps of a given charge
void size_mobility_stats(const vector<TrainingPeptide>& all_tps)
{
vector< vector<int> > counts; // size / mobiliy (0 = all);
int i;
counts.resize(100);
for (i=0; i<counts.size(); i++)
counts[i].resize(4,0);
for (i=0; i<all_tps.size(); i++)
{
int size_idx = (int)(all_tps[i].pm_with_19 * 0.01);
if (all_tps[i].mobility<MOBILE || all_tps[i].mobility>NONMOBILE)
{
cout << "Error: bad mobility value!" << endl;
exit(1);
}
if (size_idx<3 || size_idx>97)
{
cout << "Error: bad size_idx!" << endl;
exit(1);
}
counts[size_idx][0]++;
counts[size_idx][all_tps[i].mobility]++;
}
for (i=0; i<99; i++)
{
if (counts[i][0]==0)
continue;
cout << i*100;
int j;
for (j=0; j<4; j++)
{
cout << "\t" << counts[i][j];
counts[99][j]+=counts[i][j];
}
cout << endl;
}
cout << "---------------------------------------" << endl;
for (i=0; i<4; i++)
cout << "\t" << counts[99][i];
cout << endl << endl;
}
// for tps of a given charge
void aa_composition_stats(const vector<TrainingPeptide>& all_tps, Config *config)
{
const int num_aas = config->get_max_session_aa_idx()+1;
vector< vector<int> > counts; // size / mobiliy (0 = all);
int i;
counts.resize(num_aas+1);
for (i=0; i<=num_aas; i++)
counts[i].resize(4,0);
for (i=0; i<all_tps.size(); i++)
{
int size_idx = (int)(all_tps[i].pm_with_19 * 0.01);
if (all_tps[i].mobility<MOBILE || all_tps[i].mobility>NONMOBILE)
{
cout << "Error: bad mobility value!" << endl;
exit(1);
}
if (size_idx<3 || size_idx>97)
{
cout << "Error: bad size_idx!" << endl;
exit(1);
}
int j;
for (j=0; j<all_tps[i].amino_acids.size(); j++)
{
counts[all_tps[i].amino_acids[j]][0]++;
counts[all_tps[i].amino_acids[j]][all_tps[i].mobility]++;
}
}
for (i=0; i<num_aas; i++)
{
int j;
for (j=0; j<4; j++)
counts[num_aas][j]+=counts[i][j];
}
for (i=0; i<num_aas; i++)
{
if (counts[i][0]==0)
continue;
cout << config->get_aa2label()[i];
int j;
for (j=0; j<4; j++)
{
cout << "\t" << counts[i][j] << "\t" << fixed << setprecision(3) <<
counts[i][j]/(double)counts[num_aas][j];
}
cout << endl;
}
cout << "--------------------------------------------------------" << endl;
for (i=0; i<4; i++)
cout << "\t" << counts[num_aas][i] << "\t";
cout << endl << endl;
}
void fragment_detection_stats(const vector<TrainingPeptide>& all_tps, Config *config)
{
const int num_frags = config->get_all_fragments().size();
vector< vector<int> > counts; // size / mobiliy (0 = all);
vector< vector<int> > totals;
int i;
counts.resize(num_frags);
for (i=0; i<=num_frags; i++)
counts[i].resize(4,0);
totals = counts;
for (i=0; i<all_tps.size(); i++)
{
int size_idx = (int)(all_tps[i].pm_with_19 * 0.01);
if (all_tps[i].mobility<MOBILE || all_tps[i].mobility>NONMOBILE)
{
cout << "Error: bad mobility value!" << endl;
exit(1);
}
if (size_idx<3 || size_idx>97)
{
cout << "Error: bad size_idx!" << endl;
exit(1);
}
const TrainingPeptide& tp = all_tps[i];
int f;
for (f=0; f<tp.frag_idxs.size(); f++)
{
const int frag_idx = tp.frag_idxs[f];
int a;
for (a=0; a<tp.intens[f].size(); a++)
{
if (tp.intens[f][a]>=0)
{
totals[frag_idx][0]++;
totals[frag_idx][tp.mobility]++;
}
if (tp.intens[f][a]>0)
{
counts[frag_idx][0]++;
counts[frag_idx][tp.mobility]++;
}
}
}
}
for (i=0; i<num_frags; i++)
{
if (totals[i][0]==0)
continue;
cout << config->get_all_fragments()[i].label;
int j;
for (j=0; j<4; j++)
{
cout << "\t" << totals[i][j] << "\t" << fixed << setprecision(3) <<
counts[i][j]/(double)totals[i][j];
}
cout << endl;
}
cout << "--------------------------------------------------------" << endl;
}
void generate_size_reports()
{
PeakRankModel rm;
RegularRankModel model;
vector<FileManager> fms;
model.read_model("LTQ_LOW_TRYP");
Config *config= model.get_config();
config->apply_selected_PTMs("C+57:M+16:Q-17");
rm.set_mass_detection_defaults();
int charge;
for (charge=1; charge<=3; charge++)
{
vector<TrainingPeptide> tps;
tps.clear();
char shew_file[256],hek_file[256];
sprintf(shew_file,"C:\\Work\\msms5\\NewScore\\tps\\Shew_98_%d_unique_tps.txt",charge);
sprintf(hek_file,"C:\\Work\\msms5\\NewScore\\tps\\HEK_98_%d_unique_tps.txt",charge);
read_training_peptides_from_file(shew_file,tps);
read_training_peptides_from_file(hek_file,tps);
cout << "Cahrge " << charge << " Read " << tps.size() << " peptides..." << endl;
size_mobility_stats(tps);
cout << endl;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -