📄 spectrum.cpp
字号:
}
vector<rank_pair> mono_pairs, iso_pairs;
for (i=0; i<peaks.size(); i++)
{
rank_pair p;
p.idx=i;
p.inten = peaks[i].intensity;
if (peaks[i].iso_level == 0)
{
mono_pairs.push_back(p);
}
else
iso_pairs.push_back(p);
}
sort(mono_pairs.begin(),mono_pairs.end());
sort(iso_pairs.begin(),iso_pairs.end());
ranked_mono_peaks.resize(peaks.size());
for (i=0; i<mono_pairs.size(); i++)
ranked_mono_peaks[i]=mono_pairs[i].idx;
for (i=0; i<iso_pairs.size(); i++)
ranked_mono_peaks[mono_pairs.size()+i]=iso_pairs[i].idx;
}
/*****************************************************************
Find the strongest peaks by compairng their log local rank
to the number from the config file
******************************************************************/
void Spectrum::select_strong_peaks()
{
int i;
float thresh_log_level = log((float)(config->get_number_of_strong_peaks_per_local_window()));
strong_peak_idxs.clear();
for (i=0 ;i<peaks.size(); i++)
if (peaks[i].log_local_rank<=thresh_log_level && peaks[i].iso_level == 0)
strong_peak_idxs.push_back(i);
}
/***********************************************************************
calcs for each peak the probability of observing it at random (based on
the neighbor's distribution. Assumes the log_intens are distributed
according to a normal disribution.
************************************************************************/
void Spectrum::set_log_random_probs()
{
if (peaks.size()<2)
return;
const float log_add = log(1.2);
const float one_over_sqr_2pi = 1.0 / sqrt(2*3.1415927);
const mass_t peak_window_size = 0.6; // this is fixed and independent of the tolerance!
const mass_t margin = 25.0;
const mass_t window_size = 100.0;
const mass_t min_mass = peaks[0].mass;
const mass_t max_mass = peaks[peaks.size()-1].mass;
const mass_t viz_range = (max_mass - min_mass);
int i;
for (i=0; i<peaks.size(); i++)
{
const mass_t peak_mass = peaks[i].mass;
const mass_t rel_position = (peak_mass-min_mass)/viz_range;
const mass_t left_window = margin + rel_position * window_size;
const mass_t right_window = margin + window_size - left_window;
const PeakRange pr = get_peaks_in_range(peak_mass-left_window,peak_mass+right_window);
const float peak_window_prob = peak_window_size /(left_window + right_window);
const float zero_prob = pow((1.0 - peak_window_prob),pr.num_peaks);
// peaks[i].log_rand_prob = log(1 - zero_prob);
if (pr.num_peaks<5)
{
peaks[i].log_rand_prob = log(1.0-zero_prob) + log_add;
}
else
{
vector<float> log_intens;
int j;
for (j=pr.low_idx; j<=pr.high_idx; j++)
log_intens.push_back(peaks[j].log_intensity);
float mean=0,sd=1;
calc_mean_sd(log_intens,&mean,&sd);
const float e = (peaks[i].log_intensity - mean)/sd;
if (e<0)
{
peaks[i].log_rand_prob = log(1 - zero_prob) + log_add;
}
else
{
const float norm = (one_over_sqr_2pi/ sd) * exp(-0.5*e*e);
const float norm_const = (1 - zero_prob) / (one_over_sqr_2pi/ sd); //
peaks[i].log_rand_prob = log(norm*norm_const);
}
}
// cout << i << fixed << setprecision(2) << "\t" << peaks[i].mass << "\t" << peaks[i].log_intensity <<
// "\t" << peaks[i].rank << "\t" << pr.num_peaks << "\t" <<
// setprecision(4) << exp(peaks[i].log_rand_prob) << "\t( 0 prob " << zero_prob <<" )" << endl;
}
}
/**************************************************************
Sets all peaks that have iso level=0 as strong
***************************************************************/
void Spectrum::mark_all_non_iso_peaks_as_strong()
{
int i;
strong_peak_idxs.clear();
for (i=0 ;i<peaks.size(); i++)
if (peaks[i].iso_level == 0)
strong_peak_idxs.push_back(i);
}
/****************************************************************
Calculates different pm_with_19 values, for different charges
(calculates assumed pm_with_19 from the m/z value)
*****************************************************************/
void Spectrum::calc_original_pm_with_19_for_different_charges(
vector<mass_t>& pms_with_19) const
{
int i;
mass_t m_over_z = (org_pm_with_19 - MASS_PROTON) / charge;
pms_with_19.resize(5);
pms_with_19[0]=-1;
for (i=1; i<=4; i++)
pms_with_19[i] = m_over_z * i + MASS_PROTON;
}
/*********************************************************
Reads the spectrum info from a dta file.
returns false if there was a problem
**********************************************************/
bool Spectrum::read_from_dta( Config* _config, const char *dta_name,
int _charge , bool verbose)
{
ifstream fs(dta_name,ios::in);
if (! fs)
return false;
peaks.clear();
config = _config;
file_name = dta_name;
if (verbose)
cout << "Reading: " << file_name << endl;
char buff[1024];
fs.getline(buff,1024);
if (fs.bad())
return false;
while (buff[0] =='#')
{
istringstream is(buff);
string arg1,arg2;
is >> arg1 >> arg2;
if (! arg1.compare(0,5,"#FILE"))
{
file_name = arg2;
if (verbose)
cout << "#FILE " << file_name << endl;
}
else
if (! arg1.compare(0,4,"#SEQ"))
{
peptide.parse_from_string(config,arg2);
if (verbose)
cout << "#SEQ " << peptide.as_string(config) << endl;
}
fs.getline(buff,1024);
}
istringstream is(buff);
is >> org_pm_with_19 >> charge;
if (_charge>=0)
charge = _charge;
// charge =0;
if (charge == 0)
{
m_over_z = org_pm_with_19;
}
else
m_over_z = (org_pm_with_19 + (charge - 1) * 1.0078) / charge;
if (verbose)
cout << org_pm_with_19 << " " << charge << endl;
while (fs.getline(buff,1024))
{
istringstream is(buff);
Peak p;
is >> p.mass >> p.intensity;
if (p.mass <0 || p.intensity==0) // the peak probably got rounded down
continue;
peaks.push_back(p);
total_original_intensity+=p.intensity;
if (verbose)
cout << p.mass << " " << p.intensity << endl;
}
if (peaks.size() >0)
{
min_peak_mass = peaks[0].mass -1; // margin 1 Dalton
max_peak_mass = peaks[peaks.size()-1].mass +1;
}
else
return false;
if (charge>=2)
{
max_mass = org_pm_with_19 > max_peak_mass ?
org_pm_with_19 : max_peak_mass;
}
else
max_mass = max_peak_mass;
max_mass += 11.0; // margin of error
size_idx = config->calc_size_idx(charge,org_pm_with_19);
return true;
}
bool Spectrum::read_from_MGF_stream(Config *_config, FILE *stream, int _charge, bool verbose)
{
config = _config;
total_original_intensity=0;
char buff[1024];
peaks.clear();
/* while (1)
{
if( ! fgets(buff, 1024, stream) )
return false;
if (strlen(buff)<3)
continue;
if (strncmp(buff,"BEGIN IONS",10) )
continue;
break;
}*/
scan_number=-1;
retention_time=-1;
cluster_size=-1;
charge=-1;
m_over_z=-1;
org_pm_with_19=-1;
peaks.clear();
// read header info and first peak
while (1)
{
if( ! fgets(buff, 1024, stream))
return false;
if (! strncmp(buff,"END IONS",7))
return false;
if (! strncmp(buff,"TITLE=",6) )
{
buff[strlen(buff)-1]='\0';
file_name = buff+6;
continue;
}
else
if (! strncmp(buff,"SEQ=",4) )
{
string seq_string(buff+4);
peptide.parse_from_string(config,seq_string);
peptide.calc_mass(config);
continue;
}
else
if (! strncmp(buff,"SCAN=",5) )
{
if (sscanf(buff+5,"%d",&scan_number) != 1)
{
cout << "Error: couldn't read scan number!" << endl;
exit(1);
}
continue;
}
else
if (! strncmp(buff,"RT=",3) )
{
if (sscanf(buff+3,"%f",&retention_time) != 1)
{
cout << "Error: couldn't read retention_time!" << endl;
exit(1);
}
continue;
}
else
if (! strncmp(buff,"CLUSTER_SIZE=",13) )
{
if (sscanf(buff+13,"%d",&cluster_size) != 1)
{
cout << "Error: couldn't read cluster size!" << endl;
exit(1);
}
continue;
}
else
if ( ! strncmp(buff,"CHARGE=",6))
{
if (sscanf(buff,"CHARGE=%d",&charge) != 1)
{
cout << "Error: couldn't read charge!" << endl;
return false;
}
}
else
if (! strncmp(buff,"PEPMASS=",8))
{
istringstream is(buff+8);
is >> m_over_z;
if (m_over_z < 0)
{
cout << "Error: reading pepmass:" << m_over_z << endl;
return false;
}
}
else
{
istringstream is(buff);
Peak p;
is >> p.mass >> p.intensity;
if (p.mass >0 && p.intensity>0)
{
peaks.push_back(p);
total_original_intensity+=p.intensity;
break;
}
}
}
if (_charge > 0)
charge = _charge;
org_pm_with_19 = m_over_z * charge + MASS_PROTON * (1 - charge);
if (org_pm_with_19<0)
org_pm_with_19=-1;
// read rest of peaks
while (fgets(buff, 1024, stream))
{
istringstream is(buff);
Peak p;
if (! strncmp("END IONS",buff,7))
break;
is >> p.mass >> p.intensity;
if (p.mass <0 || p.intensity==0) // the peak probably got rounded down
{
continue;
}
peaks.push_back(p);
total_original_intensity+=p.intensity;
if (verbose)
cout << p.mass << " " << p.intensity << endl;
}
if (peaks.size() >0)
{
min_peak_mass = peaks[0].mass -1; // margin 1 Dalton
max_peak_mass = peaks[peaks.size()-1].mass +1;
}
else
return false;
max_mass = org_pm_with_19 > max_peak_mass ?
org_pm_with_19 : max_peak_mass;
max_mass += 11.0; // margin of error
size_idx = config->calc_size_idx(charge,org_pm_with_19);
if (peptide.get_num_aas()>0 && charge>0)
{
mass_t org_diff = org_pm_with_19 - peptide.get_mass() - 19.0183;
if (fabs(org_diff)>7.0)
{
// try and correct charge!
for (charge=1; charge<=4; charge++)
{
mass_t new_org_pm_with_19 = m_over_z * charge + MASS_PROTON * (1 - charge);
mass_t diff = fabs(new_org_pm_with_19 - peptide.get_mass() - MASS_OHHH);
if (diff<6)
{
org_pm_with_19 = new_org_pm_with_19;
return true;
}
}
cout << "Error: sequence mass doesn't add up: " << file_name << " (diff: "
<< setprecision(3) << fixed << org_diff << ")" << endl;
cout << "Pepitde: " << peptide.as_string(config) << endl;
cout << "Mass Cys = " << config->get_session_tables().get_aa2mass(Cys) << endl;
exit(1);
}
}
return true;
}
bool Spectrum::read_from_peak_arrays(Config* _config, Peptide& _pep, mass_t _pm_with_19,
int _charge, int _num_peaks, mass_t *_masses, intensity_t *_intensities)
{
peaks.clear();
config = _config;
file_name = "xxx";
peptide =_pep;
org_pm_with_19 = _pm_with_19;
charge = _charge;
m_over_z = (org_pm_with_19 + (charge - 1) * MASS_PROTON ) / charge;
peaks.resize(_num_peaks);
int i;
for (i=0; i<_num_peaks; i++)
{
peaks[i].mass = _masses[i];
peaks[i].intensity = _intensities[i];
total_original_intensity+=_intensities[i];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -