📄 spectrum.cpp
字号:
#include "Spectrum.h"
#include "Isotopes.h"
#include "QuickClustering.h"
#include "FileManagement.h"
#include "auxfun.h"
void Spectrum::init_spectrum(bool perform_filtering)
{
join_adjacent_peaks();
if (perform_filtering)
filter_peaks();
init_index_array();
calc_ranks();
calc_log_local_ranks();
normalize_intensities();
calc_isotope_levels();
select_strong_peaks();
set_log_random_probs();
}
/*********************************************************************
Initializes the index array.
For each rounded off Dalton m, it gives the index of the closest peak i
with mass m_i>= m.
**********************************************************************/
void Spectrum::init_index_array()
{
int i,c,size=(int)max_mass;
const int max_peak_idx = peaks.size()-1;
index_array.clear();
index_array.resize(size);
i=0;
int m=(int)peaks[0].mass;
while (i<m)
index_array[i++]=0;
c=0;
while (c< max_peak_idx)
{
int curr_m=(int)peaks[c].mass;
int next_m = curr_m;
int next_c = c;
while (next_m == curr_m && next_c<max_peak_idx)
next_m=(int)peaks[++next_c].mass;
while (i<next_m)
index_array[i++]=c;
c=next_c;
}
while (i<size)
index_array[i++]=max_peak_idx;
}
/**********************************************************************
It then joins all pairs that are less than max_proximity away from each
other.
**********************************************************************/
void Spectrum::join_adjacent_peaks()
{
vector<Peak> new_peaks;
mass_t max_proximity = config->get_tolerance() * 0.5;
int i;
if (peaks.size() == 0)
return;
new_peaks.clear();
new_peaks.push_back(peaks[0]);
int prev_idx=0;
for (i=1; i<peaks.size(); i++)
{
if (peaks[i].mass - new_peaks[prev_idx].mass < max_proximity)
{
// join peaks with proportion to their intensities
int tt = new_peaks.size();
intensity_t inten_sum=(new_peaks[prev_idx].intensity + peaks[i].intensity);
mass_t ratio = new_peaks[prev_idx].intensity/inten_sum;
mass_t new_mass = ratio *new_peaks[prev_idx].mass + (1-ratio)*peaks[i].mass;
new_peaks[prev_idx].intensity = inten_sum;
new_peaks[prev_idx].mass = new_mass;
}
else
{
new_peaks.push_back(peaks[i]);
prev_idx++;
}
}
// realocate peak memory
if (new_peaks.size() != peaks.size())
peaks = new_peaks;
}
/*******************************************************************
Filters the number of peaks in the spectra. Keeps only the highest
intensity peaks in the window. Uses values for number of peaks per
windows of 100 Da. from the Config.
Also keeps pairs of peaks that add up to mass+20 (b,y pairs)
Also keeps neutral losses for kept peaks (-H2O and -NH3)
********************************************************************/
void Spectrum::filter_peaks()
{
const mass_t max_allowed_peak_mass = max_mass;
const mass_t window_size = 0.5 * config->get_local_window_size();
const int num_peaks_in_window = config->get_max_number_peaks_per_local_window();
const mass_t tolerance = config->get_tolerance();
int i,j,min_idx,max_idx;
vector<bool> keep_peaks;
int new_num_peaks=0;
vector<Peak> new_peaks;
if (peaks.size()<5)
return;
int max_peak_idx = peaks.size() -1;
keep_peaks.resize(peaks.size(),false);
// keep first peak and last peak
keep_peaks[0]=true;
if (peaks[max_peak_idx].mass<max_allowed_peak_mass)
keep_peaks[max_peak_idx]=true;
min_idx=1;
max_idx=1;
// check the rest of the peaks
for (i=1; i<max_peak_idx; i++)
{
mass_t peak_mass=peaks[i].mass;
mass_t min_mass=peaks[min_idx].mass;
mass_t max_mass=peaks[max_idx].mass;
if (peaks[i].mass > max_allowed_peak_mass)
break;
// advance min/max pointers
while (peak_mass-min_mass > window_size)
min_mass=peaks[++min_idx].mass;
while (max_idx < max_peak_idx && max_mass - peak_mass <= window_size)
max_mass=peaks[++max_idx].mass;
if (max_mass - peak_mass > window_size)
max_idx--;
// this peak might already be marked for keeping (isotpoic peak)
if (keep_peaks[i])
continue;
// if there are less than the maximum number of peaks in the window, keep it.
if (max_idx-min_idx < num_peaks_in_window)
{
keep_peaks[i]=true;
continue;
}
// check if this is one of the top peaks in the window
int higher_count=0;
for (int j=min_idx; j<=max_idx; j++)
if (peaks[j].intensity > peaks[i].intensity)
higher_count++;
if (higher_count < num_peaks_in_window)
{
keep_peaks[i]=true;
}
}
// look for b/y pairs
mass_t pm_with_20 = (corrected_pm_with_19>0 ? corrected_pm_with_19 :
org_pm_with_19)+MASS_PROTON ;
mass_t pm_with_20_upper = pm_with_20 + tolerance;
mass_t pm_with_20_lower = pm_with_20 - tolerance;
int f_idx =0;
int b_idx = peaks.size()-1;
while (f_idx<peaks.size() && b_idx>=0)
{
if (! keep_peaks[f_idx])
{
f_idx++;
continue;
}
while (b_idx>=0 && peaks[f_idx].mass + peaks[b_idx].mass > pm_with_20_upper )
b_idx--;
if (b_idx<0)
break;
mass_t mass_sum = peaks[f_idx].mass + peaks[b_idx].mass;
if (mass_sum > pm_with_20_lower && mass_sum < pm_with_20_upper)
{
keep_peaks[f_idx]=true;
keep_peaks[b_idx]=true;
}
f_idx++;
}
// look for -H2O -NH3 peaks
// 17.0265, 18.0105
const mass_t frag_tolerance = tolerance * 0.6;
int p_idx = 0;
while (p_idx<peaks.size())
{
if (! keep_peaks[p_idx])
{
p_idx++;
continue;
}
const mass_t upper_H2O = peaks[p_idx].mass + MASS_H2O + frag_tolerance;
const mass_t lower_H2O = peaks[p_idx].mass + MASS_H2O - frag_tolerance;
const mass_t upper_NH3 = peaks[p_idx].mass + MASS_NH3 + frag_tolerance;
const mass_t lower_NH3 = peaks[p_idx].mass + MASS_NH3 - frag_tolerance;
int f_idx;
for (f_idx=p_idx+1; f_idx<peaks.size(); f_idx++)
{
if (peaks[f_idx].mass>upper_H2O)
break;
if (peaks[f_idx].mass>lower_H2O)
{
keep_peaks[f_idx]=true;
break;
}
if (peaks[f_idx].mass>=lower_NH3 && peaks[f_idx].mass<=upper_NH3)
keep_peaks[f_idx]=true;
}
p_idx++;
}
new_num_peaks=0;
for (i=0; i<peaks.size(); i++)
if (keep_peaks[i])
new_num_peaks++;
new_peaks.resize(new_num_peaks);
j=0;
for (i=0; i<peaks.size(); i++)
if (keep_peaks[i])
new_peaks[j++]=peaks[i];
peaks=new_peaks;
if (peaks.size() >0)
{
min_peak_mass = peaks[0].mass -1;
max_peak_mass = peaks[peaks.size()-1].mass +1;
}
}
struct peak_pair {
bool operator< (const peak_pair& other) const
{
return inten>other.inten;
}
int idx;
intensity_t inten;
};
void Spectrum::calc_ranks()
{
int i;
vector<peak_pair> pairs;
pairs.resize(peaks.size());
for (i=0; i<peaks.size(); i++)
{
pairs[i].idx=i;
pairs[i].inten=peaks[i].intensity;
}
sort(pairs.begin(), pairs.end());
for (i=0; i<pairs.size(); i++)
peaks[pairs[i].idx].rank=i+1;
}
/*********************************************************************
// gives each peak it log local rank
// good be done more effciently...
**********************************************************************/
void Spectrum::calc_log_local_ranks()
{
const mass_t half_window_size = config->get_local_window_size() * 0.5;
const int num_peaks = peaks.size();
int i;
for (i=0; i<num_peaks; i++)
{
int j, above=0;
PeakRange pr= get_peaks_in_range(peaks[i].mass - half_window_size,
peaks[i].mass + half_window_size);
mass_t peak_mass = peaks[i].mass;
for (j=pr.low_idx; j<=pr.high_idx; j++)
if (peaks[j].intensity>peaks[i].intensity)
above++;
peaks[i].log_local_rank = log(1.0 + (float)above);
}
}
/**************************************************************
Calculates the normalized intensity for each peaks.
The normalization is done so that the new sum of all intensities
equals 1000
***************************************************************/
void Spectrum::normalize_intensities()
{
int i;
if (! config->get_need_to_normalize())
return;
// remove the intensity of pm+20 / 2 and pm+2/2 from the total intensity
// if this is an issue, normalization might have to be done twice,
// before and after parent mass correction
intensity_t total_intensity = this->total_original_intensity;
const float norm_val = 1000.0 / total_intensity;
for (i=0; i<peaks.size(); i++)
{
peaks[i].intensity *= norm_val;
peaks[i].log_intensity = log(1.0 + peaks[i].intensity);
}
}
struct rank_pair {
bool operator< (const rank_pair& other) const
{
return inten > other.inten;
}
int idx;
intensity_t inten;
};
/**************************************************************
Sets the iso_level for each peak. Iso level 0 means that there
is no evidence that this peaks is an isotopic peak. The higher
the level, the more this looks like an isotopic peak
***************************************************************/
void Spectrum::calc_isotope_levels()
{
int i;
const mass_t iso_tolerance = (config->get_tolerance()<0.2) ?
config->get_tolerance() : 0.2;
const int last_peak_idx = peaks.size()-1;
for (i=0; i<last_peak_idx; i++)
{
if (peaks[i].iso_level>0)
continue;
// look for +1 peak
int idx1 = get_max_inten_peak(peaks[i].mass + 1.0033, iso_tolerance);
if (idx1<0)
continue;
float one_over_intensity = 1.0 / peaks[i].intensity;
float ratio1 = peaks[idx1].intensity * one_over_intensity;
// ignore strong +1
if ( ratio1 > 3)
continue;
// examine ratios
vector<float> expected_ratios, observed_ratios, relative_ratios;
vector<int> iso_idxs;
observed_ratios.resize(6);
observed_ratios[0]=1.0;
observed_ratios[1]= ratio1;
iso_idxs.resize(6);
iso_idxs[0]=i;
iso_idxs[1]=idx1;
// find additional peaks
int j;
for (j=2; j<=5; j++)
{
int idx = get_max_inten_peak(peaks[i].mass + j, iso_tolerance);
if (idx<0)
break;
observed_ratios[j] = peaks[idx].mass * one_over_intensity;
iso_idxs[j]=idx;
}
int last_iso = j-1;
// get expected iso ratios
calc_expected_iso_ratios(peaks[i].mass,expected_ratios,j);
// calc ratios between observed and expected
relative_ratios.resize(j);
relative_ratios[0]=1;
for (j=1; j<=last_iso; j++)
relative_ratios[j]=observed_ratios[j] / expected_ratios[j];
peaks[i].iso_level=0;
float level_mul=1.0;
for (j=1; j<= last_iso; j++)
{
float iso_level;
if (relative_ratios[j]>= 0.75 && relative_ratios[j]<=1.333)
{
iso_level=2.0;
}
else if (relative_ratios[j] >= 0.5 && relative_ratios[j] <=2)
{
iso_level=1.3333;
}
else if (relative_ratios[j] >= 0.3333 && relative_ratios[j] <=3)
{
iso_level=0.6666;
}
else if (relative_ratios[j] >= 0.25 && relative_ratios[j] <= 4)
{
iso_level=0.3333;
}
else
break;
// if (relative_ratios[j] / relative_ratios[j-1] > 3)
// break;
peaks[iso_idxs[j]].iso_level = peaks[iso_idxs[j-1]].iso_level + level_mul * iso_level;
level_mul *= 0.5;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -