⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 spectrum.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	}

	vector<rank_pair> mono_pairs, iso_pairs;
	for (i=0; i<peaks.size(); i++)
	{
		rank_pair p;
		p.idx=i;
		p.inten = peaks[i].intensity;
		if (peaks[i].iso_level == 0)
		{
			mono_pairs.push_back(p);
		}
		else
			iso_pairs.push_back(p);

	}

	sort(mono_pairs.begin(),mono_pairs.end());
	sort(iso_pairs.begin(),iso_pairs.end());

	ranked_mono_peaks.resize(peaks.size());
	
	for (i=0; i<mono_pairs.size(); i++)
		ranked_mono_peaks[i]=mono_pairs[i].idx;

	for (i=0; i<iso_pairs.size(); i++)
		ranked_mono_peaks[mono_pairs.size()+i]=iso_pairs[i].idx;

}



/*****************************************************************
Find the strongest peaks by compairng their log local rank
to the number from the config file
******************************************************************/
void Spectrum::select_strong_peaks()
{
	int i;
	float thresh_log_level = log((float)(config->get_number_of_strong_peaks_per_local_window()));

	strong_peak_idxs.clear();
	for (i=0 ;i<peaks.size(); i++)
		if (peaks[i].log_local_rank<=thresh_log_level && peaks[i].iso_level == 0)
			strong_peak_idxs.push_back(i);
}

/***********************************************************************
calcs for each peak the probability of observing it at random (based on
 the neighbor's distribution. Assumes the log_intens are distributed
 according to a normal disribution.
************************************************************************/
void Spectrum::set_log_random_probs()
{
	if (peaks.size()<2)
		return;
	const float log_add = log(1.2);
	const float one_over_sqr_2pi = 1.0 / sqrt(2*3.1415927);
	const mass_t peak_window_size = 0.6; // this is fixed and independent of the tolerance!
	const mass_t margin      = 25.0;
	const mass_t window_size = 100.0;
	const mass_t min_mass = peaks[0].mass;
	const mass_t max_mass = peaks[peaks.size()-1].mass;
	const mass_t viz_range = (max_mass - min_mass);
	
	int i;
	for (i=0; i<peaks.size(); i++)
	{
		const mass_t peak_mass    = peaks[i].mass;
		const mass_t rel_position = (peak_mass-min_mass)/viz_range;
		const mass_t left_window  = margin + rel_position * window_size;
		const mass_t right_window = margin + window_size - left_window;
		const PeakRange pr = get_peaks_in_range(peak_mass-left_window,peak_mass+right_window);

		const float peak_window_prob = peak_window_size /(left_window + right_window);
		const float zero_prob = pow((1.0 - peak_window_prob),pr.num_peaks);

	//	peaks[i].log_rand_prob = log(1 - zero_prob);
		if (pr.num_peaks<5)
		{
			peaks[i].log_rand_prob = log(1.0-zero_prob) + log_add;
		}
		else
		{
			vector<float> log_intens;
			int j;
			for (j=pr.low_idx; j<=pr.high_idx; j++)
				log_intens.push_back(peaks[j].log_intensity);
		
			float mean=0,sd=1;
			calc_mean_sd(log_intens,&mean,&sd);
			const float e = (peaks[i].log_intensity - mean)/sd;
			if (e<0)
			{
				peaks[i].log_rand_prob = log(1 - zero_prob) + log_add;
			}
			else
			{
				const float norm = (one_over_sqr_2pi/ sd) * exp(-0.5*e*e);
				const float norm_const = (1 - zero_prob) / (one_over_sqr_2pi/ sd); //
				peaks[i].log_rand_prob = log(norm*norm_const);
			}
		} 

	//	cout << i << fixed << setprecision(2) << "\t" << peaks[i].mass << "\t" << peaks[i].log_intensity <<
	//		"\t" << peaks[i].rank << "\t" << pr.num_peaks << "\t" <<
	//		setprecision(4) << exp(peaks[i].log_rand_prob) << "\t( 0 prob " << zero_prob <<" )" << endl;
	}
}
								 


/**************************************************************
Sets all peaks that have iso level=0 as strong
***************************************************************/
void Spectrum::mark_all_non_iso_peaks_as_strong()
{
	int i;
	strong_peak_idxs.clear();
	for (i=0 ;i<peaks.size(); i++)
		if (peaks[i].iso_level == 0)
			strong_peak_idxs.push_back(i);
}



/****************************************************************
Calculates different pm_with_19 values, for different charges
(calculates assumed pm_with_19 from the m/z value)
*****************************************************************/
void Spectrum::calc_original_pm_with_19_for_different_charges(
								   vector<mass_t>& pms_with_19) const
{
	int i;
	mass_t m_over_z = (org_pm_with_19 - MASS_PROTON) / charge;

	pms_with_19.resize(5);
	pms_with_19[0]=-1;
	for (i=1; i<=4; i++)
		pms_with_19[i] = m_over_z * i + MASS_PROTON;
}


/*********************************************************
Reads the spectrum info from a dta file.
returns false if there was a problem
**********************************************************/
bool Spectrum::read_from_dta( Config* _config, const char *dta_name, 
							 int _charge , bool verbose)
{
	ifstream fs(dta_name,ios::in);
	if (! fs)
		return false;

	peaks.clear();
	config = _config;
	file_name = dta_name;

	if (verbose)
		cout << "Reading: " << file_name << endl;

	char buff[1024];

	fs.getline(buff,1024);
	if (fs.bad())
		return false;

	while (buff[0] =='#') 
	{
		istringstream is(buff);
		string arg1,arg2;

		is >> arg1 >> arg2;

		if (! arg1.compare(0,5,"#FILE"))
		{
			file_name = arg2;
			if (verbose)
				cout << "#FILE " << file_name << endl;
		}
		else 
		if (! arg1.compare(0,4,"#SEQ"))
		{
			peptide.parse_from_string(config,arg2);
			if (verbose)
				cout << "#SEQ " << peptide.as_string(config) << endl;

		}
		fs.getline(buff,1024);
	}
	
	istringstream is(buff);
	is >> org_pm_with_19 >> charge;

	if (_charge>=0)
		charge = _charge;

//	charge =0;

	if (charge == 0)
	{
		m_over_z = org_pm_with_19;
	}
	else
		m_over_z = (org_pm_with_19 + (charge - 1) * 1.0078)  / charge;

	if (verbose)
		cout << org_pm_with_19 << " " << charge << endl;


	while (fs.getline(buff,1024))
	{
		istringstream is(buff);
		Peak p;
		is >> p.mass >> p.intensity;
	
		if (p.mass <0 || p.intensity==0)   // the peak probably got rounded down
			continue;

		peaks.push_back(p);
		total_original_intensity+=p.intensity;

		if (verbose)
			cout << p.mass << " " << p.intensity << endl;
	}

	if (peaks.size() >0)
	{
		min_peak_mass = peaks[0].mass -1; // margin 1 Dalton
		max_peak_mass = peaks[peaks.size()-1].mass +1;
	}
	else
		return false;


	if (charge>=2)
	{
		max_mass =  org_pm_with_19 > max_peak_mass ? 
					org_pm_with_19 : max_peak_mass;
	}
	else
		max_mass = max_peak_mass;

	max_mass += 11.0; // margin of error

	size_idx = config->calc_size_idx(charge,org_pm_with_19);

	return true;
}





bool Spectrum::read_from_MGF_stream(Config *_config, FILE *stream, int _charge, bool verbose)
{
	config = _config;
	total_original_intensity=0;
	char buff[1024];

	peaks.clear();
	
/*	while (1)
	{
		if( ! fgets(buff, 1024, stream) )
			return false;

		if (strlen(buff)<3)
			continue;

		if (strncmp(buff,"BEGIN IONS",10) )
			continue;

		break;
	}*/
	scan_number=-1;
	retention_time=-1;
	cluster_size=-1;
	charge=-1;
	m_over_z=-1;
	org_pm_with_19=-1;

	peaks.clear();

	// read header info and first peak
	while (1)
	{
		if( ! fgets(buff, 1024, stream))
			return false;

		if (! strncmp(buff,"END IONS",7))
			return false;

		if (! strncmp(buff,"TITLE=",6) )
		{
			buff[strlen(buff)-1]='\0';
			file_name = buff+6;
			continue;
		}
		else
		if (! strncmp(buff,"SEQ=",4) )
		{
			string seq_string(buff+4);
			peptide.parse_from_string(config,seq_string);
			peptide.calc_mass(config);
			continue;		
		}
		else
		if (! strncmp(buff,"SCAN=",5) )
		{
			if (sscanf(buff+5,"%d",&scan_number) != 1)
			{
				cout << "Error: couldn't read scan number!" << endl;
				exit(1);
			}
			continue;
		}
		else
		if (! strncmp(buff,"RT=",3) )
		{
			if (sscanf(buff+3,"%f",&retention_time) != 1)
			{
				cout << "Error: couldn't read retention_time!" << endl;
				exit(1);
			}
			continue;
		}
		else
		if (! strncmp(buff,"CLUSTER_SIZE=",13) )
		{
			if (sscanf(buff+13,"%d",&cluster_size) != 1)
			{
				cout << "Error: couldn't read cluster size!" << endl;
				exit(1);
			}
			continue;
		}
		else	
		if ( ! strncmp(buff,"CHARGE=",6))
		{
			if (sscanf(buff,"CHARGE=%d",&charge) != 1)
			{
				cout <<  "Error: couldn't read charge!" << endl;
				return false;
			}
		}
		else
		if (! strncmp(buff,"PEPMASS=",8))
		{
			istringstream is(buff+8);
			is >> m_over_z;
			
			if (m_over_z < 0)
			{
				cout << "Error: reading pepmass:" << m_over_z << endl;
				return false;
			}		
		}
		else
		{
			istringstream is(buff);
			Peak p;
	
			is >> p.mass >> p.intensity;

			if (p.mass >0 && p.intensity>0)
			{
				peaks.push_back(p);
				total_original_intensity+=p.intensity;
				break;
			}
		}
	}



	if (_charge > 0)
			charge = _charge;

	org_pm_with_19 = m_over_z * charge + MASS_PROTON * (1 - charge);
	if (org_pm_with_19<0)
		org_pm_with_19=-1;



	// read rest of peaks
	
	while (fgets(buff, 1024, stream))
	{
		istringstream is(buff);
		Peak p;

		if (! strncmp("END IONS",buff,7))
			break;

		is >> p.mass >> p.intensity;
	
		if (p.mass <0 || p.intensity==0)   // the peak probably got rounded down
		{
			continue;
		}

		peaks.push_back(p);
		total_original_intensity+=p.intensity;

		if (verbose)
			cout << p.mass << " " << p.intensity << endl;
	}

	if (peaks.size() >0)
	{
		min_peak_mass = peaks[0].mass -1; // margin 1 Dalton
		max_peak_mass = peaks[peaks.size()-1].mass +1;
	}
	else
		return false;


	max_mass =      org_pm_with_19 > max_peak_mass ? 
					org_pm_with_19 : max_peak_mass;

	max_mass += 11.0; // margin of error

	size_idx = config->calc_size_idx(charge,org_pm_with_19);

	if (peptide.get_num_aas()>0 && charge>0)
	{
		mass_t org_diff = org_pm_with_19 - peptide.get_mass() - 19.0183;

		if (fabs(org_diff)>7.0)
		{
			// try and correct charge!
			for (charge=1; charge<=4; charge++)
			{

				mass_t new_org_pm_with_19 =  m_over_z * charge + MASS_PROTON * (1 - charge);
				mass_t diff = fabs(new_org_pm_with_19 - peptide.get_mass() - MASS_OHHH);
				if (diff<6)
				{
					org_pm_with_19 = new_org_pm_with_19;
					return true;
				}
			}
		
			cout << "Error: sequence mass doesn't add up: " << file_name << " (diff: "
				 << setprecision(3) << fixed << org_diff << ")" << endl;
			cout << "Pepitde: " << peptide.as_string(config) << endl;
			cout << "Mass Cys = " << config->get_session_tables().get_aa2mass(Cys) << endl;
			exit(1);
		}
	}

	return true;
}


bool Spectrum::read_from_peak_arrays(Config* _config, Peptide& _pep, mass_t _pm_with_19, 
		int _charge, int _num_peaks, mass_t *_masses, intensity_t *_intensities)
{
	peaks.clear();
	config = _config;
	file_name = "xxx";

	peptide =_pep;
	org_pm_with_19 = _pm_with_19;
	charge = _charge;
	m_over_z = (org_pm_with_19 + (charge - 1) * MASS_PROTON ) / charge;

	peaks.resize(_num_peaks);
	int i;
	for (i=0; i<_num_peaks; i++)
	{
		peaks[i].mass = _masses[i];
		peaks[i].intensity = _intensities[i];
		total_original_intensity+=_intensities[i];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -