⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 advancedscoremodel_fill.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectra…
💻 CPP
📖 第 1 页 / 共 4 页
字号:
#include "AdvancedScoreModel.h"


// Snapshot of what a breakage records for a single fragment type: whether the
// fragment is visible, whether a peak was assigned to it, and a copy of that
// peak's mass / isotope / intensity / rank values.
//
// A default-constructed object has both flags false, iso_level 0 and every
// other field set to NEG_INF.
struct FragStats {
	// iso_level is initialised too (it used to be left indeterminate); 0 is
	// used so that an unfilled record fails any `iso_level != 0` check.
	FragStats() : frag_idx(NEG_INF), is_viz(false), has_intensity(false), peak_idx(NEG_INF),
				  mass(NEG_INF), iso_level(0.0f), log_intensity(NEG_INF),
				  log_local_rank(NEG_INF), log_global_rank(NEG_INF) {}

	// Fills this record for fragment type index `f` of `breakage`.
	//
	//   breakage - queried for visibility of `f` and for the position of `f`
	//              in its `fragments` array.
	//   spec     - spectrum the stored peak index refers to.
	//   f        - fragment type index; always stored in frag_idx.
	//
	// Only the fields that apply are written; nothing is reset first, so call
	// this on a freshly constructed object.
	void fill_from_breakage(const Breakage *breakage, Spectrum *spec, int f)
	{
		frag_idx = f;
		if (! breakage->is_frag_type_visible(f))
			return;

		is_viz = true;

		// A negative position means the breakage holds no fragment entry for f.
		const int pos = breakage->get_position_of_frag_idx(f);
		if (pos < 0)
			return;

		has_intensity = true;
		peak_idx = breakage->fragments[pos].peak_idx;

		const Peak& peak = spec->get_peak(peak_idx);
		mass		    = peak.mass;
		iso_level	    = peak.iso_level;
		log_intensity   = peak.log_intensity;
		log_local_rank  = peak.log_local_rank;
		log_global_rank = log(1.0+static_cast<float>(peak.rank)); // log(1 + rank)
	}

	int    frag_idx;        // fragment type index passed to fill_from_breakage
	bool   is_viz;          // breakage reports the fragment type as visible
	bool   has_intensity;   // breakage has a fragment entry (peak) for it
	int    peak_idx;        // index of that peak in the spectrum
	mass_t mass;            // copied from the peak
	float  iso_level;       // copied from the peak; 0 until filled
	float  log_intensity;   // copied from the peak
	float  log_local_rank;  // copied from the peak
	float  log_global_rank; // log(1 + peak.rank)
};

void StrongFragModel::fill_constant_vals(
							   Spectrum *spec, 
							   mass_t pm_with_19,  
							   const Breakage *breakage, 
							   vector<fval>& f_vals) const
{
	const mass_t iso_tolerance = spec->get_config()->get_tolerance()*0.5;
	FragStats frag_stats;
	FragStats parent1_stats, parent2_stats;
	FragStats mirror1_stats, mirror2_stats;

	frag_stats.fill_from_breakage(breakage,spec,model_frag_idx);
	if (parent1_idx>=0)
		parent1_stats.fill_from_breakage(breakage,spec,parent1_idx);
	if (parent2_idx>=0)
		parent2_stats.fill_from_breakage(breakage,spec,parent2_idx);
	if (mirror1_idx>=0)
		mirror1_stats.fill_from_breakage(breakage,spec,mirror1_idx);
	if (mirror2_idx>=0)
		mirror2_stats.fill_from_breakage(breakage,spec,mirror2_idx);

	f_vals.clear();
	if (frag_stats.has_intensity) // fill features for visible fragment
	{
		f_vals.push_back(fval(SI_CONST,1.0));

		const float log_inten = frag_stats.log_intensity;

		// Mirror1 features
		if (mirror1_idx>=0)
		{
			if (mirror1_stats.is_viz)
			{
				f_vals.push_back(fval(SI_IND_MIRROR1_VIZ,1.0));
				if (mirror1_stats.has_intensity)
				{
					f_vals.push_back(fval(SI_IND_HAS_MIRROR1_INTEN,1.0));
					if (mirror1_stats.iso_level!=0)
						f_vals.push_back(fval(SI_MIRROR1_ISO_LEVEL,mirror1_stats.iso_level));
				
					vector<float> iso_intens;
					spec->get_iso_intens(mirror1_stats.peak_idx, iso_intens, iso_tolerance, mirror1_charge);	
					if (iso_intens[1]>=0)
					{
						f_vals.push_back(fval(SI_IND_MIRROR1_HAS_MINUS_1,1.0));
						f_vals.push_back(fval(SI_MIRROR1_MINUS_1_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[1]));
						if (iso_intens[0]>=0)
						{
							f_vals.push_back(fval(SI_IND_MIRROR1_HAS_MINUS_2,1.0));
							f_vals.push_back(fval(SI_MIRROR1_MINUS_2_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[0]));	
						}
					}
					if (iso_intens[2]>=0)
					{
						f_vals.push_back(fval(SI_IND_MIRROR1_HAS_PLUS_1,1.0));
						f_vals.push_back(fval(SI_MIRROR1_PLUS_1_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[2]));
						if (iso_intens[3]>=0)
						{
							f_vals.push_back(fval(SI_IND_MIRROR1_HAS_PLUS_2,1.0));
							f_vals.push_back(fval(SI_MIRROR1_PLUS_2_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[3]));	
						}
					}
						

					const mass_t sum_masses = frag_stats.mass * model_frag_charge + 
										mirror1_stats.mass * mirror1_charge;

					const mass_t offset = fabs(pm_with_19 - sum_masses + (model_frag_charge + mirror1_charge -1)*MASS_PROTON);
					const float offset_level = offset * one_over_tolerance;

					if (offset_level<0.25)
					{
						f_vals.push_back(fval(SI_MIRROR1_MASS_DIFF25,1.0));
					} 
					else if (offset_level<0.75)
					{
						f_vals.push_back(fval(SI_MIRROR1_MASS_DIFF75,1.0));
					}
					else
						f_vals.push_back(fval(SI_MIRROR1_MASS_DIFF_LARGE,1.0));
				}
			}
			else
				f_vals.push_back(fval(SI_IND_MIRROR1_NOT_VIZ,1.0));
		}

		// Mirror2 features
		if (mirror2_idx>=0)
		{
			if (mirror2_stats.is_viz)
			{
				f_vals.push_back(fval(SI_IND_MIRROR2_VIZ,1.0));
				if (mirror2_stats.has_intensity)
				{
					f_vals.push_back(fval(SI_IND_HAS_MIRROR2_INTEN,1.0));
					if (mirror2_stats.iso_level != 0)
						f_vals.push_back(fval(SI_MIRROR2_ISO_LEVEL,mirror2_stats.iso_level));

					vector<float> iso_intens;
					spec->get_iso_intens(mirror2_stats.peak_idx, iso_intens, iso_tolerance, mirror2_charge);
					if (iso_intens[1]>=0)
					{
						f_vals.push_back(fval(SI_IND_MIRROR2_HAS_MINUS_1,1.0));
						f_vals.push_back(fval(SI_MIRROR2_MINUS_1_INTEN_DIFF,mirror2_stats.log_intensity-iso_intens[1]));
						if (iso_intens[0]>=0)
						{
							f_vals.push_back(fval(SI_IND_MIRROR2_HAS_MINUS_2,1.0));
							f_vals.push_back(fval(SI_MIRROR2_MINUS_2_INTEN_DIFF,mirror2_stats.log_intensity-iso_intens[0]));	
						}
					}
					if (iso_intens[2]>=0)
					{
						f_vals.push_back(fval(SI_IND_MIRROR2_HAS_PLUS_1,1.0));
						f_vals.push_back(fval(SI_MIRROR2_PLUS_1_INTEN_DIFF,mirror2_stats.log_intensity-iso_intens[2]));
						if (iso_intens[3]>=0)
						{
							f_vals.push_back(fval(SI_IND_MIRROR2_HAS_PLUS_2,1.0));
							f_vals.push_back(fval(SI_MIRROR2_PLUS_2_INTEN_DIFF,mirror2_stats.log_intensity-iso_intens[3]));	
						}
					}



					const mass_t sum_masses = frag_stats.mass * model_frag_charge + 
										mirror2_stats.mass * mirror2_charge;

					const mass_t offset = fabs(pm_with_19 - sum_masses + (model_frag_charge + mirror2_charge -1)*MASS_PROTON);
					const float offset_level = offset * one_over_tolerance;

					if (offset_level<0.25)
					{
						f_vals.push_back(fval(SI_MIRROR2_MASS_DIFF25,1.0));
					} 
					else if (offset_level<0.75)
					{
						f_vals.push_back(fval(SI_MIRROR2_MASS_DIFF75,1.0));
					}
					else
						f_vals.push_back(fval(SI_MIRROR2_MASS_DIFF_LARGE,1.0));
				}
			}
			else
				f_vals.push_back(fval(SI_IND_MIRROR2_NOT_VIZ,1.0));
		}

		// Parent 1 features
		if (parent1_idx>=0)
		{
			if (parent1_stats.is_viz)
			{
				f_vals.push_back(fval(SI_IND_PARENT1_VIZ,1.0));
				if (parent1_stats.has_intensity)
				{
					const float inten_diff = parent1_stats.log_intensity - log_inten;
					const mass_t dis_min = parent1_stats.mass - spec->get_min_peak_mass();
					const mass_t dis_max = spec->get_max_peak_mass() - parent1_stats.mass;
					const mass_t dis = (dis_min<dis_max ? dis_min : dis_max);
					
					if (parent1_stats.iso_level != 0)
						f_vals.push_back(fval(SI_PARENT1_ISO_LEVEL,parent1_stats.iso_level));
					if (dis<100)
					{
						f_vals.push_back(fval(SI_IND_PARENT1_LESS_THAN_100_MIN_MAX,1.0));
					} 
					else if (dis<200)
						f_vals.push_back(fval(SI_IND_PARENT1_LESS_THAN_200_MIN_MAX,1.0));

					if (inten_diff>0)
					{
						f_vals.push_back(fval(SI_IND_PARENT1_INTEN_MORE,1.0));
						f_vals.push_back(fval(SI_PARENT1_INTEN_DIFF_MORE,inten_diff));
					}
					else
					{
						f_vals.push_back(fval(SI_IND_PARENT1_INTEN_LESS,1.0));
						f_vals.push_back(fval(SI_PARENT1_INTEN_DIFF_LESS,inten_diff));
					}
				}
				else
					f_vals.push_back(fval(SI_IND_PARENT1_NO_INTEN,1.0));
			}
			else
				f_vals.push_back(fval(SI_IND_PARENT1_NOT_VIZ,1.0));
		}

		// Parent 2 features
		if (parent2_idx>=0)
		{
			if (parent2_stats.is_viz)
			{
				f_vals.push_back(fval(SI_IND_PARENT2_VIZ,1.0));
				if (parent2_stats.has_intensity)
				{
					const float inten_diff = parent2_stats.log_intensity - log_inten;
					const mass_t dis_min = parent2_stats.mass - spec->get_min_peak_mass();
					const mass_t dis_max = spec->get_max_peak_mass() - parent2_stats.mass;
					const mass_t dis = (dis_min<dis_max ? dis_min : dis_max);
					
					if (parent2_stats.iso_level != 0)
						f_vals.push_back(fval(SI_PARENT2_ISO_LEVEL,parent2_stats.iso_level));
					if (dis<100)
					{
						f_vals.push_back(fval(SI_IND_PARENT2_LESS_THAN_100_MIN_MAX,1.0));
					} 
					else if (dis<200)
						f_vals.push_back(fval(SI_IND_PARENT2_LESS_THAN_200_MIN_MAX,1.0));

					if (inten_diff>0)
					{
						f_vals.push_back(fval(SI_IND_PARENT2_INTEN_MORE,1.0));
						f_vals.push_back(fval(SI_PARENT2_INTEN_DIFF_MORE,inten_diff));
					}
					else
					{
						f_vals.push_back(fval(SI_IND_PARENT2_INTEN_LESS,1.0));
						f_vals.push_back(fval(SI_PARENT2_INTEN_DIFF_LESS,inten_diff));
					}
				}
				else
					f_vals.push_back(fval(SI_IND_PARENT2_NO_INTEN,1.0));
			}
			else
				f_vals.push_back(fval(SI_IND_PARENT2_NOT_VIZ,1.0));
		}

		// self intensity
		f_vals.push_back(fval(SI_LOG_LOCAL_RANK,frag_stats.log_local_rank));
		f_vals.push_back(fval(SI_LOG_GLOBAL_RANK,frag_stats.log_global_rank));
		if (frag_stats.iso_level != 0)
			f_vals.push_back(fval(SI_ISO_LEVEL,frag_stats.iso_level));
		
		if (log_inten<1.0)
		{
			f_vals.push_back(fval(SI_IND_LOG_INTEN_LESS1,1.0));
			f_vals.push_back(fval(SI_LOG_INTEN_LESS1,log_inten));
		} 
		else if (log_inten<2.0)
		{
			f_vals.push_back(fval(SI_IND_LOG_INTEN_LESS2,1.0));
			f_vals.push_back(fval(SI_LOG_INTEN_LESS2,log_inten-1.0));
		}
		else if (log_inten<3.0)
		{
			f_vals.push_back(fval(SI_IND_LOG_INTEN_LESS3,1.0));
			f_vals.push_back(fval(SI_LOG_INTEN_LESS3,log_inten-2.0));
		}
		else if (log_inten<4.0)
		{
			f_vals.push_back(fval(SI_IND_LOG_INTEN_LESS4,1.0));
			f_vals.push_back(fval(SI_LOG_INTEN_LESS4,log_inten-3.0));
		}
		else
		{
			f_vals.push_back(fval(SI_IND_LOG_INTEN_MORE,1.0));
			f_vals.push_back(fval(SI_LOG_INTEN_MORE,log_inten-4.0));
		}

		// self distance
		const mass_t dis_min = frag_stats.mass - spec->get_min_peak_mass();
		const mass_t dis_max = spec->get_max_peak_mass() - frag_stats.mass;
		const mass_t dis = (dis_min<dis_max ? dis_min : dis_max);

		if (dis<50)
		{
			f_vals.push_back(fval(SI_IND_DIS_FROM_MINMAX_LESS_50,1.0));
			f_vals.push_back(fval(SI_DIS_FROM_MINMAX0,dis));
			f_vals.push_back(fval(SI_LOG_INTEN_DIS_50,log_inten));
		}
		else if (dis<150)
		{
			f_vals.push_back(fval(SI_IND_DIS_FROM_MINMAX_LESS_150,1.0));
			f_vals.push_back(fval(SI_DIS_FROM_MINMAX50,dis-50.0));
			f_vals.push_back(fval(SI_LOG_INTEN_DIS_150,log_inten));
		}
		else if (dis<250)
		{
			f_vals.push_back(fval(SI_IND_DIS_FROM_MINMAX_LESS_250,1.0));
			f_vals.push_back(fval(SI_DIS_FROM_MINMAX150,dis-150.0));
			f_vals.push_back(fval(SI_LOG_INTEN_DIS_250,log_inten));
		}
		else
		{
			f_vals.push_back(fval(SI_IND_DIS_FROM_MINMAX_MORE,1.0));
			f_vals.push_back(fval(SI_DIS_FROM_MINMAX250,dis-250.0));
			f_vals.push_back(fval(SI_LOG_INTEN_DIS_MORE,log_inten));
		}

		const int rel_pos = int(10*breakage->mass/pm_with_19);
		f_vals.push_back(fval(SI_REL_POS0+rel_pos,1.0));

		const float one_over_model_frag_charge = 1.0 / model_frag_charge;
		const mass_t forward_offsets[]={MASS_NH3,MASS_H2O,MASS_CO,MASS_H2ONH3,MASS_H2OH2O};
		const int num_forward_offsets = sizeof(forward_offsets)/sizeof(mass_t);
		const mass_t peak_mass = frag_stats.mass;

		int t;
		for (t=0; t<num_forward_offsets; t++)
		{
			const int forward_idx = spec->get_max_inten_peak(peak_mass + forward_offsets[t]*one_over_model_frag_charge,frag_tolerance);
			if (forward_idx>0)
			{
				f_vals.push_back(fval(SI_IND_HAS_PLUS_NH3+2*t,1.0));
				f_vals.push_back(fval(SI_IND_HAS_PLUS_NH3+2*t+1,spec->get_peak(forward_idx).log_intensity-log_inten));
			}
		}

		
		const int plus_idx = spec->get_max_inten_peak((peak_mass * model_frag_charge + MASS_PROTON)/(model_frag_charge+1.0),frag_tolerance);
		if (plus_idx>=0)
		{
			f_vals.push_back(fval(SI_IND_HAS_CHARGE_PLUS1,1.0));
			f_vals.push_back(fval(SI_CHARGE_PLUS1_INTEN_DIFF,spec->get_peak(plus_idx).log_intensity-log_inten));
		}

		if (model_frag_charge>1)
		{
			const int minus_idx = spec->get_max_inten_peak((peak_mass * model_frag_charge - MASS_PROTON)/(model_frag_charge-1.0),frag_tolerance);
			if (minus_idx>=0)
			{
				f_vals.push_back(fval(SI_IND_HAS_CHARGE_MINUS1,1.0));
				f_vals.push_back(fval(SI_CHARGE_MINUS1_INTEN_DIFF,spec->get_peak(minus_idx).log_intensity-log_inten));
			}
		}
	}
	else // Fill features for non-visible
	{
		f_vals.push_back(fval(SNI_CONST,1.0));

		// Mirror1 features
		if (mirror1_idx>=0)
		{
			if (mirror1_stats.is_viz)
			{
				f_vals.push_back(fval(SNI_IND_MIRROR1_VIZ,1.0));
				if (mirror1_stats.has_intensity)
				{
					f_vals.push_back(fval(SNI_IND_HAS_MIRROR1_INTEN,1.0));
					if (mirror1_stats.iso_level != 0)
						f_vals.push_back(fval(SNI_MIRROR1_ISO_LEVEL,mirror1_stats.iso_level));

					vector<float> iso_intens;
					spec->get_iso_intens(mirror1_stats.peak_idx, iso_intens, iso_tolerance, mirror1_charge);
					
					if (iso_intens[1]>=0)
					{
						f_vals.push_back(fval(SNI_IND_MIRROR1_HAS_MINUS_1,1.0));
						f_vals.push_back(fval(SNI_MIRROR1_MINUS_1_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[1]));
						if (iso_intens[0]>=0)
						{
							f_vals.push_back(fval(SNI_IND_MIRROR1_HAS_MINUS_2,1.0));
							f_vals.push_back(fval(SNI_MIRROR1_MINUS_2_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[0]));	
						}
					}
					if (iso_intens[2]>=0)
					{
						f_vals.push_back(fval(SNI_IND_MIRROR1_HAS_PLUS_1,1.0));
						f_vals.push_back(fval(SNI_MIRROR1_PLUS_1_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[2]));
						if (iso_intens[3]>=0)
						{
							f_vals.push_back(fval(SNI_IND_MIRROR1_HAS_PLUS_2,1.0));
							f_vals.push_back(fval(SNI_MIRROR1_PLUS_2_INTEN_DIFF,mirror1_stats.log_intensity-iso_intens[3]));	
						}
					}
				}
				else
					f_vals.push_back(fval(SNI_IND_MIRROR1_NO_INTEN,1.0));
			}
			else
				f_vals.push_back(fval(SNI_IND_MIRROR1_NOT_VIZ,1.0));
		}

		// Mirror2 features
		if (mirror2_idx>=0)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -