⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 discretepeakmodel.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:

				for (t=0; t<all_tables[size_idx][region_idx][f].size(); t++)
				{
					if ( ! all_tables[size_idx][region_idx][f][t].are_relevant_fragments_visible(&breakages[b]) )
						 continue;

					all_tables[size_idx][region_idx][f][t].add_instance(&breakages[b]);
				}
			}
		}
		cout << endl;
	}

	// calculate probabilities and choose the best table for each fragment
	// (the one that has maximal dkl).
	// also assign for each fragment a table of independent probabilities
	// (what you get if the fragment has no parents).
	for (size_idx =0; size_idx < all_tables.size(); size_idx++)
	{
		int region_idx;
		for (region_idx=0; region_idx<all_tables[size_idx].size(); region_idx++)
		{
			const vector<int>& frag_type_idxs = regional_fragments[charge][size_idx][region_idx].get_frag_type_idxs();

			int f;
			for (f=0; f<all_tables[size_idx][region_idx].size(); f++)
			{
				int t;
				vector<double> dkl_sums;
				dkl_sums.resize(all_tables[size_idx][region_idx][f].size(),0);

				for (t=0; t<all_tables[size_idx][region_idx][f].size(); t++)
					all_tables[size_idx][region_idx][f][t].calc_probs();

				vector<score_t> ind_probs;
				ind_probs.resize(num_peak_levels);
				for (t=0; t<num_peak_levels; t++)
					ind_probs[t] = all_tables[size_idx][region_idx][f][0].score_probs[t];

				cout << "Tables for " << config.get_fragment(frag_type_idxs[f]).label << " ( charge: " <<
					charge << "  size: " << size_idx << "  region: " << region_idx << " )" << endl;

				all_tables[size_idx][region_idx][f][0].print_pretty(&config);
				cout<< endl;

				regional_models[charge][size_idx][region_idx].charge = charge;
				regional_models[charge][size_idx][region_idx].size_idx = size_idx;
				regional_models[charge][size_idx][region_idx].region_idx = region_idx;

				// set the independent probs 
				regional_models[charge][size_idx][region_idx].independent_frag_tables[f] =
					all_tables[size_idx][region_idx][f][0];

				// use the independent model if this fragment has no parents
				if (all_tables[size_idx][region_idx][f].size() == 1)
				{
					regional_models[charge][size_idx][region_idx].single_breakage_tables[f] =
						all_tables[size_idx][region_idx][f][0];

					continue;
				}

				vector<idx_dkl> pairs;
				pairs.clear();
				for (t=1; t<all_tables[size_idx][region_idx][f].size(); t++)
				{
					idx_dkl p;

					p.idx = t;
					p.dkl_sum = all_tables[size_idx][region_idx][f][t].calc_dkl_sum(ind_probs);

				//	all_tables[size_idx][region_idx][f][t].print_pretty(&config);	
				//	cout << "DKL SUM: " << all_tables[size_idx][region_idx][f][t].calc_dkl_sum(ind_probs) << endl << endl;
					pairs.push_back(p);
				}

				sort(pairs.begin(),pairs.end());
				int i;
				for (i=0; i<pairs.size() && i<7; i++)
				{
					string name;
					all_tables[size_idx][region_idx][f][pairs[i].idx].make_table_name(&config,name);
					cout << setw(6) << setprecision(4) << pairs[i].dkl_sum << " " << name << endl;
				}
				cout << endl;

				// find combo with highest dkl, might want to give a bit preference to 
				// tables with 1 parent, or strong parents...

				int two_frag_idx=-1, one_frag_idx =-1;
				double max_two_frag_dkl=0, max_one_frag_dkl=0;

				for (i=0; i<pairs.size(); i++)
				{
					const vector<int>& fields = all_tables[size_idx][region_idx][f][pairs[i].idx].get_fields();
					if (fields[1]>=0 && fields[2]>=0)
					{
						if (pairs[i].dkl_sum>max_two_frag_dkl)
						{
							max_two_frag_dkl = pairs[i].dkl_sum;
							two_frag_idx = pairs[i].idx;
						}
					}
					else
					{
						if (pairs[i].dkl_sum>max_one_frag_dkl)
						{
							max_one_frag_dkl = pairs[i].dkl_sum;
							one_frag_idx = pairs[i].idx;
						}
					}
				}

				int selected_table_idx=-1;
				if (max_two_frag_dkl>max_one_frag_dkl && (max_two_frag_dkl/max_one_frag_dkl > 1.1))
				{
					selected_table_idx = two_frag_idx;
				}
				else
					selected_table_idx = one_frag_idx;

				regional_models[charge][size_idx][region_idx].single_breakage_tables[f] =
						all_tables[size_idx][region_idx][f][selected_table_idx];

				string name;
				all_tables[size_idx][region_idx][f][selected_table_idx].make_table_name(&config,name);
				cout << "Selected: " << name << endl << endl; 
			}
		}
	}

	

}



// writes all relevant info:
// thresholds and tables
void DiscretePeakModel::write_score_model(const char *name) const
{
	string file_name = config.get_resource_dir() + "/" + string(name) + "_break_score.txt";
	ofstream os(file_name.c_str());

	if (! os.is_open() || ! os.good())
	{
		cout << "Error: couldn't open score model for writing: " << file_name << endl;
		exit(1);
	}
	write_model_specifics(os);

	int c;
	int max_charge = 0;
	for (c=0; c<regional_models.size(); c++)
		if (regional_models[c].size()>0)
			max_charge = c;

	os << "#MAX_CHARGE " << max_charge << endl;

	for (c=1; c<=max_charge; c++)
	{
		if (regional_models[c].size() == 0)
			continue;

		os << c << " " << regional_models[c].size() << endl; // num sizes
		int size_idx;
		for (size_idx=0; size_idx< regional_models[c].size(); size_idx++)
		{
			os << regional_models[c][size_idx].size() << endl; // num regions
			int region_idx;
			for (region_idx=0; region_idx<regional_models[c][size_idx].size(); region_idx++)
			{
				regional_models[c][size_idx][region_idx].write_regional_model(os);
			}
		}
	}

	os.close();
}



// converts all the probabilities to socres in the tables
void DiscretePeakModel::convert_probs_to_scores()
{
	int c,s,r;

	for (c=0; c<regional_models.size(); c++)
		for (s=0; s<regional_models[c].size(); s++)
			for (r=0; r<regional_models[c][s].size(); r++)
				regional_models[c][s][r].convert_to_scores(q_rand);

}


void DiscretePeakModel::read_score_model(const char *name, bool silent_ind)
{
	char buff[128];
	string file_name = config.get_resource_dir() + "/" + string(name) + "_break_score.txt";
	ifstream is(file_name.c_str());
	if (! is.good() || ! is.is_open())
	{
		cout << "Error: couldn't open break score model for reading: " << file_name << endl;
		exit(1);
	}

	read_model_specifics(is);
	
	is.getline(buff,128);

	if (sscanf(buff,"#MAX_CHARGE %d",&max_score_model_charge) != 1)
	{
		cout << "Error: bad line in score model: " << buff << endl;
		exit(1);
	}

	this->config.set_max_charge_for_size(max_score_model_charge);

	regional_models.resize(max_score_model_charge+3);

	int charge = 0;
	while (charge<max_score_model_charge)
	{
		int num_sizes,size_idx;
		is.getline(buff,128);
		if (sscanf(buff,"%d %d",&charge,&num_sizes) != 2)
		{
			cout << "Error: bad line in score model: " << buff << endl;
			exit(1);
		}

		regional_models[charge].resize(num_sizes);
		for (size_idx=0; size_idx<num_sizes; size_idx++)
		{
			int num_regions, region_idx;
			is.getline(buff,128);
			if (sscanf(buff,"%d",&num_regions) != 1)
			{
				cout << "Error: bad line in score model: " << buff << endl;
				exit(1);
			}
			regional_models[charge][size_idx].resize(num_regions);
			for (region_idx=0; region_idx<num_regions; region_idx++)
			{
				regional_models[charge][size_idx][region_idx].read_regional_model(&config,is);
				int f;
				for (f=0; f<regional_models[charge][size_idx][region_idx].single_breakage_tables.size(); f++)
				{
					regional_models[charge][size_idx][region_idx].single_breakage_tables[f].set_model_tolerance(config.get_tolerance());
					regional_models[charge][size_idx][region_idx].single_breakage_tables[f].config=&config;
				}
			}
		}
	}

	// copy models for other charges. Prefer charge 2 model
	int min_model_idx = -1;
	int max_model_idx = -1;

	if (max_score_model_charge<4)
		max_score_model_charge=4;

	int c;
	for (c=0; c<=max_score_model_charge; c++)
		if (regional_models[c].size() >0)
		{
			min_model_idx=c;
			break;
		}
	
	for (c=max_score_model_charge; c>=0; c--)
		if (regional_models[c].size() >0)
		{
			max_model_idx=c;
			break;
		}

	if (min_model_idx<0)
	{
		cout << "Error: no score models were read!" << endl;
		exit(1);
	}

	bool has_charge_2 = (regional_models[2].size()>0);
	if (regional_models[1].size()==0)
	{
		if (has_charge_2)
		{
			regional_models[1]=regional_models[2];
		}
		else
			regional_models[1]=regional_models[min_model_idx];
	}

	for (c=2; c<=max_score_model_charge; c++)
	{
		if (regional_models[c].size() == 0)
			regional_models[c]=regional_models[max_model_idx];
	}

	// read edge models..
	edge_model.read_edge_models(&config,(char *)config.get_model_name().c_str());


	is.close();

}





// calcs the score of participating fragments that have istopic peak scores
void DiscretePeakModel::add_breakage_iso_score(Spectrum *spec, 
												Breakage *breakage) const
{
	if (! breakage)
		return;

	const vector<int>& strong_frags = config.get_regional_fragment_sets()
		[breakage->parent_charge][breakage->parent_size_idx][breakage->region_idx].get_strong_frag_type_idxs();
	

	int i;
	for (i=0; i<strong_frags.size(); i++)
	{
		int pos = breakage->get_position_of_frag_idx(strong_frags[i]);
		if (pos>=0)
		{
			float iso_level = spec->get_peak_iso_level(breakage->fragments[pos].peak_idx);
			if (iso_level>0)
				breakage->score -= (2 + iso_level * 3);
		}
	}
}

void DiscretePeakModel::print_all_table_names(ostream& os) const
{
	int i,j,k;

	for (i=0; i<regional_models.size(); i++)
		for (j=0; j<regional_models[i].size(); j++)
			for (k=0; k<regional_models[i][j].size(); k++)
				regional_models[i][j][k].print_table_names(&config,os);
}


// prints joint scores of two top  fragments
void DiscretePeakModel::print_joint_scores(ostream& os) const
{
	print_level_legend(os);
	RegionalPepNovoModel rpm = regional_models[2][0][1];

	int f1_idx = rpm.frag_type_idxs[0];
	int f2_idx = rpm.frag_type_idxs[1];

	os << "joint score for fragments " << config.get_fragment(f1_idx).label << " and " <<
		config.get_fragment(f2_idx).label << endl;

	vector< vector<score_t> > joint_scores;
	joint_scores.resize(num_peak_levels);
	int i;
	for (i=0; i<num_peak_levels; i++)
		joint_scores[i].resize(num_peak_levels,NEG_INF);

	for (i=0; i<num_peak_levels; i++)
	{
		int j;
		for (j=0; j<num_peak_levels; j++)
		{
			table_entry e;

			e[0]=i;
			e[1]=j;
			e[2]=0;
			e[3]=0;
			e[4]=0;
					  
			int idx1= rpm.single_breakage_tables[0].calc_table_idx(e);
			int idx1b= rpm.independent_frag_tables[0].calc_table_idx(e);

			e[0]=j;
			e[1]=i;

			int idx2= rpm.single_breakage_tables[1].calc_table_idx(e);
			int idx2b= rpm.independent_frag_tables[1].calc_table_idx(e);

			joint_scores[i][j]= rpm.single_breakage_tables[0].score_probs[idx1] +
								rpm.single_breakage_tables[1].score_probs[idx2];
			joint_scores[i][j]-= rpm.independent_frag_tables[0].score_probs[idx1b] +
								 rpm.independent_frag_tables[1].score_probs[idx2b];

		}
	}

	os << "   ";
	for (i=0; i<num_peak_levels; i++)
		os << setw(3) << right << i << "    ";
	os << endl << endl;

	for (i=0; i<num_peak_levels; i++)
	{
		int j;
		os << left << setw(3) << i;
		for (j=0; j<num_peak_levels; j++)
		{
			os << setw(6) << right << joint_scores[i][j] << " ";
		}
		os << endl << endl;
	}


}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -