⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 advancedscoremodel.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "AdvancedScoreModel.h"
#include "DeNovoRankScore.h"

void AdvancedScoreModel::read_score_model(const char *name, bool silent_ind)
{
	rank_models[0]=NULL;
	rank_models[1]=NULL;
	rank_models[2]=NULL;

	int i;
	for (i=0; i<10; i++)
		rank_tag_models[i]=NULL;

	// resize regional breakage score models according to regional fragment sets
	const vector< vector< vector< RegionalFragments > > >& all_rfs = config.get_regional_fragment_sets();

	int c;
	regional_breakage_score_models.resize(all_rfs.size());
	for (c=0; c<all_rfs.size(); c++)
	{
		regional_breakage_score_models[c].resize(all_rfs[c].size());
		int s;
		for (s=0; s<all_rfs[c].size(); s++)
		{
			regional_breakage_score_models[c][s].resize(all_rfs[c][s].size());
			int r;
			for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
				if (! regional_breakage_score_models[c][s][r].was_initialized)
					regional_breakage_score_models[c][s][r].init(&config,c,s,r);
		}
	}
	
	int num_models_read=0;
	for (c=0; c<regional_breakage_score_models.size(); c++)
	{
		int s;
		for (s=0; s<regional_breakage_score_models[c].size(); s++)
		{
			int r;
			for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
			{
				if (regional_breakage_score_models[c][s][r].read_regional_score_model(name,silent_ind))
				{
					if (! silent_ind)
						cout << "Read breakage model: " << c << " " << s << " " << r << endl;
					num_models_read++;
				}
			}
		}
	}

	if (! silent_ind)
		cout << "Read " << num_models_read << " regional breakage models.." << endl;

	// read rank models if they exist (same name as the score model, with suffix CDNV, PDNV and DB
	//read_rank_models(name,silent_ind);
}
	
void AdvancedScoreModel::write_score_model(const char *name) const
{
	int num_models_read=0;
	int c;
	for (c=0; c<regional_breakage_score_models.size(); c++)
	{
		int s;
		for (s=0; s<regional_breakage_score_models[c].size(); s++)
		{
			int r;
			for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
			{
				if (regional_breakage_score_models[c][s][r].was_initialized)
					regional_breakage_score_models[c][s][r].write_regional_score_model(name);
				
			}
		}
	}
}


void AdvancedScoreModel::read_rank_models(const char *name, bool silent_ind)
{
	const string suffixes[]={"DB","DNVPART","DNVCOMP"};

	const int num_models = sizeof(rank_models)/sizeof(void *);

	int i;
	for (i=0; i<num_models; i++)
	{
		const string suffix = suffixes[i];
		DeNovoRankScorer *rank_model = (DeNovoRankScorer *)rank_models[i];
	
		string rank_name = string(name) + "_" + suffix;
		string path = config.get_resource_dir() + "/" + rank_name + "/" + suffix + "_rank_model.txt";

		ifstream fs(path.c_str());
		if (! fs.is_open() || ! fs.good())
		{
			if (! silent_ind)
				cout << "No " << path << endl;
			continue;
		}

		fs.close();
		rank_model = new DeNovoRankScorer;
		rank_model->set_type(i);
		rank_model->set_model(this);
		rank_model->read_denovo_rank_scorer_model(path.c_str(),suffix,silent_ind);
		rank_models[i] = (void *)rank_model;
	
		if (! silent_ind)
			cout << "Read " << path << endl;

	}

	// read tag models
	for (i=3; i<10; i++)
	{
		DeNovoRankScorer *rank_model = (DeNovoRankScorer *)rank_tag_models[i];
	
		
		ostringstream oss;
		oss << "TAG" << i;
		string suffix = oss.str();
		string rank_name = string(name) + "_" + suffix;
		string path = config.get_resource_dir() + "/" + rank_name + "/" + suffix + "_rank_model.txt";

		ifstream fs(path.c_str());
		if (! fs.is_open() || ! fs.good())
		{
			if (! silent_ind)
				cout << "No " << path << endl;
			continue;
		}

		fs.close();
		rank_model = new DeNovoRankScorer;
		rank_model->set_type(3);
		rank_model->set_model(this);
		rank_model->read_denovo_rank_scorer_model(path.c_str(),suffix,silent_ind);
		rank_tag_models[i] = (void *)rank_model;
	
		if (! silent_ind)
			cout << "Read " << path << endl;

	}

//	cout << endl;
}

// simple Dancik-style score
void AdvancedScoreModel::score_breakage(Spectrum *spec, Breakage *breakage, bool verbose) const
{
	const RegionalScoreModel& breakage_model = regional_breakage_score_models[breakage->parent_charge]
														  [breakage->parent_size_idx][breakage->region_idx];

	if (0 || breakage_model.has_all_breakage_models)
	{
	}
	else
	{
		score_t breakage_score=0;
		int i;
		for (i=0; i<breakage_model.frag_type_idxs.size(); i++)
		{
			const int frag_idx = breakage_model.frag_type_idxs[i];
			if (breakage->is_frag_type_visible(frag_idx))
			{
				if (breakage->get_position_of_frag_idx(frag_idx)>=0)
				{
					breakage_score += breakage_model.frag_inten_scores[frag_idx];
				}
				else
					breakage_score += breakage_model.frag_no_inten_scores[frag_idx];
			}
		}
		breakage->score = breakage_score;
	}
}
	


void AdvancedScoreModel::score_graph_edges(PrmGraph& prm) const
{
	edge_model.score_graph_edges(prm);
}



int AdvancedScoreModel::get_max_score_model_charge() const
{
	return regional_breakage_score_models.size();
}


void AdvancedScoreModel::train_score_model(const char *name, const FileManager& fm, 
						int charge, int size_idx, int region_idx)
{
	// resize regional breakage score models according to regional fragment sets
	const vector< vector< vector< RegionalFragments > > >& all_rfs = config.get_regional_fragment_sets();

	int c;
	regional_breakage_score_models.resize(all_rfs.size());
	for (c=0; c<all_rfs.size(); c++)
	{
		regional_breakage_score_models[c].resize(all_rfs[c].size());
		int s;
		for (s=0; s<all_rfs[c].size(); s++)
		{
			regional_breakage_score_models[c][s].resize(all_rfs[c][s].size());
			int r;
			for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
				if (! regional_breakage_score_models[c][s][r].was_initialized)
					regional_breakage_score_models[c][s][r].init(&config,c,s,r);
		}
	}


	// train models
	for (c=0; c<regional_breakage_score_models.size(); c++)
	{
		if (regional_breakage_score_models.size() == 0 || (charge>0 && charge != c))
			continue;

		int s;
		for (s=0; s<regional_breakage_score_models[c].size(); s++)
		{
			if (size_idx>=0 && s != size_idx)
				continue;

			int r;
			for (r=0; r<regional_breakage_score_models[c][s].size(); r++)
			{
				if (region_idx>=0 && r != region_idx)
					continue;
				
				regional_breakage_score_models[c][s][r].train_regional_score_model(this,name,fm);
			}
		}
	}
}



/***********************************************************************
Creates a score table for each node that has an enry for each possible
entrance and exit combination of amino aicds.
************************************************************************/
void AdvancedScoreModel::score_all_node_combos(PrmGraph *prm) const
{
	const vector<int>& org_aas = config.get_org_aa();
	const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
	const int num_nodes = prm->get_num_nodes();
	const mass_t mid_mass = prm->get_pm_with_19() * 0.5;
	int i;
	for (i=0; i<num_nodes; i++)
	{
		Node& node = prm->get_non_const_node(i);
		const RegionalScoreModel& score_model = 
			regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];

		vector<BreakageInfo> infos;

		BreakageInfo double_gap_info;
		prm->fill_breakage_info(this,&double_gap_info,i,NEG_INF,NEG_INF,NEG_INF,NEG_INF);
		infos.push_back(double_gap_info);

		int j;
		for (j=0; j<node.in_edge_idxs.size(); j++)
		{
			const int in_edge_idx = node.in_edge_idxs[j];
			const MultiEdge& in_edge = multi_edges[in_edge_idx];
			const int num_in_variants = in_edge.get_num_variants();

			int k;
			for (k=0; k<num_in_variants; k++)
			{
				BreakageInfo c_gap_info;
				prm->fill_breakage_info(this,&c_gap_info,i,in_edge_idx,k,NEG_INF,NEG_INF);
				infos.push_back(c_gap_info);
			}
		}

		for (j=0; j<node.out_edge_idxs.size(); j++)
		{
			const int out_edge_idx = node.out_edge_idxs[j];
			const MultiEdge& out_edge = multi_edges[out_edge_idx];
			const int num_out_variants = out_edge.get_num_variants();

			int k;
			for (k=0; k<num_out_variants; k++)
			{
				BreakageInfo n_gap_info;
				prm->fill_breakage_info(this,&n_gap_info,i,NEG_INF,NEG_INF,out_edge_idx,k);
				infos.push_back(n_gap_info);
			}
		}
		
		for (j=0; j<node.in_edge_idxs.size(); j++)
		{
			const int in_edge_idx = node.in_edge_idxs[j];
			const MultiEdge& in_edge = multi_edges[in_edge_idx];
			const int num_in_variants = in_edge.get_num_variants();

			int k;
			for (k=0; k<num_in_variants; k++)
			{
				BreakageInfo c_gap_info;
				prm->fill_breakage_info(this,&c_gap_info,i,in_edge_idx,k,NEG_INF,NEG_INF);
				infos.push_back(c_gap_info);

				int a;
				for (a=0; a<node.out_edge_idxs.size(); a++)
				{
					const int out_edge_idx = node.out_edge_idxs[a];
					const MultiEdge& out_edge = multi_edges[out_edge_idx];
					const int num_out_variants = out_edge.get_num_variants();

					int b;
					for (b=0; b<num_out_variants; b++)
					{
						BreakageInfo info;
						prm->fill_breakage_info(this,&info,i,in_edge_idx,k,out_edge_idx,b);
						infos.push_back(info);
					}
				}
			}
		}

	
		node.score_combos.clear();

		for (j=0; j<infos.size(); j++)
			infos[j].score = score_model.score_a_single_breakage_combo(prm, node, &node.breakage,infos[j]);

		for (j=0; j<infos.size(); j++)
			node.score_combos[ScoreComboLoc(infos[j])]=infos[j].score;

		score_t max_score = NEG_INF;
	
		if (i>0 && i<num_nodes-1)
		{
			if (node.mass>mid_mass)
			{
				int j;
				for (j=0; j<infos.size(); j++)
					if (infos[j].n_aa >= Ala && infos[j].c_aa <= Gap && infos[j].score>max_score)
						max_score=infos[j].score;
			}
			else
			{
				int j;
				for (j=0; j<infos.size(); j++)
					if (infos[j].n_aa <= Gap && infos[j].c_aa >= Ala && infos[j].score>max_score)
						max_score=infos[j].score;
			}
		}
		else
			max_score = infos[0].score;

		node.score = max_score; 
		node.breakage.score = node.score;
	}
	prm->set_has_node_combo_scores(true);
}



void AdvancedScoreModel::score_peptide_node_combos(PrmGraph *prm, const Peptide& peptide ) const
{
	const vector<int>& org_aas		= config.get_org_aa();
	const vector<mass_t>& aa2mass	= config.get_aa2mass();
	const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
	const int num_nodes		   = prm->get_num_nodes();
	const vector<int>& pep_aas = peptide.get_amino_acids();
	const int num_pep_aas = pep_aas.size();

	mass_t p_mass=0;
	int aa_idx=0;

	int i;
	for (i=0; i<num_nodes; i++)
	{
		Node& node = prm->get_non_const_node(i);
		const RegionalScoreModel& score_model = 
			regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];

		int in_edge_idx=NEG_INF,  in_edge_variant=NEG_INF;
		int out_edge_idx=NEG_INF, out_edge_variant=NEG_INF;

	//	cout << "N: " << node.mass << endl;
		while (aa_idx<pep_aas.size() && fabs(p_mass-node.mass)>0.1)
		{
			p_mass += aa2mass[pep_aas[aa_idx]];
			aa_idx++;
	//		cout << aa_idx << "\t" << p_mass << endl;
		}
		
		if (aa_idx == pep_aas.size() && i != num_nodes-1)
		{
			int j;
			for (j=0; j<num_nodes; j++)
				cout << j << "\t" << prm->get_node(j).mass << endl;

			cout << endl << "PEP:" << endl;
			vector<mass_t> exp_masses;
			peptide.calc_expected_breakage_masses((Config *)&config,exp_masses);
			for (j=0; j<exp_masses.size(); j++)
				cout << j << "\t" << exp_masses[j] << endl;

			cout << "Error: mismatch between nodes and peptide!" << endl;
			exit(1);
		}

		if (node.in_edge_idxs.size()>0)
		{
			int j;
			for (j=0; j<node.in_edge_idxs.size(); j++)
			{
				const int edge_idx = node.in_edge_idxs[j];
				const MultiEdge& in_edge = multi_edges[edge_idx];
				const int num_aa = in_edge.num_aa;

				if (num_aa>aa_idx)
					continue;

				const int var_idx = in_edge.get_variant_idx(num_aa,&pep_aas[aa_idx-num_aa]);
				if (var_idx<0)
					continue;

				in_edge_idx = edge_idx;
				in_edge_variant = var_idx;
				break;
			}
		}

		if (node.out_edge_idxs.size()>0)
		{
			int j;
			for (j=0; j<node.out_edge_idxs.size(); j++)
			{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -