⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 denovopartmodel.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectra…
💻 CPP
📖 第 1 页 / 共 5 页
字号:
		{
			int cut_idx;
			for (cut_idx=1; cut_idx<prediction_ranks.size(); cut_idx++)
				if (prediction_ranks[cut_idx]==pred_rank)
					break;
			
			if (cut_idx==prediction_ranks.size())
				break;

			const int obs_rank = inten_ranks[cut_idx];
			if (frag_intens[cut_idx]>0)
				score_offsets[pred_rank]=predicted_scores[cut_idx]-sorted_predicted_scores[obs_rank];

		//	cout << pred_rank << "\t" << cut_idx << "\t" << score_offsets[pred_rank] << endl;
		}

		for (i=0; i<10 && i<num_obs_frags; i++)
			rbs.add_real_feature(f_idx+i,score_offsets[i]);
	
		f_idx+=10;



		
	}


//	ppp.print_ranks_vs_intens(intens);

	vector<score_pair> pairs;
	if (num_ppp_frags>1)
	{
		const int frag1_idx = ppp_frag_type_idxs[0];
		const int frag2_idx = ppp_frag_type_idxs[1];
		int i;
		int num_comp_pairs=0;
		const int min_size = (intens[frag1_idx].size()>intens[frag2_idx].size() ? intens[frag2_idx].size() : 
									intens[frag1_idx].size());
		for (i=1; i<min_size; i++)
		{
			const float inten1 = intens[frag1_idx][i];
			const float inten2 = intens[frag2_idx][i];
			const int pred1 = ppp_prediction_ranks[0][i];
			const int pred2 = ppp_prediction_ranks[1][i];

			if (inten1>0 && inten2>0)
				num_comp_pairs++;

			if (inten1>=0 && inten2>=0 && pred1<POS_INF && pred2<POS_INF)
				pairs.push_back(score_pair(i,pred1+pred2));
		}

		rbs.add_real_feature(f_idx, (float)num_comp_pairs);

		sort(pairs.begin(),pairs.end());
		
		int counter=0;
		for (i=pairs.size()-1; i>=0; i--)
		{
			if (counter++==7)
				break;

			const int cut_idx = pairs[i].idx;
			int stat=0;
			if (intens[frag1_idx][cut_idx]>0)
				stat++;
			if (intens[frag2_idx][cut_idx]>0)
				stat++;

			rbs.add_real_feature(f_idx+counter, (float)stat);
		}
	}
	f_idx += 8;
}



void DeNovoPartitionModel::fill_prm_features(const PeptideSolution& sol, 
											 const SeqPath& path, 
											 int model_type, 
											 RankBoostSample& rbs) const
{
	int f_idx = prm_start_idx;

	const PrmGraph *prm = path.prm_ptr;
	const Config *config = prm->get_config();
	const vector<PathPos>& positions = path.positions;
	const int num_aas = path.get_num_aa();
	vector<int> amino_acids;
	path.get_amino_acids(amino_acids);
	if (! prm)
	{
		cout << "Error: SeqPath has not prm ptr!" << endl;
		exit(1);
	}

	int i;
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	mass_t pep_mass = 0;
	for (i=0; i<num_aas; i++)
		pep_mass += aa2mass[amino_acids[i]];

	const mass_t delta = (positions[positions.size()-1].mass - positions[0].mass) - pep_mass;

	if (fabs(delta)>20)
	{
		int i;
		for (i=0; i<positions.size(); i++)
		{
			cout << "Error: mismatch between peptide and Seqpath: " << i << "\t" << 
				positions[i].node_idx << " " << positions[i].mass << endl;
		}
		exit(0);
	}

	vector<float> breakage_scores;
	breakage_scores.clear();
	for (i=0; i<positions.size(); i++)
	{
		const int node_idx = positions[i].node_idx;
		if (node_idx>=0)
		{
			const Node& node = prm->get_node(node_idx);
			if (node.type != NODE_N_TERM && node.type != NODE_C_TERM)
				breakage_scores.push_back(positions[i].breakage->score);
		}
	}

	float min_consec_two_breaks = POS_INF;
	float min_consec_three_breaks = POS_INF;
	float max_consec_two_breaks = NEG_INF;
	float max_consec_three_breaks = NEG_INF;

	float total_breakage_score = NEG_INF;
	if (breakage_scores.size()>1)
	{
		total_breakage_score=0;
		for (i=0; i<breakage_scores.size()-1; i++)
		{
			const float sum_score = breakage_scores[i]+breakage_scores[i+1];
			if (sum_score<min_consec_two_breaks)
				min_consec_two_breaks = sum_score;
			if (sum_score>max_consec_two_breaks)
				max_consec_two_breaks = sum_score;

			if (i>0)
			{
				const float sum_three= sum_score + breakage_scores[i-1];
				if (sum_three<min_consec_three_breaks)
					min_consec_three_breaks = sum_three;
				if (sum_three>max_consec_three_breaks)
					max_consec_three_breaks = sum_three;
			}
			total_breakage_score+=breakage_scores[i];
		}
		total_breakage_score+=breakage_scores[i];
	}


		


	int aa_start=0;
	if (positions[0].node_idx==0 && positions[0].edge_idx<0)
		while (aa_start<positions.size() && positions[aa_start].node_idx<0)
			aa_start++;
	int aa_end = positions.size()-2;
	while (aa_end>aa_start && positions[aa_end].node_idx<0)
		aa_end--;

	int eff_num_aas = (aa_end-aa_start);
	if (sol.reaches_c_terminal)
		eff_num_aas++;

	if (eff_num_aas<6)
		eff_num_aas=6;

	const int num_breakage_scores = breakage_scores.size();
	const int delta_num_breakages = eff_num_aas-breakage_scores.size();

//	int num_missing_edges=0;
//	for (i=0; i<positions.size()-1; i++)
//		if (positions[i].node_idx>=0 && positions[i].edge_idx<0)
//			num_missing_edges++;

	sort(breakage_scores.begin(),breakage_scores.end());
	
	// add features

	int combo_idx=0;
	if (sol.reaches_n_terminal && ! sol.reaches_c_terminal)
	{
		combo_idx=1;
	}
	else if (! sol.reaches_n_terminal && sol.reaches_c_terminal)
	{
		combo_idx=2;
	}
	else if (! sol.reaches_n_terminal && ! sol.reaches_c_terminal)
		combo_idx = 3;

	const int idx_shift = ( (model_type == 1 || model_type == 3) ? combo_idx * 6 : 0);
	int sfidx = f_idx + idx_shift;

	rbs.add_real_feature(sfidx,   delta);

	if (total_breakage_score>NEG_INF)
		rbs.add_real_feature(sfidx+1, total_breakage_score);

	if (num_breakage_scores>0 && total_breakage_score>NEG_INF)
		rbs.add_real_feature(sfidx+2, total_breakage_score/num_breakage_scores);

	if (eff_num_aas>0 && total_breakage_score>NEG_INF)
		rbs.add_real_feature(sfidx+3, total_breakage_score/eff_num_aas);
	
	rbs.add_real_feature(sfidx+4, path.path_score);

	if (eff_num_aas>0)
		rbs.add_real_feature(sfidx+5, path.path_score/eff_num_aas);
	
	f_idx+= ((model_type == 1 || model_type == 3) ? 24 : 6);

	if (model_type == 1 || model_type == 3)
	{
		int path_rank = path.org_rank;
		if (model_type == 1 && path_rank > 200)
			path_rank = 200;
		if (model_type == 3 && path_rank> 75)
			path_rank = 75;

		rbs.add_real_feature(f_idx++, total_breakage_score);
		rbs.add_real_feature(f_idx++, path.path_score);
		rbs.add_real_feature(f_idx++, path_rank);
		rbs.add_real_feature(f_idx++, path.multi_path_score);

		if (path.delta_score>=0)
		{
			rbs.add_real_feature(f_idx,path.delta_score);
			if (path.delta_score<=1.5)
			{
				rbs.add_real_feature(f_idx+1,path_rank);
			}
			else if (path.delta_score<=7.5)
			{
				rbs.add_real_feature(f_idx+2,path_rank);
			}
			else if (path.delta_score<=15.0)
			{
				rbs.add_real_feature(f_idx+3,path_rank);
			}
			else
				rbs.add_real_feature(f_idx+4,path_rank);
				
		}
		f_idx+=5;	


		if (model_type == 3)
		{
			
			if (path.tag_percent_top_5>0)
				rbs.add_real_feature(f_idx,path.tag_percent_top_5);

			if (path.tag_percent_top_20>0)
				rbs.add_real_feature(f_idx+1,path.tag_percent_top_20);

			if (path.tag_percent_all>0)
				rbs.add_real_feature(f_idx+2,path.tag_percent_all);

			if (path.tag_percent_top_5>0)
			{
				rbs.add_real_feature(f_idx+3,path.org_rank);
			}
			else if (path.tag_percent_top_20>0)
			{
				rbs.add_real_feature(f_idx+4,path.org_rank);
			}
			else if (path.tag_percent_all>0)
			{
				rbs.add_real_feature(f_idx+5,path.org_rank);
			}

			if (path.multi_path_rank<POS_INF)
				rbs.add_real_feature(f_idx+5,path.multi_path_rank);
		
			f_idx+=7;
		}
	}
	
	rbs.add_real_feature(f_idx++,delta_num_breakages);
	rbs.add_real_feature(f_idx++,path.num_forbidden_nodes);
	rbs.add_real_feature(f_idx++,num_breakage_scores);
	if (num_breakage_scores>0)
		rbs.add_real_feature(f_idx,breakage_scores[0]);
	f_idx++;

	if (num_breakage_scores>2)
		rbs.add_real_feature(f_idx,breakage_scores[1]);
	f_idx++;

	if (num_breakage_scores>4)
		rbs.add_real_feature(f_idx,breakage_scores[2]);
	f_idx++;

	if (min_consec_three_breaks<POS_INF)
		rbs.add_real_feature(f_idx,min_consec_three_breaks);
	f_idx++;
	if (max_consec_three_breaks>NEG_INF)
		rbs.add_real_feature(f_idx,max_consec_three_breaks);
	f_idx++;

	if (min_consec_two_breaks<POS_INF)
		rbs.add_real_feature(f_idx,min_consec_two_breaks);
	f_idx++;
	if (max_consec_two_breaks>NEG_INF)
		rbs.add_real_feature(f_idx,max_consec_two_breaks);
	f_idx++;


	int num1=0,num2=0,num3=0,num4=0,num5=0;

	for (i=0; i<breakage_scores.size(); i++)
	{
		const float& score = breakage_scores[i];
		if (score<-10)
		{
			num1++;
		}
		else if (score<0)
		{
			num2++;
		}
		else if (score<8)
		{
			num3++;
		}
		else if (score<15)
		{
			num4++;
		}
		else
			num5++;
	}	

	rbs.add_real_feature(f_idx++,num1);
	rbs.add_real_feature(f_idx++,num2);
	rbs.add_real_feature(f_idx++,num3);
	rbs.add_real_feature(f_idx++,num4);
	rbs.add_real_feature(f_idx++,num5);
	if (num_breakage_scores>0)
	{
		rbs.add_real_feature(f_idx++,(float)num1/(float)num_breakage_scores);
		rbs.add_real_feature(f_idx++,(float)(num1+num2)/(float)num_breakage_scores);
		rbs.add_real_feature(f_idx++,(float)(num3+num4+num5)/(float)num_breakage_scores);
		rbs.add_real_feature(f_idx++,(float)(num4+num5)/(float)num_breakage_scores);
	}
	else
		f_idx+=4;


	if (sol.reaches_n_terminal && positions[0].edge_idx>=0 && num_breakage_scores>2)
	{
		int i=1;
		while (positions[i].node_idx<0)
			i++;
		rbs.add_real_feature(f_idx,positions[i].node_score);
	}
	f_idx++;

	if (sol.reaches_c_terminal && num_breakage_scores>2)
	{
		int i=positions.size()-2;
		while (positions[i].node_idx<0)
			i++;
		if (positions[i].edge_idx>=0)
		{
			rbs.add_real_feature(f_idx,positions[i].node_score);
		}
	}
	f_idx++;

	static vector<int> frag_idx_oris;
	if (frag_idx_oris.size() == 0)
	{
		const vector<FragmentType>& all_frags = config->get_all_fragments();
		frag_idx_oris.resize(all_frags.size(),NEG_INF);
		int i;
		for (i=0; i<all_frags.size(); i++)
			frag_idx_oris[i]=all_frags[i].orientation;
	}

	int num_dual_ori=0;
	int num_with1=0, num_with2=0, num_with_alot=0;
	vector<int> oris;
	for (i=0; i<positions.size(); i++)
	{
		const Breakage *breakage = positions[i].breakage;
		if (! breakage)
			continue;
		const int num_frags = breakage->fragments.size();
		if (num_frags ==1)
		{
			num_with1++;
		}
		else if (num_frags == 2)
		{
			num_with2++;
		}
		else if (num_frags > 5)
		{
			num_with_alot++;
		}

		int num_pre=0,num_suf=0;
		int j;
		for (j=0; j<breakage->fragments.size(); j++)
		{
			const int ori = frag_idx_oris[breakage->fragments[j].frag_type_idx];
			if (ori==PREFIX)
			{
				num_pre++;
			}
			else
				num_suf++;
		}
		if (num_pre == 0 || num_suf - num_pre > 5)
		{
			oris.push_back(SUFFIX);
		}
		else if (num_suf == 0 || num_pre - num_suf>5)
		{
			oris.push_back(PREFIX);
		}
		else
			oris.push_back(99);

		if (num_pre>0 && num_suf>0)
			num_dual_ori++;
	}

	int prev=99;
	int switches=0;
	for (i=0; i<oris.size(); i++)
	{
		if (oris[i] != 99)
		{
			if (prev != 99 && prev != oris[i])
				switches++;
			prev=oris[i];
		}
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -