⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	for (i=0; i<num_frags; i++)
	{
		FragmentType ft;
		ft.read_fragment(is);
		all_fragments.add_fragment_type(ft);
	}
}

/************************************************************
reads the fragment sets from a stream
*************************************************************/
void Config::read_regional_fragment_sets(istream& is)
{
	int i,num_frags=-1;
	char buff[128];
	is.getline(buff,128);

	
	if (strncmp(buff,"#FRAGMENT SETS",12))
	{
		cout << "Error: bad line in fragments file: " << buff << endl;
		if (strlen(buff)<4)
		{
			cout << "This error can possibly be due to Wnix/Windows issues. Try running dos2unix on all \".txt\" files in the Models directory." << endl;
		}
		exit(1);
	}

	regional_fragment_sets.clear();

	while ( ! is.eof())
	{
		int charge,size_idx,region_idx,num_fragments;
		is.getline(buff,128);
		if (is.gcount()<2)
			continue;

		// some other parameter set is in this file, put back line and return
		if (buff[0]=='#')
		{
			int i;
			for (i=strlen(buff)-1; i>=0; i--)
				is.putback(buff[i]);
			return;
		}

		float rand_prob=-1;
		if (sscanf(buff,"%d %d %d %d %f",&charge,&size_idx,&region_idx,&num_fragments,
			&rand_prob) != 5)
		{
			cout << "Error: bad line in fragments file: " << buff << endl;
			if (strlen(buff)<4)
			{
				cout << "This error can possibly be due to Wnix/Windows issues. Try running dos2unix on all \".txt\" files in the Models directory." << endl;
			}
			exit(1);
		}
		
		// make sure fragment_sets vectors are large enough
		if (regional_fragment_sets.size()<charge+1)
			regional_fragment_sets.resize(charge+1);
		if (regional_fragment_sets[charge].size()<size_idx+1)
			regional_fragment_sets[charge].resize(size_idx+1);
		if (regional_fragment_sets[charge][size_idx].size()<region_idx+1)
			regional_fragment_sets[charge][size_idx].resize(region_idx+1);
		
		RegionalFragments& rf = regional_fragment_sets[charge][size_idx][region_idx];

		rf.set_rand_prob(rand_prob);

		rf.frag_type_idxs.clear();

		// read fragments
		for (i=0; i<num_fragments; i++)
		{
			string  label;
			score_t prob = 0;
			is.getline(buff,128);
			istringstream iss(buff);

			iss >> label >> prob;
			
			const int frag_idx = this->get_frag_idx_from_label(label);
			if (frag_idx<0)
			{
				cout << "Error: unrecognized fragment: " << label << endl;
				exit(1);
			}

			all_fragments.fragments[frag_idx].prob += prob;

			rf.frag_type_idxs.push_back(frag_idx);
			rf.frag_probs.push_back(prob);
		}
		
		// read strong fragments
		int i;
		is.getline(buff,128);
		if (strncmp(buff,"strong",6))
		{
			cout << "Error: expected to see list of strong frags! : " << buff << endl;
			exit(1);
		}
		int num_strong=0;
		istringstream iss(buff+7);
		iss >> num_strong;
		rf.strong_frag_type_idxs.clear();
		for (i=0; i<num_strong; i++)
		{
			string label;
			iss >> label;
			int f_idx=get_frag_idx_from_label(label);
			if (f_idx>=0)
			{
				rf.strong_frag_type_idxs.push_back(f_idx);
				int j;
				for (j=0; j<all_strong_fragment_type_idxs.size(); j++)
					if (all_strong_fragment_type_idxs[j] == f_idx)
						break;
				if (j==all_strong_fragment_type_idxs.size())
					all_strong_fragment_type_idxs.push_back(f_idx);
			}
			else
				break;	
		}

		// read combos
		is.getline(buff,128);
		if (strncmp(buff,"combos",6))
		{
			cout << "Error: expected to see list of combos! : " << buff << endl;
			exit(1);
		}
		int num_combos;
		istringstream iss2(buff+7);
		iss2 >> num_combos;
		rf.frag_type_combos.clear();
		for (i=0; i<num_combos; i++)
		{
			FragmentCombo fc;
			fc.read_combo(this,is);
			int j;
			int  strong_frag_pos=-1;
			for (j=0; j<fc.frag_inten_idxs.size(); j++)
			{
				int k;
				for (k=0; k<rf.strong_frag_type_idxs.size(); k++)
				{
					if (fc.frag_inten_idxs[j] == rf.strong_frag_type_idxs[k])
					{
						strong_frag_pos = j;
						break;
					}
				}
			}

			if (strong_frag_pos<0)
			{
				cout << "Error: combo must have at least one strong fragment type with intensity!" << endl;
				exit(1);
			}

			// swap to make the strong frag first
			if (strong_frag_pos>0)
			{
				int tmp;
				tmp=fc.frag_inten_idxs[strong_frag_pos];
				fc.frag_inten_idxs[strong_frag_pos] = fc.frag_inten_idxs[0];
				fc.frag_inten_idxs[0] = tmp;
			}
			rf.frag_type_combos.push_back(fc);
		}
	}

	// clone regional fragment sets if needed
	if (regional_fragment_sets[2].size()>0)
	{
		if (regional_fragment_sets[1].size() == 0)
			clone_regional_fragment_sets(2,1);

		if (regional_fragment_sets.size()<=4)
			regional_fragment_sets.resize(5);

		if (regional_fragment_sets[3].size() == 0)
			clone_regional_fragment_sets(2,3);

		if (regional_fragment_sets[4].size() == 0)
			clone_regional_fragment_sets(3,4);
	}



}


void Config::clone_regional_fragment_sets(int source_charge, int target_charge)
{
	if (regional_fragment_sets.size()<=target_charge)
		regional_fragment_sets.resize(target_charge+1);

	regional_fragment_sets[target_charge] = regional_fragment_sets[source_charge];
}



bool read_mass_type_line(const char* prefix, char *line, mass_t& val)
{
	int len = strlen(prefix);
	if (strncmp(prefix,line,len))
		return false;
	istringstream is(line+len);
	is >> val;
	return true;
}

bool read_score_type_line(const char* prefix, char *line, score_t& val)
{
	int len = strlen(prefix);
	if (strncmp(prefix,line,len))
		return false;
	istringstream is(line+len);
	is >> val;
	return true;
}

/**********************************************************************
// parses a line that is assumed to be from a config file
// all parameters are assumed to start with
// #CONF <PARAMETER_NAME> VALUES
***********************************************************************/
void Config::parse_config_parameter(char *current_line)
{
	char buff[256];
	int number;

	// fragments file
	if ( sscanf(current_line,"#CONF FRAGMENTS_FILE %s",buff) == 1)
	{
		string path = resource_dir + "/" + string(buff);

		fstream fs(path.c_str(),ios::in);
		if (! fs.good())
		{
			cout << "Error openening fragment sets: " << path << endl;
			exit(1);
		}
		this->read_fragments(fs);
		fragments_file=buff;
		return;
	}

	// regional fragment sets
	if ( sscanf(current_line,"#CONF REGIONAL_FRAGMENT_SETS_FILE %s",buff) == 1)
	{
		string path = resource_dir + "/" + string(buff);

		fstream fs(path.c_str(),ios::in);
		if (! fs.good())
		{
			cout << "Error openening fragment sets: " << buff << endl;
			exit(1);
		}
		this->read_regional_fragment_sets(fs);
		regional_fragment_sets_file=buff;
		set_all_regional_fragment_relationships();
		return;
	}


	// mass spec type
	if ( sscanf(current_line,"#CONF MASS_SPEC_TYPE %d",&number) == 1)
	{
		mass_spec_type = number;
		return;
	}

	// digest
	if ( sscanf(current_line,"#CONF DIGEST_TYPE %d",&number) == 1)
	{
		set_digest_type(number);
		return;
	}


	// resource dir
	if ( sscanf(current_line,"#CONF RESOURCE_DIR %s",buff) == 1)
	{
		resource_dir = string(buff);
		return;
	}

	if ( sscanf(current_line,"#CONF MAX_NUMBER_OF_PEAKS_PER_LOCAL_WINDOW %d",&number) == 1)
	{
		max_number_peaks_per_local_window = number;
		return;
	}

	if ( sscanf(current_line,"#CONF NUMBER_OF_STRONG_PEAKS_PER_LOCAL_WINDOW %d",&number) == 1)
	{
		number_of_strong_peaks_per_local_window = number;
		return;
	}

	if (read_mass_type_line("#CONF LOCAL_WINDOW_SIZE",current_line,local_window_size))
		return;

	if (read_mass_type_line("#CONF TOLERANCE",current_line,tolerance))
		return;

	if (read_mass_type_line("#CONF PM_TOLERANCE",current_line,pm_tolerance))
		return;

	if ( sscanf(current_line,"#CONF MAX_EDGE_LENGTH %d",&number) == 1)
	{
		max_edge_length = number;
		return;
	}

	
	if (read_score_type_line("#CONF TERMINAL_SCORE",current_line,terminal_score))
		return;

	if (read_score_type_line("#CONF DIGEST_SCORE",current_line,digest_score))
		return;

	if (read_score_type_line("#CONF FORBIDDEN_PAIR_PENALTY",current_line,forbidden_pair_penalty))
		return;

	if (sscanf(current_line,"#CONF NEED_TO_CORRECT_PM %d",&number) == 1)
	{
		need_to_estimate_pm = number;
		return;
	}


	if (!strncmp("#CONF SIZE_THRESHOLDS",current_line,21))
	{
		istringstream is(current_line+22);
		int i,num_sizes;

		is >> num_sizes;
		size_thresholds.resize(num_sizes);
		for (i=0; i<num_sizes; i++)
		{
			int j,size;
			is >> size;
			size_thresholds[i].resize(size);
			for (j=0; j<size; j++)
				is >> size_thresholds[i][j];
		}
		return;
	}

	if (!strncmp("#CONF REGION_THRESHOLDS",current_line,22))
	{
		istringstream is(current_line+23);
		int i,num_sizes;

		is >> num_sizes;
		region_thresholds.resize(num_sizes); 
		for (i=0; i<num_sizes; i++)
			is >> region_thresholds[i];

		return;
	}

	if ( !strncmp(current_line,"#CONF MASS_EXCLUDE_RANGE",23))
	{
		istringstream is(current_line+24);
		mass_t min_inp_range=-1.0;
		mass_t max_inp_range=-2.0;

		is >> min_inp_range >> max_inp_range;
		if (max_inp_range<min_inp_range)
		{
			cout << "Error: paramter \"#CONF MASS_EXCLUDE_RANGE\" should be followed a pair of numbers: minimal mass range and maximal mass range to exclude!" << endl;
			cout << "BAD Line: " << current_line << endl;
		}

		add_exclude_range(min_inp_range,max_inp_range);
	
		return;
	}

	if (current_line[0] == '#' || strlen(current_line)<3)
		return;

	cout << "Warning: bad line in config file: " << current_line << endl;
}




void Config::read_config(const char* file_name)
{
	config_file = file_name;
	
	init_with_defaults();
	
	string path = resource_dir + "/" + string(file_name);

	fstream fs(path.c_str(),ios::in);

	while (! fs.eof() && fs.good())
	{
		char buff[1024];

		fs.getline(buff,1024);
		if (fs.gcount()<4)
			continue;

		parse_config_parameter(buff);
	}
}



void Config::write_config()
{

	if (fragments_file.length()>0)
	{
		fragments_file = model_name + "_fragments.txt";
		string path = resource_dir + "/" + fragments_file;
		ofstream fs(path.c_str(),ios::out);
		print_fragments(fs);
	}

	if (regional_fragment_sets_file.length()>0)
	{
		regional_fragment_sets_file = model_name + "_fragment_sets.txt";
		string path = resource_dir + "/" + regional_fragment_sets_file;
		ofstream rfs(path.c_str(),ios::out);
		print_regional_fragment_sets(rfs);
	}

	ofstream os(config_file.c_str(), ios::out);
	print_config_parameters(os);
}



/****************************************************************
	calclates the masses of the different aa_combos
	combos are sorted aa lists.
	lazy version, hardcoded, more elegant to do recursive way 
	(without real recursion).

  And also finds the maximum combo mass

  Fills in the aa_edge_combos vector which holds all lengths together
*****************************************************************/
void Config::calc_aa_combo_masses()
{
	const vector<int>& session_aas = get_session_aas();
	const vector<mass_t>& aa2mass = get_aa2mass();
	const int num_aas = session_aas.size();
	const int last_aa = session_aas[num_aas-1];


	vector< vector<AA_combo> > aa_combo_by_length; // first dim is edge length
	aa_combo_by_length.resize(max_edge_length+1);

	int i;

	if (max_edge_length > MAX_EDGE_SIZE)
	{
		cout << "Error: code doesn't support edges larger than "<< MAX_EDGE_SIZE << endl;
		exit(0);
	}

	if (max_edge_length>5)
	{
		cout << "Error: code doesn't support such large edges!"<< endl;
		exit(0);
	}

	aa_combo_by_length[1].clear();
	for (i=0; i<num_aas; i++)
	{
		const int aa1 = session_aas[i];
		if (aa1 == Ile)
			continue;

		AA_combo ac;
		ac.amino_acids[0]=aa1;
		ac.num_aa=1;
		ac.total_mass=aa2mass[aa1];
		
		aa_combo_by_length[1].push_back(ac);
	}
	sort(aa_combo_by_length[1].begin(),aa_combo_by_length[1].end());
	

	if (max_edge_length>=2)
	{
		aa_combo_by_length[2].clear();
		for (i=0; i<num_aas; i++)
		{
			int j;
			const int aa1 = session_aas[i];
			const mass_t mass1 = aa2mass[aa1];
			if (aa1 == Ile)
				continue;

			for (j=i; j<num_aas; j++)
			{
				const int aa2 = session_aas[j];
				if (aa2 == Ile)
					continue;
		
				AA_combo ac;
				ac.amino_acids[0]=aa1;
				ac.amino_acids[1]=aa2;
				ac.num_aa=2;
				ac.total_mass+=mass1 + aa2mass[aa2];
				
				aa_combo_by_length[2].push_back(ac);
			}
		}
		sort(aa_combo_by_length[2].begin(),aa_combo_by_length[2].end());
	}

	if (max_edge_length>=3)
	{
		aa_combo_by_length[3].clear();
		for (i=0; i<num_aas; i++)
		{
			int j;
			const int aa1 = session_aas[i];
			const mass_t mass1 = aa2mass[aa1];
			if (aa1 == Ile)
				continue;

			for (j=i; j<num_aas; j++)
			{
				const int aa2 = session_aas[j];
				if (aa2 == Ile)
					continue;

				const mass_t mass2 = mass1 + aa2mass[aa2];
				int k;

				for (k=j; k<num_aas; k++)
				{
					const int aa3 = session_aas[k];
					if (aa3 == Ile)
						continue;

					AA_combo ac;
					ac.amino_acids[0]=aa1;
					ac.amino_acids[1]=aa2;
					ac.amino_acids[2]=aa3;
					ac.num_aa=3;
					ac.total_mass+=mass2 + aa2mass[aa3];
					
					aa_combo_by_length[3].push_back(ac);
				}
			}
		}
		sort(aa_combo_by_length[3].begin(),aa_combo_by_length[3].end());
	}


	if (max_edge_length>=4)
	{
		aa_combo_by_length[4].clear();
		for (i=0; i<num_aas; i++)
		{
			int j;
			const int aa1 = session_aas[i];
			const mass_t mass1 = aa2mass[aa1];
			if (aa1 == Ile)
				continue;

			for (j=i; j<num_aas; j++)
			{
				const int aa2 = session_aas[j];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -