⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 model.cpp

📁 MS-Clustering is designed to rapidly cluster large MS/MS datasets. The program merges similar spectr
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "Model.h"
#include "FragmentSelection.h"


// reads a model and all relevant files
// the model files are assumed to be in the resource_dir
// all this model's files are assumed to have a name <model_name>_XXXXX.txt
// the main model file is <model_name>.txt
void Model::read_model(const char* name, bool silent_ind)
{
	char file[256];

	model_name = name;

	if (config.get_resource_dir().length()<2)
	{
		config.set_resource_dir("Models");
	}

	config.set_model_name(string(name));

	strcpy(file,config.get_resource_dir().c_str());
	strcat(file,"/");
	strcat(file,name); 
	strcat(file,".txt");   

	fstream fs(file,ios::in);
	if (! fs.good() )  
	{
		cout << "Error: couldn't open model file: " << file << endl;
		exit(1);
	}

	while (! fs.eof())
	{
		char buff[1024];
		fs.getline(buff,1024);
		if (fs.gcount()<4)
			continue;

		char arg[128];
		if (sscanf(buff,"#CONFIG_FILE %s",arg) == 1)
		{
			config.read_config(arg);
			config.set_model_name(string(model_name));
			continue;
		}

		if (! strncmp("#CONF",buff,5))
		{
			string path = config.get_resource_dir() + "/" + string(buff);
			config.parse_config_parameter((char *)path.c_str());
			continue;
		}

		if (sscanf(buff,"#BREAK_SCORE_MODEL %s",arg) ==1)
		{
			read_score_model(arg,silent_ind);
			continue;
		}

		if (sscanf(buff,"#EDGE_MODEL %s",arg) ==1)
		{
			edge_model.read_edge_models(&config,arg,silent_ind);
			continue;
		}

		if (sscanf(buff,"#SQS_MODEL %s",arg) == 1)
		{
			pmcsqs.read_sqs_models(&config,arg);
			continue;
		}
		
		if (sscanf(buff,"#PMCR_MODEL %s",arg) == 1)
		{
			pmcsqs.read_pmc_rank_models(&config,arg);
			continue;
		}

		if (sscanf(buff,"#COMP_ASSIGNER %s",arg) == 1)
		{
			comp_assigner.read_and_init_from_tables(&config,arg);
			continue;
		}
	}

	// check if some of the defaults need to be changed
	if (config.get_max_edge_length() != 2)
		config.calc_aa_combo_masses();

}



// writes a model and all relevant files
// the model files are assumed to be in the resource_dir
// all this model's files are assumed to have a name <model_name>_XXXXX.txt
// the main model file is <model_name>.txt
void Model::write_model()
{
	string model_file;

	model_file = config.get_resource_dir() + "/" + model_name + ".txt";

	fstream os(model_file.c_str(),ios::out);
	if ( ! os.good())
	{
		cout << "Error writing model to " << model_file << endl;
		exit(1);
	}


	string config_file = config.get_resource_dir() + "/" + model_name + "_config.txt";
	config.set_config_file(config_file);
	config.set_model_name(model_name);
	os << "#CONFIG_FILE " << model_name + "_config.txt" << endl;
	config.write_config();


	if (pmcsqs.get_ind_initialized_pmcr())
	{
		os << "#PMCR_MODEL " << model_name + "_PMCR.txt" << endl;
		string path = config.get_resource_dir() + "/" + model_name + "_PMCR.txt";
		pmcsqs.write_pmc_rank_models(path.c_str());
	}

	if (pmcsqs.get_ind_initialized_sqs())
	{
		os << "#SQS_MODEL " << model_name + "_SQS.txt" << endl;
		string path = config.get_resource_dir() + "/" + model_name + "_SQS.txt";
		pmcsqs.write_sqs_models(path.c_str());
	}

	if (comp_assigner.get_ind_was_initialized())
	{
		os << "#COMP_ASSIGNER " << comp_assigner.get_model_name() << endl;
	}

	os << "#BREAK_SCORE_MODEL " << model_name << endl;

	write_score_model(model_name.c_str());

	os << "#EDGE_MODEL " << model_name << endl;

	edge_model.write_edge_models(model_name.c_str());
}


/*************************************************************************
This function performs the entire training process of the model.
It allows for training in stages, gives better output, and checks that
previous stages are initialized.
**************************************************************************/
void Model::train_model_in_stages(
			const char *name, 
			const FileManager& fm, 
			mass_t initial_tolerance, 
			int start_stage, 
			int end_stage,
			int specific_charge, 
			int specific_size, 
			int specific_region,
			char *neg_sqs_list)
{
	if (end_stage>1000)
		end_stage = 20;
	stages_intialized.resize(end_stage,false);
	
	model_name = name;
	config.set_model_name(string(name));

/*	int i;
	for (i=0; i<start_stage; i++)
		if (! stages_intialized[i])
		{
			cout << "Error: started training from stage " << start_stage << endl;
			cout << "However stage " << i << " was not intialized!" << endl;
			int j;

			cout << "Status:" << endl;
			for (j=0; j<end_stage; j++)
				cout << j << "\t" << stages_intialized[j] << endl;
			exit(1);
		}*/


	cout << endl << "STAGE 0: Partitioning according to size/charge " << endl;
	cout <<         "**********************************************" <<endl;
	if (start_stage>0)
	{
		cout << endl << "Already done." << endl;	
	}
	else
	{
		cout << endl;
		int charge;
		for (charge = fm.get_min_charge(); charge<= fm.get_max_charge(); charge++)
		{
			vector<mass_t> spectra_masses;
			FileSet fs;
			fs.select_all_files(fm);
			const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();
			int i;
			for (i=0; i<all_ssf.size(); i++)
				if (all_ssf[i]->charge == charge)
					spectra_masses.push_back(all_ssf[i]->org_pm_with_19);

			config.set_size_thresholds_according_to_set_of_masses(charge,spectra_masses);
		}
	}
	cout << endl << "Using following thresholds:" << endl;
	config.print_size_thresholds();


	cout << endl << "STAGE 1: Select Fragment types" << endl;
	cout <<         "******************************" <<endl;
	if (start_stage>1)
	{
		cout << endl << "Already done." << endl;	
	}
	else
	{
		config.set_tolerances(initial_tolerance);
		cout << endl;
		select_fragments(name,fm,15,0.01);
		config.set_all_regional_fragment_relationships();
	}
	cout << endl << "Fragments being used:" << endl;
	config.print_all_fragments();

	cout << endl << "STAGE 2: calculating fragment and PM tolerances" << endl;
	cout <<         "***********************************************" <<endl;
	if (start_stage>2)
	{
		cout << endl << "Already done." << endl;
	}
	else
	{
		int c;
		for (c=0; c<config.get_max_charge_for_size(); c++)
			if (config.get_size_thresholds()[c].size()>0)
				config.select_strong_fragments(c,0.5,3);
		
		cout << "Calculating precursor mass tolerance..." << endl;
		mass_t pm_tol = calc_parent_mass_tolerance_distribution(this, fm, 0.95);

		cout << "Calculating fragment mass tolerance..." << endl;
		mass_t tol    = calc_tolerance_distribution(this, fm , initial_tolerance*1.2,0.96);

		config.set_pm_tolerance(pm_tol);

		if (pm_tol <0.000001)
		{
			pm_tol = tol;
		}

		if (pm_tol<tol)
		{
			config.set_tolerance(tol+pm_tol);
		}
		else
			config.set_tolerance(tol);
	}
	cout << endl << "PM tolerance " << fixed << setprecision(4) << config.get_pm_tolerance() << endl;
	cout << "Need to correct PM: " << config.get_need_to_estimate_pm() << endl;
	cout << "Fragment tolerance  " << config.get_tolerance() << endl;

	
//	config.print_all_regional_fragment_relationships();
	
	cout << endl << "STAGE 3: Train breakage score models" << endl;
	cout <<         "************************************" <<endl;
	cout << endl;
	if (start_stage>3)
	{
		cout << endl << "Already done." << endl;
	}
	else
	{
		if (specific_charge>0)
			cout << "+++ Only Specified model  " << specific_charge << " " << 
					specific_size << " " << specific_region << endl << endl;

		this->train_score_model(name,fm,specific_charge, specific_size, specific_region);	
	}

	if (end_stage<=3)
	{
		write_model();
		exit(0);
	}



	cout << endl << "STAGE 4: Train SQS models" << endl;
	cout <<         "*************************" << endl << endl;
	if (start_stage>4)
	{
		cout << endl << "Already done." << endl;
	}
	else
	{
		if (specific_charge>0)
			cout << "+++ Only specified charge " <<  specific_charge << endl << endl;

		vector< vector<float> > weights;
		int max_c = 4;
		if (fm.get_max_charge()+1>max_c)
			max_c = fm.get_max_charge()+1;
		weights.resize(max_c);
		
		int i;
		for (i=1; i<max_c; i++)
			weights[i].resize(3,0);
	
		weights[1][0] = 0.1; weights[1][1] = 0.1;  weights[1][2] = 0.4;
		weights[2][0] = 0.6; weights[2][1] = 0.75; weights[2][2] = 0.5;
		weights[3][0] = 0.3; weights[3][1] = 0.15; weights[3][2] = 0.1;
		for (i=4; i<max_c; i++)
			weights[i]=weights[3];

		train_sqs(fm,neg_sqs_list,specific_charge,&weights);
	}

	if (end_stage<=4)
	{
		write_model();
		exit(0);
	}

	cout << endl << "STAGE 5: Train PMCR models" << endl;
	cout <<         "**************************" << endl << endl;
	if (start_stage>5)
	{
		cout << endl << "Already done." << endl;
	}
	else
	{
		if (specific_charge>0)
			cout << "+++ Only specified charge " <<  specific_charge << endl << endl;


		train_pmc_rank_models(fm,specific_charge);
	} 


	if (end_stage<=5)
	{
		write_model();
		exit(0);
	}

	cout << endl << "STAGE 6: Train edge models" << endl;
	cout <<         "**************************" << endl << endl;
	if (start_stage>6)
	{
		cout << endl << "Already done." << endl;
	}
	else
	{
		if (specific_charge>0)
			cout << "+++ Only specified charge " <<  specific_charge << endl << endl;


		edge_model.train_all_edge_models(fm,this,specific_charge);
	} 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -