📄 model.cpp
字号:
#include "Model.h"
#include "FragmentSelection.h"
// reads a model and all relevant files
// the model files are assumed to be in the resource_dir
// all this model's files are assumed to have a name <model_name>_XXXXX.txt
// the main model file is <model_name>.txt
void Model::read_model(const char* name, bool silent_ind)
{
char file[256];
model_name = name;
if (config.get_resource_dir().length()<2)
{
config.set_resource_dir("Models");
}
config.set_model_name(string(name));
strcpy(file,config.get_resource_dir().c_str());
strcat(file,"/");
strcat(file,name);
strcat(file,".txt");
fstream fs(file,ios::in);
if (! fs.good() )
{
cout << "Error: couldn't open model file: " << file << endl;
exit(1);
}
while (! fs.eof())
{
char buff[1024];
fs.getline(buff,1024);
if (fs.gcount()<4)
continue;
char arg[128];
if (sscanf(buff,"#CONFIG_FILE %s",arg) == 1)
{
config.read_config(arg);
config.set_model_name(string(model_name));
continue;
}
if (! strncmp("#CONF",buff,5))
{
string path = config.get_resource_dir() + "/" + string(buff);
config.parse_config_parameter((char *)path.c_str());
continue;
}
if (sscanf(buff,"#BREAK_SCORE_MODEL %s",arg) ==1)
{
read_score_model(arg,silent_ind);
continue;
}
if (sscanf(buff,"#EDGE_MODEL %s",arg) ==1)
{
edge_model.read_edge_models(&config,arg,silent_ind);
continue;
}
if (sscanf(buff,"#SQS_MODEL %s",arg) == 1)
{
pmcsqs.read_sqs_models(&config,arg);
continue;
}
if (sscanf(buff,"#PMCR_MODEL %s",arg) == 1)
{
pmcsqs.read_pmc_rank_models(&config,arg);
continue;
}
if (sscanf(buff,"#COMP_ASSIGNER %s",arg) == 1)
{
comp_assigner.read_and_init_from_tables(&config,arg);
continue;
}
}
// check if some of the defaults need to be changed
if (config.get_max_edge_length() != 2)
config.calc_aa_combo_masses();
}
// writes a model and all relevant files
// the model files are assumed to be in the resource_dir
// all this model's files are assumed to have a name <model_name>_XXXXX.txt
// the main model file is <model_name>.txt
void Model::write_model()
{
string model_file;
model_file = config.get_resource_dir() + "/" + model_name + ".txt";
fstream os(model_file.c_str(),ios::out);
if ( ! os.good())
{
cout << "Error writing model to " << model_file << endl;
exit(1);
}
string config_file = config.get_resource_dir() + "/" + model_name + "_config.txt";
config.set_config_file(config_file);
config.set_model_name(model_name);
os << "#CONFIG_FILE " << model_name + "_config.txt" << endl;
config.write_config();
if (pmcsqs.get_ind_initialized_pmcr())
{
os << "#PMCR_MODEL " << model_name + "_PMCR.txt" << endl;
string path = config.get_resource_dir() + "/" + model_name + "_PMCR.txt";
pmcsqs.write_pmc_rank_models(path.c_str());
}
if (pmcsqs.get_ind_initialized_sqs())
{
os << "#SQS_MODEL " << model_name + "_SQS.txt" << endl;
string path = config.get_resource_dir() + "/" + model_name + "_SQS.txt";
pmcsqs.write_sqs_models(path.c_str());
}
if (comp_assigner.get_ind_was_initialized())
{
os << "#COMP_ASSIGNER " << comp_assigner.get_model_name() << endl;
}
os << "#BREAK_SCORE_MODEL " << model_name << endl;
write_score_model(model_name.c_str());
os << "#EDGE_MODEL " << model_name << endl;
edge_model.write_edge_models(model_name.c_str());
}
/*************************************************************************
This function performs the entire training process of the model.
Allows for training in stages, gives better output and checks that
previous stages are initialized.
**************************************************************************/
void Model::train_model_in_stages(
const char *name,
const FileManager& fm,
mass_t initial_tolerance,
int start_stage,
int end_stage,
int specific_charge,
int specific_size,
int specific_region,
char *neg_sqs_list)
{
if (end_stage>1000)
end_stage = 20;
stages_intialized.resize(end_stage,false);
model_name = name;
config.set_model_name(string(name));
/* int i;
for (i=0; i<start_stage; i++)
if (! stages_intialized[i])
{
cout << "Error: started training from stage " << start_stage << endl;
cout << "However stage " << i << " was not intialized!" << endl;
int j;
cout << "Status:" << endl;
for (j=0; j<end_stage; j++)
cout << j << "\t" << stages_intialized[j] << endl;
exit(1);
}*/
cout << endl << "STAGE 0: Partitioning according to size/charge " << endl;
cout << "**********************************************" <<endl;
if (start_stage>0)
{
cout << endl << "Already done." << endl;
}
else
{
cout << endl;
int charge;
for (charge = fm.get_min_charge(); charge<= fm.get_max_charge(); charge++)
{
vector<mass_t> spectra_masses;
FileSet fs;
fs.select_all_files(fm);
const vector<SingleSpectrumFile *>& all_ssf = fs.get_ssf_pointers();
int i;
for (i=0; i<all_ssf.size(); i++)
if (all_ssf[i]->charge == charge)
spectra_masses.push_back(all_ssf[i]->org_pm_with_19);
config.set_size_thresholds_according_to_set_of_masses(charge,spectra_masses);
}
}
cout << endl << "Using following thresholds:" << endl;
config.print_size_thresholds();
cout << endl << "STAGE 1: Select Fragment types" << endl;
cout << "******************************" <<endl;
if (start_stage>1)
{
cout << endl << "Already done." << endl;
}
else
{
config.set_tolerances(initial_tolerance);
cout << endl;
select_fragments(name,fm,15,0.01);
config.set_all_regional_fragment_relationships();
}
cout << endl << "Fragments being used:" << endl;
config.print_all_fragments();
cout << endl << "STAGE 2: calculating fragment and PM tolerances" << endl;
cout << "***********************************************" <<endl;
if (start_stage>2)
{
cout << endl << "Already done." << endl;
}
else
{
int c;
for (c=0; c<config.get_max_charge_for_size(); c++)
if (config.get_size_thresholds()[c].size()>0)
config.select_strong_fragments(c,0.5,3);
cout << "Calculating precursor mass tolerance..." << endl;
mass_t pm_tol = calc_parent_mass_tolerance_distribution(this, fm, 0.95);
cout << "Calculating fragment mass tolerance..." << endl;
mass_t tol = calc_tolerance_distribution(this, fm , initial_tolerance*1.2,0.96);
config.set_pm_tolerance(pm_tol);
if (pm_tol <0.000001)
{
pm_tol = tol;
}
if (pm_tol<tol)
{
config.set_tolerance(tol+pm_tol);
}
else
config.set_tolerance(tol);
}
cout << endl << "PM tolerance " << fixed << setprecision(4) << config.get_pm_tolerance() << endl;
cout << "Need to correct PM: " << config.get_need_to_estimate_pm() << endl;
cout << "Fragment tolerance " << config.get_tolerance() << endl;
// config.print_all_regional_fragment_relationships();
cout << endl << "STAGE 3: Train breakage score models" << endl;
cout << "************************************" <<endl;
cout << endl;
if (start_stage>3)
{
cout << endl << "Already done." << endl;
}
else
{
if (specific_charge>0)
cout << "+++ Only Specified model " << specific_charge << " " <<
specific_size << " " << specific_region << endl << endl;
this->train_score_model(name,fm,specific_charge, specific_size, specific_region);
}
if (end_stage<=3)
{
write_model();
exit(0);
}
cout << endl << "STAGE 4: Train SQS models" << endl;
cout << "*************************" << endl << endl;
if (start_stage>4)
{
cout << endl << "Already done." << endl;
}
else
{
if (specific_charge>0)
cout << "+++ Only specified charge " << specific_charge << endl << endl;
vector< vector<float> > weights;
int max_c = 4;
if (fm.get_max_charge()+1>max_c)
max_c = fm.get_max_charge()+1;
weights.resize(max_c);
int i;
for (i=1; i<max_c; i++)
weights[i].resize(3,0);
weights[1][0] = 0.1; weights[1][1] = 0.1; weights[1][2] = 0.4;
weights[2][0] = 0.6; weights[2][1] = 0.75; weights[2][2] = 0.5;
weights[3][0] = 0.3; weights[3][1] = 0.15; weights[3][2] = 0.1;
for (i=4; i<max_c; i++)
weights[i]=weights[3];
train_sqs(fm,neg_sqs_list,specific_charge,&weights);
}
if (end_stage<=4)
{
write_model();
exit(0);
}
cout << endl << "STAGE 5: Train PMCR models" << endl;
cout << "**************************" << endl << endl;
if (start_stage>5)
{
cout << endl << "Already done." << endl;
}
else
{
if (specific_charge>0)
cout << "+++ Only specified charge " << specific_charge << endl << endl;
train_pmc_rank_models(fm,specific_charge);
}
if (end_stage<=5)
{
write_model();
exit(0);
}
cout << endl << "STAGE 6: Train edge models" << endl;
cout << "**************************" << endl << endl;
if (start_stage>6)
{
cout << endl << "Already done." << endl;
}
else
{
if (specific_charge>0)
cout << "+++ Only specified charge " << specific_charge << endl << endl;
edge_model.train_all_edge_models(fm,this,specific_charge);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -