📄 config.cpp
字号:
for (i=0; i<num_frags; i++)
{
FragmentType ft;
ft.read_fragment(is);
all_fragments.add_fragment_type(ft);
}
}
/************************************************************
reads the fragment sets from a stream
*************************************************************/
void Config::read_regional_fragment_sets(istream& is)
{
int i,num_frags=-1;
char buff[128];
is.getline(buff,128);
if (strncmp(buff,"#FRAGMENT SETS",12))
{
cout << "Error: bad line in fragments file: " << buff << endl;
if (strlen(buff)<4)
{
cout << "This error can possibly be due to Wnix/Windows issues. Try running dos2unix on all \".txt\" files in the Models directory." << endl;
}
exit(1);
}
regional_fragment_sets.clear();
while ( ! is.eof())
{
int charge,size_idx,region_idx,num_fragments;
is.getline(buff,128);
if (is.gcount()<2)
continue;
// some other parameter set is in this file, put back line and return
if (buff[0]=='#')
{
int i;
for (i=strlen(buff)-1; i>=0; i--)
is.putback(buff[i]);
return;
}
float rand_prob=-1;
if (sscanf(buff,"%d %d %d %d %f",&charge,&size_idx,®ion_idx,&num_fragments,
&rand_prob) != 5)
{
cout << "Error: bad line in fragments file: " << buff << endl;
if (strlen(buff)<4)
{
cout << "This error can possibly be due to Wnix/Windows issues. Try running dos2unix on all \".txt\" files in the Models directory." << endl;
}
exit(1);
}
// make sure fragment_sets vectors are large enough
if (regional_fragment_sets.size()<charge+1)
regional_fragment_sets.resize(charge+1);
if (regional_fragment_sets[charge].size()<size_idx+1)
regional_fragment_sets[charge].resize(size_idx+1);
if (regional_fragment_sets[charge][size_idx].size()<region_idx+1)
regional_fragment_sets[charge][size_idx].resize(region_idx+1);
RegionalFragments& rf = regional_fragment_sets[charge][size_idx][region_idx];
rf.set_rand_prob(rand_prob);
rf.frag_type_idxs.clear();
// read fragments
for (i=0; i<num_fragments; i++)
{
string label;
score_t prob = 0;
is.getline(buff,128);
istringstream iss(buff);
iss >> label >> prob;
const int frag_idx = this->get_frag_idx_from_label(label);
if (frag_idx<0)
{
cout << "Error: unrecognized fragment: " << label << endl;
exit(1);
}
all_fragments.fragments[frag_idx].prob += prob;
rf.frag_type_idxs.push_back(frag_idx);
rf.frag_probs.push_back(prob);
}
// read strong fragments
int i;
is.getline(buff,128);
if (strncmp(buff,"strong",6))
{
cout << "Error: expected to see list of strong frags! : " << buff << endl;
exit(1);
}
int num_strong=0;
istringstream iss(buff+7);
iss >> num_strong;
rf.strong_frag_type_idxs.clear();
for (i=0; i<num_strong; i++)
{
string label;
iss >> label;
int f_idx=get_frag_idx_from_label(label);
if (f_idx>=0)
{
rf.strong_frag_type_idxs.push_back(f_idx);
int j;
for (j=0; j<all_strong_fragment_type_idxs.size(); j++)
if (all_strong_fragment_type_idxs[j] == f_idx)
break;
if (j==all_strong_fragment_type_idxs.size())
all_strong_fragment_type_idxs.push_back(f_idx);
}
else
break;
}
// read combos
is.getline(buff,128);
if (strncmp(buff,"combos",6))
{
cout << "Error: expected to see list of combos! : " << buff << endl;
exit(1);
}
int num_combos;
istringstream iss2(buff+7);
iss2 >> num_combos;
rf.frag_type_combos.clear();
for (i=0; i<num_combos; i++)
{
FragmentCombo fc;
fc.read_combo(this,is);
int j;
int strong_frag_pos=-1;
for (j=0; j<fc.frag_inten_idxs.size(); j++)
{
int k;
for (k=0; k<rf.strong_frag_type_idxs.size(); k++)
{
if (fc.frag_inten_idxs[j] == rf.strong_frag_type_idxs[k])
{
strong_frag_pos = j;
break;
}
}
}
if (strong_frag_pos<0)
{
cout << "Error: combo must have at least one strong fragment type with intensity!" << endl;
exit(1);
}
// swap to make the strong frag first
if (strong_frag_pos>0)
{
int tmp;
tmp=fc.frag_inten_idxs[strong_frag_pos];
fc.frag_inten_idxs[strong_frag_pos] = fc.frag_inten_idxs[0];
fc.frag_inten_idxs[0] = tmp;
}
rf.frag_type_combos.push_back(fc);
}
}
// clone regional fragment sets if needed
if (regional_fragment_sets[2].size()>0)
{
if (regional_fragment_sets[1].size() == 0)
clone_regional_fragment_sets(2,1);
if (regional_fragment_sets.size()<=4)
regional_fragment_sets.resize(5);
if (regional_fragment_sets[3].size() == 0)
clone_regional_fragment_sets(2,3);
if (regional_fragment_sets[4].size() == 0)
clone_regional_fragment_sets(3,4);
}
}
void Config::clone_regional_fragment_sets(int source_charge, int target_charge)
{
if (regional_fragment_sets.size()<=target_charge)
regional_fragment_sets.resize(target_charge+1);
regional_fragment_sets[target_charge] = regional_fragment_sets[source_charge];
}
bool read_mass_type_line(const char* prefix, char *line, mass_t& val)
{
int len = strlen(prefix);
if (strncmp(prefix,line,len))
return false;
istringstream is(line+len);
is >> val;
return true;
}
bool read_score_type_line(const char* prefix, char *line, score_t& val)
{
int len = strlen(prefix);
if (strncmp(prefix,line,len))
return false;
istringstream is(line+len);
is >> val;
return true;
}
/**********************************************************************
// parses a line that is assumed to be from a config file
// all parameters are assumed to start with
// #CONF <PARAMETER_NAME> VALUES
***********************************************************************/
void Config::parse_config_parameter(char *current_line)
{
char buff[256];
int number;
// fragments file
if ( sscanf(current_line,"#CONF FRAGMENTS_FILE %s",buff) == 1)
{
string path = resource_dir + "/" + string(buff);
fstream fs(path.c_str(),ios::in);
if (! fs.good())
{
cout << "Error openening fragment sets: " << path << endl;
exit(1);
}
this->read_fragments(fs);
fragments_file=buff;
return;
}
// regional fragment sets
if ( sscanf(current_line,"#CONF REGIONAL_FRAGMENT_SETS_FILE %s",buff) == 1)
{
string path = resource_dir + "/" + string(buff);
fstream fs(path.c_str(),ios::in);
if (! fs.good())
{
cout << "Error openening fragment sets: " << buff << endl;
exit(1);
}
this->read_regional_fragment_sets(fs);
regional_fragment_sets_file=buff;
set_all_regional_fragment_relationships();
return;
}
// mass spec type
if ( sscanf(current_line,"#CONF MASS_SPEC_TYPE %d",&number) == 1)
{
mass_spec_type = number;
return;
}
// digest
if ( sscanf(current_line,"#CONF DIGEST_TYPE %d",&number) == 1)
{
set_digest_type(number);
return;
}
// resource dir
if ( sscanf(current_line,"#CONF RESOURCE_DIR %s",buff) == 1)
{
resource_dir = string(buff);
return;
}
if ( sscanf(current_line,"#CONF MAX_NUMBER_OF_PEAKS_PER_LOCAL_WINDOW %d",&number) == 1)
{
max_number_peaks_per_local_window = number;
return;
}
if ( sscanf(current_line,"#CONF NUMBER_OF_STRONG_PEAKS_PER_LOCAL_WINDOW %d",&number) == 1)
{
number_of_strong_peaks_per_local_window = number;
return;
}
if (read_mass_type_line("#CONF LOCAL_WINDOW_SIZE",current_line,local_window_size))
return;
if (read_mass_type_line("#CONF TOLERANCE",current_line,tolerance))
return;
if (read_mass_type_line("#CONF PM_TOLERANCE",current_line,pm_tolerance))
return;
if ( sscanf(current_line,"#CONF MAX_EDGE_LENGTH %d",&number) == 1)
{
max_edge_length = number;
return;
}
if (read_score_type_line("#CONF TERMINAL_SCORE",current_line,terminal_score))
return;
if (read_score_type_line("#CONF DIGEST_SCORE",current_line,digest_score))
return;
if (read_score_type_line("#CONF FORBIDDEN_PAIR_PENALTY",current_line,forbidden_pair_penalty))
return;
if (sscanf(current_line,"#CONF NEED_TO_CORRECT_PM %d",&number) == 1)
{
need_to_estimate_pm = number;
return;
}
if (!strncmp("#CONF SIZE_THRESHOLDS",current_line,21))
{
istringstream is(current_line+22);
int i,num_sizes;
is >> num_sizes;
size_thresholds.resize(num_sizes);
for (i=0; i<num_sizes; i++)
{
int j,size;
is >> size;
size_thresholds[i].resize(size);
for (j=0; j<size; j++)
is >> size_thresholds[i][j];
}
return;
}
if (!strncmp("#CONF REGION_THRESHOLDS",current_line,22))
{
istringstream is(current_line+23);
int i,num_sizes;
is >> num_sizes;
region_thresholds.resize(num_sizes);
for (i=0; i<num_sizes; i++)
is >> region_thresholds[i];
return;
}
if ( !strncmp(current_line,"#CONF MASS_EXCLUDE_RANGE",23))
{
istringstream is(current_line+24);
mass_t min_inp_range=-1.0;
mass_t max_inp_range=-2.0;
is >> min_inp_range >> max_inp_range;
if (max_inp_range<min_inp_range)
{
cout << "Error: paramter \"#CONF MASS_EXCLUDE_RANGE\" should be followed a pair of numbers: minimal mass range and maximal mass range to exclude!" << endl;
cout << "BAD Line: " << current_line << endl;
}
add_exclude_range(min_inp_range,max_inp_range);
return;
}
if (current_line[0] == '#' || strlen(current_line)<3)
return;
cout << "Warning: bad line in config file: " << current_line << endl;
}
void Config::read_config(const char* file_name)
{
config_file = file_name;
init_with_defaults();
string path = resource_dir + "/" + string(file_name);
fstream fs(path.c_str(),ios::in);
while (! fs.eof() && fs.good())
{
char buff[1024];
fs.getline(buff,1024);
if (fs.gcount()<4)
continue;
parse_config_parameter(buff);
}
}
void Config::write_config()
{
if (fragments_file.length()>0)
{
fragments_file = model_name + "_fragments.txt";
string path = resource_dir + "/" + fragments_file;
ofstream fs(path.c_str(),ios::out);
print_fragments(fs);
}
if (regional_fragment_sets_file.length()>0)
{
regional_fragment_sets_file = model_name + "_fragment_sets.txt";
string path = resource_dir + "/" + regional_fragment_sets_file;
ofstream rfs(path.c_str(),ios::out);
print_regional_fragment_sets(rfs);
}
ofstream os(config_file.c_str(), ios::out);
print_config_parameters(os);
}
/****************************************************************
calclates the masses of the different aa_combos
combos are sorted aa lists.
lazy version, hardcoded, more elegant to do recursive way
(without real recursion).
And also finds the maximum combo mass
Fills in the aa_edge_combos vector which holds all lengths together
*****************************************************************/
void Config::calc_aa_combo_masses()
{
const vector<int>& session_aas = get_session_aas();
const vector<mass_t>& aa2mass = get_aa2mass();
const int num_aas = session_aas.size();
const int last_aa = session_aas[num_aas-1];
vector< vector<AA_combo> > aa_combo_by_length; // first dim is edge length
aa_combo_by_length.resize(max_edge_length+1);
int i;
if (max_edge_length > MAX_EDGE_SIZE)
{
cout << "Error: code doesn't support edges larger than "<< MAX_EDGE_SIZE << endl;
exit(0);
}
if (max_edge_length>5)
{
cout << "Error: code doesn't support such large edges!"<< endl;
exit(0);
}
aa_combo_by_length[1].clear();
for (i=0; i<num_aas; i++)
{
const int aa1 = session_aas[i];
if (aa1 == Ile)
continue;
AA_combo ac;
ac.amino_acids[0]=aa1;
ac.num_aa=1;
ac.total_mass=aa2mass[aa1];
aa_combo_by_length[1].push_back(ac);
}
sort(aa_combo_by_length[1].begin(),aa_combo_by_length[1].end());
if (max_edge_length>=2)
{
aa_combo_by_length[2].clear();
for (i=0; i<num_aas; i++)
{
int j;
const int aa1 = session_aas[i];
const mass_t mass1 = aa2mass[aa1];
if (aa1 == Ile)
continue;
for (j=i; j<num_aas; j++)
{
const int aa2 = session_aas[j];
if (aa2 == Ile)
continue;
AA_combo ac;
ac.amino_acids[0]=aa1;
ac.amino_acids[1]=aa2;
ac.num_aa=2;
ac.total_mass+=mass1 + aa2mass[aa2];
aa_combo_by_length[2].push_back(ac);
}
}
sort(aa_combo_by_length[2].begin(),aa_combo_by_length[2].end());
}
if (max_edge_length>=3)
{
aa_combo_by_length[3].clear();
for (i=0; i<num_aas; i++)
{
int j;
const int aa1 = session_aas[i];
const mass_t mass1 = aa2mass[aa1];
if (aa1 == Ile)
continue;
for (j=i; j<num_aas; j++)
{
const int aa2 = session_aas[j];
if (aa2 == Ile)
continue;
const mass_t mass2 = mass1 + aa2mass[aa2];
int k;
for (k=j; k<num_aas; k++)
{
const int aa3 = session_aas[k];
if (aa3 == Ile)
continue;
AA_combo ac;
ac.amino_acids[0]=aa1;
ac.amino_acids[1]=aa2;
ac.amino_acids[2]=aa3;
ac.num_aa=3;
ac.total_mass+=mass2 + aa2mass[aa3];
aa_combo_by_length[3].push_back(ac);
}
}
}
sort(aa_combo_by_length[3].begin(),aa_combo_by_length[3].end());
}
if (max_edge_length>=4)
{
aa_combo_by_length[4].clear();
for (i=0; i<num_aas; i++)
{
int j;
const int aa1 = session_aas[i];
const mass_t mass1 = aa2mass[aa1];
if (aa1 == Ile)
continue;
for (j=i; j<num_aas; j++)
{
const int aa2 = session_aas[j];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -