📄 config.cpp
字号:
if (aa2 == Ile)
continue;
const mass_t mass2 = mass1 + aa2mass[aa2];
int k;
for (k=j; k<num_aas; k++)
{
const int aa3 = session_aas[k];
if (aa3 == Ile)
continue;
const mass_t mass3 = mass2 + aa2mass[aa3];
int l;
for (l=k; l<num_aas; l++)
{
const int aa4 = session_aas[l];
if (aa4 == Ile)
continue;
AA_combo ac;
ac.amino_acids[0]=aa1;
ac.amino_acids[1]=aa2;
ac.amino_acids[2]=aa3;
ac.amino_acids[3]=aa4;
ac.num_aa=4;
ac.total_mass+=mass3 + aa2mass[aa4];
aa_combo_by_length[4].push_back(ac);
}
}
}
}
sort(aa_combo_by_length[4].begin(),aa_combo_by_length[4].end());
}
if (max_edge_length>=5)
{
aa_combo_by_length[5].clear();
for (i=0; i<num_aas; i++)
{
int j;
const int aa1 = session_aas[i];
const mass_t mass1 = aa2mass[aa1];
if (aa1 == Ile)
continue;
for (j=i; j<num_aas; j++)
{
const int aa2 = session_aas[j];
if (aa2 == Ile)
continue;
const mass_t mass2 = mass1 + aa2mass[aa2];
int k;
for (k=j; k<num_aas; k++)
{
const int aa3 = session_aas[k];
if (aa3 == Ile)
continue;
const mass_t mass3 = mass2 + aa2mass[aa3];
int l;
for (l=k; l<num_aas; l++)
{
const int aa4 = session_aas[l];
if (aa4 == Ile)
continue;
const mass_t mass4 = mass3 + aa2mass[aa4];
int p;
for (p=l; p<num_aas; p++)
{
const int aa5 = session_aas[p];
if (aa5 == Ile)
continue;
AA_combo ac;
ac.amino_acids[0]=aa1;
ac.amino_acids[1]=aa2;
ac.amino_acids[2]=aa3;
ac.amino_acids[3]=aa4;
ac.amino_acids[4]=aa5;
ac.num_aa=5;
ac.total_mass+=mass4 + aa2mass[aa5];
aa_combo_by_length[5].push_back(ac);
}
}
}
}
}
sort(aa_combo_by_length[5].begin(),aa_combo_by_length[5].end());
}
max_combo_mass = aa_combo_by_length[max_edge_length][aa_combo_by_length[max_edge_length].size()-1].total_mass + 1.0;
// fill in aa_edge_combos
aa_edge_combos.clear();
aa_edge_combos.reserve((int)(1.5 * aa_combo_by_length[max_edge_length].size()));
for (i=1; i<aa_combo_by_length.size(); i++)
{
int j;
for (j=0; j<aa_combo_by_length[i].size(); j++)
aa_edge_combos.push_back(aa_combo_by_length[i][j]);
}
sort(aa_edge_combos.begin(),aa_edge_combos.end());
// make edge variant vector and fill in the variant pointers
// and make combo_idxs_by_length vectors
combo_idxs_by_length.resize(max_edge_length+1);
for (i=0; i<=max_edge_length; i++)
combo_idxs_by_length[i].clear();
variant_vector.clear();
variant_vector.reserve(aa_edge_combos.size()*5);
for (i=0; i<aa_edge_combos.size(); i++)
{
AA_combo& combo = aa_edge_combos[i];
combo_idxs_by_length[combo.num_aa].push_back(i);
if (combo.num_aa == 1)
{
combo.num_variants = 1;
combo.variant_start_idx = variant_vector.size();
variant_vector.push_back(1);
variant_vector.push_back(combo.amino_acids[0]);
continue;
}
if (combo.num_aa == 2)
{
if (combo.amino_acids[0]==combo.amino_acids[1])
{
combo.num_variants = 1;
combo.variant_start_idx = variant_vector.size();
variant_vector.push_back(2);
variant_vector.push_back(combo.amino_acids[0]);
variant_vector.push_back(combo.amino_acids[1]);
continue;
}
else
{
combo.num_variants = 2;
combo.variant_start_idx = variant_vector.size();
variant_vector.push_back(2);
variant_vector.push_back(combo.amino_acids[0]);
variant_vector.push_back(combo.amino_acids[1]);
variant_vector.push_back(2);
variant_vector.push_back(combo.amino_acids[1]);
variant_vector.push_back(combo.amino_acids[0]);
continue;
}
}
// generate variant using permutation generating function
vector<int> org_perm;
vector< vector<int> > all_perms;
int i;
org_perm.clear();
for (i=0; i<combo.num_aa; i++)
org_perm.push_back(combo.amino_acids[i]);
generate_all_permutations(org_perm,all_perms);
combo.num_variants = all_perms.size();
combo.variant_start_idx = variant_vector.size();
for (i=0; i<all_perms.size(); i++)
{
int j;
variant_vector.push_back(combo.num_aa);
for (j=0; j<combo.num_aa; j++)
variant_vector.push_back(all_perms[i][j]);
}
continue;
}
// fill the combo_start_idxs
int last_combo_idx = aa_edge_combos.size()-1;
mass_t largest_combo_mass = aa_edge_combos[last_combo_idx].total_mass;
int last_mass_idx = (int)(largest_combo_mass + 1.0);
combo_start_idxs.resize(last_mass_idx+5,-1);
for (i=0; i<aa_edge_combos.size(); i++)
{
int mass_idx = (int)(aa_edge_combos[i].total_mass);
if (combo_start_idxs[mass_idx]<0)
combo_start_idxs[mass_idx]=i;
}
// fill in combo idxs for entries with -1
int previous=-1;
for (i=0; i<combo_start_idxs.size(); i++)
{
if (combo_start_idxs[i]<0)
{
combo_start_idxs[i]=previous;
}
else
previous = combo_start_idxs[i];
}
}
const int * Config::get_first_variant_ptr(int combo_idx) const
{
return &variant_vector[aa_edge_combos[combo_idx].variant_start_idx];
}
// returns a pointer and number of variants which have masses in the given mass ranges
int Config::get_ptrs_for_combos_in_mass_range(mass_t min_mass, mass_t max_mass,
int& num_combos) const
{
num_combos = 0;
int min_idx = (int)(min_mass);
if (min_idx>=combo_start_idxs.size())
return -1;
int combo_idx = combo_start_idxs[min_idx];
while (combo_idx<aa_edge_combos.size())
{
if (aa_edge_combos[combo_idx].total_mass>max_mass)
return -1;
if (aa_edge_combos[combo_idx].total_mass>=min_mass)
break;
combo_idx++;
}
if (combo_idx == aa_edge_combos.size())
return -1;
int combos_start_idx = combo_idx;
while (combo_idx<aa_edge_combos.size())
{
if (aa_edge_combos[combo_idx].total_mass>max_mass)
break;
combo_idx++;
}
num_combos = combo_idx-combos_start_idx;
return combos_start_idx;
}
// returns true if there is a combo that contains the ordered variant of the given aas
bool Config::combos_have_variant(const vector<int>& combos, int num_aa, int *var_aas) const
{
int c;
for (c=0; c<combos.size(); c++)
{
const AA_combo& combo = aa_edge_combos[combos[c]];
int pos = combo.variant_start_idx;
int v;
for (v=0; v<combo.num_variants; v++)
if (variant_vector[pos++]==num_aa)
{
int a;
for (a=0; a<num_aa; a++)
if (variant_vector[pos+a] != var_aas[a])
break;
if (a==num_aa)
return true;
pos += num_aa;
}
}
return false;
}
/**********************************************************
calculates the aa_variants vectors (for terminalss and
amino acids Ala-Val
***********************************************************/
void Config::set_aa_variants()
{
const vector<int>& org_aas = this->get_org_aa();
int a;
aa_variants.clear();
aa_variants.resize(Val+1);
aa_variants[N_TERM].push_back(N_TERM);
aa_variants[C_TERM].push_back(C_TERM);
aa_variants[Xle].push_back(Xle);
for (a=0; a<session_aas.size(); a++)
{
int aa = session_aas[a];
int org_aa=org_aas[aa];
aa_variants[org_aa].push_back(aa);
}
}
/**********************************************************
Fills the array of amino acid combinations that can be
double edeges. Based on Yingying Huang et al 2003
***********************************************************/
void Config::fill_allowed_double_edges(bool allow_all)
{
int i;
int max_aa = Val;
for (i=0; i<session_aas.size(); i++)
if (session_aas[i]>max_aa)
max_aa= session_aas[i];
int num_aa = max_aa+1;
allowed_double_edge.clear();
double_edge_with_same_mass_as_single.clear();
allowed_double_edge.resize(num_aa);
double_edge_with_same_mass_as_single.resize(num_aa);
for (i=0; i<num_aa; i++)
{
allowed_double_edge[i].resize(num_aa,allow_all);
double_edge_with_same_mass_as_single[i].resize(num_aa,false);
}
if (tolerance<0.05)
allow_all=true;
if (! allow_all)
{
for (i=Ala; i<=Val; i++)
{
allowed_double_edge[Pro][i]=true;
allowed_double_edge[Gly][i]=true;
allowed_double_edge[Ser][i]=true;
}
// allowed_double_edge[Gly][Ala]=false;
allowed_double_edge[Ser][Ser]=false;
allowed_double_edge[Ser][Tyr]=false;
allowed_double_edge[Ser][Pro]=false;
allowed_double_edge[Ser][His]=false;
allowed_double_edge[Ser][Gly]=false;
allowed_double_edge[Ser][Val]=false;
allowed_double_edge[Thr][Val]=true;
allowed_double_edge[Thr][Gln]=true;
allowed_double_edge[Thr][His]=true;
allowed_double_edge[Thr][Glu]=true;
allowed_double_edge[Thr][Asp]=true;
allowed_double_edge[Asn][Asp]=true;
allowed_double_edge[Asn][Glu]=true;
allowed_double_edge[Asn][His]=true;
allowed_double_edge[Asn][Ile]=true;
allowed_double_edge[Asn][Leu]=true;
allowed_double_edge[Asn][Met]=true;
allowed_double_edge[Asn][Asn]=true;
allowed_double_edge[Asn][Gln]=true;
allowed_double_edge[Asn][Val]=true;
allowed_double_edge[Asn][Tyr]=true;
allowed_double_edge[Tyr][Asp]=true;
allowed_double_edge[Tyr][Glu]=true;
allowed_double_edge[Tyr][Thr]=true;
for (i=Ala; i<=Val; i++)
{
allowed_double_edge[i][Lys]=true;
allowed_double_edge[i][Arg]=true;
}
allowed_double_edge[Phe][Glu]=true;
allowed_double_edge[Ala][Leu]=true;
allowed_double_edge[Leu][Ala]=true;
allowed_double_edge[Ala][Ala]=true;
allowed_double_edge[Trp][Glu]=true;
// add these double edges, give penalty if they are used as a double edge
allowed_double_edge[Ala][Gly]=true;
allowed_double_edge[Gly][Ala]=true;
allowed_double_edge[Gly][Gly]=true;
allowed_double_edge[Ala][Asp]=true;
allowed_double_edge[Asp][Ala]=true;
allowed_double_edge[Val][Ser]=true;
allowed_double_edge[Ser][Val]=true;
allowed_double_edge[Gly][Glu]=true;
allowed_double_edge[Glu][Gly]=true;
}
double_edge_with_same_mass_as_single[Ala][Gly]=true;
double_edge_with_same_mass_as_single[Gly][Ala]=true;
double_edge_with_same_mass_as_single[Gly][Gly]=true;
double_edge_with_same_mass_as_single[Ala][Asp]=true;
double_edge_with_same_mass_as_single[Asp][Ala]=true;
double_edge_with_same_mass_as_single[Val][Ser]=true;
double_edge_with_same_mass_as_single[Ser][Val]=true;
double_edge_with_same_mass_as_single[Gly][Glu]=true;
double_edge_with_same_mass_as_single[Glu][Gly]=true;
// add fix for PTMs
const vector<int>& org_aas = session_tables.get_org_aa();
for (i=0; i<session_aas.size(); i++)
{
int j;
for (j=0; j<session_aas.size(); j++)
{
int aa1 = session_aas[i];
int aa2 = session_aas[j];
if (org_aas[aa1]>=Ala && org_aas[aa2]>=Ala &&
allowed_double_edge[org_aas[aa1]][org_aas[aa2]])
allowed_double_edge[aa1][aa2]=true;
}
}
}
/************************************************************
Add new fragments if they do not appear in list
*************************************************************/
void Config::add_fragment_types(const FragmentTypeSet& fts)
{
int i;
for (i=0; i<fts.get_fragments().size(); i++)
{
all_fragments.add_fragment_type(fts.get_fragment(i));
}
}
void Config::print_config_parameters(ostream& os) const
{
int i;
if (fragments_file.length()>0)
os << "#CONF FRAGMENTS_FILE " << fragments_file << endl;
if (regional_fragment_sets_file.length()>0)
os << "#CONF REGIONAL_FRAGMENT_SETS_FILE " << regional_fragment_sets_file << endl;
if (aa_combo_file.length()>0)
os << "#CONF AA_COMBO_FILE " << aa_combo_file << endl;
// if (resource_dir.length()>0)
// os << "#CONF RESOURCE_DIR " << resource_dir << endl;
os << "#CONF MASS_SPEC_TYPE " << mass_spec_type << "(currently only option 1) " << endl;
os << "#CONF DIGEST_TYPE " << digest_type << " (" << NON_SPECIFIC_DIGEST << " - non specific, " <<
TRYPSIN_DIGEST << " - trypsin)" << endl;
os << "#CONF MAX_NUMBER_OF_PEAKS_PER_LOCAL_WINDOW " <<
max_number_peaks_per_local_window << endl;
os << "#CONF NUMBER_OF_STRONG_PEAKS_PER_LOCAL_WINDOW " <<
number_of_strong_peaks_per_local_window << endl;
os << "#CONF LOCAL_WINDOW_SIZE " << setprecision(4) << local_window_size << endl;
os << "#CONF TOLERANCE " << setprecision(4) << tolerance << endl;
os << "#CONF PM_TOLERANCE " << setprecision(4) << pm_tolerance << endl;
os << "#CONF MAX_EDGE_LENGTH " << max_edge_length << endl;
os << "#CONF TERMINAL_SCORE " << terminal_score << endl;
os << "#CONF DIGEST_SCORE " << digest_score << endl;
os << "#CONF FORBIDDEN_PAIR_PENALTY " << forbidden_pair_penalty << endl;
os << "#CONF NEED_TO_CORRECT_PM " << need_to_estimate_pm << " (0 - no, 1 - yes)" << endl;
os << "#CONF SIZE_THRESHOLDS " << size_thresholds.size() << " ";
for (i=0; i<size_thresholds.size(); i++)
{
int j;
os << size_thresholds[i].size() << " ";
for (j=0; j<size_thresholds[i].size(); j++)
os << fixed << setprecision(4) << size_thresholds[i][j] << " ";
}
os << endl;
os << "#CONF REGION_THRESHOLDS " << region_thresholds.size();
for (i=0; i<region_thresholds.size(); i++)
os << " " << setprecision(4) << region_thresholds[i];
os << endl;
for (i=0; i<min_ranges.size(); i++)
{
os << "#CONF MASS_EXCLUDE_RANGE " << setprecision(3) << min_ranges[i] << " " << max_ranges[i] << endl;
}
}
/************************************************************
outputs the selected aas for the different regions
*************************************************************/
void Config::print_table_aas(const ConversionTables& table,
const vector<int>& aas) const
{
int i;
cout << aas.size() << " amino acids:" << endl;
for (i=0; i<aas.size(); i++)
cout << setw(6) << left << aas[i] << setprecision(4) << right << fixed << setw(8)
<< table.get_aa2mass(aas[i]) << " " << left <<
table.get_aa2label(aas[i]) << endl;
}
void Config::print_session_aas() const
{
cout << endl << "AMINO ACIDS" << endl;
print_table_aas(session_tables,session_aas);
cout << "N_TERM " << session_tables.get_aa2mass(N_TERM) << endl;
cout << "C_TERM " << session_tables.get_aa2mass(C_TERM) << endl;
}
void Config::print_aa_variants() const
{
int i;
const vector<string>& aa2label = get_aa2label();
for (i=0; i<aa_edge_combos.size(); i++)
{
cout << left << i << " " << aa_edge_combos[i].num_variants << " , " <<
aa_edge_combos[i].variant_start_idx << " ";
int j;
int pos = aa_edge_combos[i].variant_start_idx;
cout << "(" << pos << ") ";
for (j=0; j<aa_edge_combos[i].num_variants; j++)
{
int k;
int num_aa = variant_vector[pos++];
cout << " ";
for (k=0; k<num_aa; k++)
cout << aa2label[variant_vector[pos++]];
}
cout << endl;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -