📄 homeomorphic.cpp
字号:
#include "Homeomorphic.h"
#include "AnnotatedSpectrum.h"
#include "DeNovoDP.h"
#include "auxfun.h"
void hgraph::create_graph(Config *_config, Peptide& p)
{
config = _config;
org_pep = p;
int i,j;
const int num_aa = p.get_num_aas();
const vector<int>& amino_acids = p.get_amino_acids();
const vector<int>& session_aas = config->get_session_aas();
const vector<string>& aa2label = config->get_aa2label();
const vector<mass_t>& aa2mass = config->get_aa2mass();
const mass_t peptide_mass = p.get_mass();
const mass_t p18_mass = peptide_mass + MASS_H2O;
const mass_t tolerance = config->get_tolerance();
vector<mass_t> break_masses;
p.calc_expected_breakage_masses(config,break_masses);
nodes.resize(2*num_aa+2);
nodes[0].mass=0;
nodes[0].type = 0;
nodes[0].idx =0;
nodes[1].mass = p18_mass;
nodes[1].type = 1;
nodes[1].idx = 0;
for (i=1; i<=num_aa; i++)
{
int n_idx=2*i;
nodes[n_idx].mass = nodes[n_idx-2].mass + aa2mass[amino_acids[i-1]];
nodes[n_idx].type = 0;
nodes[n_idx].idx = i;
n_idx++;
nodes[n_idx].mass = p18_mass - nodes[n_idx-1].mass;
nodes[n_idx].type = 1;
nodes[n_idx].idx = i;
}
sort(nodes.begin(),nodes.end());
// connect edges (single only)
i=0;
j=1;
while (i<nodes.size()-2)
{
int k=i+1;
while (nodes[k].type != 0)
k++;
hedge e;
e.n1=i;
e.n2=k;
nodes[i].add_edge(e);
i=k;
}
while (j<nodes.size()-2)
{
int k=j+1;
while (nodes[k].type != 1)
k++;
hedge e;
e.n1=j;
e.n2=k;
nodes[j].add_edge(e);
j=k;
}
// connect crossover edges
for (i=0; i<nodes.size()-2; i++)
{
int j;
for (j=0; j<session_aas.size(); j++)
{
const int aa= session_aas[j];
if (aa==Ile)
continue;
int k;
for (k=i+1; k<nodes.size(); k++)
{
if (nodes[i].type != nodes[k].type)
{
hedge e;
e.n1=i;
e.n2=k;
e.num_aa=1;
e.score=1;
e.is_crossover = (nodes[i].type != nodes[k].type);
e.aa[0] = aa;
nodes[i].add_edge(e);
break;
}
}
}
}
}
bool hgraph::has_two_paths()
{
const int num_aa = org_pep.get_num_aas();
const int goal_node_idx = nodes.size()-2;
vector<int> out_idxs, prev_idxs;
bool print = true;
// init different vectors
out_idxs.clear();
out_idxs.resize(nodes.size(),0);
prev_idxs.clear();
prev_idxs.resize(nodes.size(),-1);
// do a DFS
int p=0;
int count =0;
while (1)
{
while (out_idxs[p]<nodes[p].edges.size())
{
// take this edge
const hedge& e = nodes[p].edges[out_idxs[p]];
const int node_idx = nodes[e.n2].idx ;
const hnode& node = nodes[e.n2];
int old_p=p;
p= e.n2;
prev_idxs[p]=old_p;;
out_idxs[old_p]++;
if (p == goal_node_idx )
{
// check for forbidden
count++;
if (count>1)
return true;
break;
}
}
// back track
out_idxs[p]=0;
p=prev_idxs[p];
if (p<0)
break;
}
return false;
}
void hgraph::get_delta_path_nums(vector<int>& counts, int max_print)
{
const int num_aa = org_pep.get_num_aas();
const int goal_node_idx = nodes.size()-2;
vector<bool> used_idxs;
vector<int> out_idxs, prev_idxs;
// init different vectors
counts.clear();
counts.resize(num_aa*2,0);
used_idxs.resize(num_aa+1,false);
used_idxs[0]=true;
out_idxs.clear();
out_idxs.resize(nodes.size(),0);
prev_idxs.clear();
prev_idxs.resize(nodes.size(),-1);
// do a DFS
int score=0;
int p=0;
// int n;
// for (n=0; n<nodes.size(); n++)
// cout << n << " " << nodes[n].idx << (nodes[n].type==0? " " : "' ") <<
// nodes[n].edges.size() << endl;
while (1)
{
while (out_idxs[p]<nodes[p].edges.size())
{
// take this edge
const hedge& e = nodes[p].edges[out_idxs[p]];
const int node_idx = nodes[e.n2].idx ;
const hnode& node = nodes[e.n2];
if (used_idxs[node_idx]) // forbidden pair check
{
out_idxs[p]++;
continue;
}
int old_p=p;
p= e.n2;
score+= e.score;
prev_idxs[p]=old_p;
used_idxs[node_idx]=true;
out_idxs[old_p]++;
if (p == goal_node_idx )
{
// check for forbidden
int bin = num_aa-score;
counts[bin]++;
if (bin<=max_print)
{
int q=p;
vector<int> ns;
while (q>=0)
{
ns.push_back(q);
q=prev_idxs[q];
}
sort(ns.begin(),ns.end());
// count alternating
int type=0;
int alt_count =0;
int i;
for (i=0; i<ns.size(); i++)
{
if (nodes[ns[i]].type != type)
{
alt_count++;
type = nodes[ns[i]].type;
}
}
if (alt_count>4 && bin == 0)
{
cout << alt_count << " PEP: ";
for (q=0; q<ns.size(); q++)
{
cout << nodes[ns[q]].idx;
if (nodes[ns[q]].type == 1)
cout << "'";
cout << " ";
}
cout << endl;
}
}
break;
}
}
// back track
out_idxs[p]=0;
used_idxs[nodes[p].idx]=false;
p=prev_idxs[p];
if (p<0)
break;
score -= nodes[p].edges[out_idxs[p]-1].score;
}
}
void hgraph::print() const
{
int i;
int type;
for (type =0; type<=1; type++)
{
for (i=0; i<nodes.size(); i++)
{
if (nodes[i].type == type)
{
const vector<hedge>& edges = nodes[i].edges;
int j;
for (j=0; j<edges.size(); j++)
if (edges[j].is_crossover)
{
if (type == 0)
{
cout << nodes[edges[j].n1].idx << " -> " << nodes[edges[j].n2].idx << "' *" << endl;
}
else
cout << nodes[edges[j].n1].idx << "' ->" << nodes[edges[j].n2].idx << " *" <<endl;
}
/* else
{
if (nodes[edges[j].n1].type == 0)
{
cout << nodes[edges[j].n1].idx << " -> " << nodes[edges[j].n2].idx << endl;
}
else
{
cout << nodes[edges[j].n1].idx << "' -> " << nodes[edges[j].n2].idx << "'" << endl;
}
}*/
}
}
}
}
void hexp1(Config *config)
{
int i,size;
int total = 5000;
for (size =5; size<=20; size++)
{
int count =0;
for (i=0; i<total; i++)
{
Peptide p;
vector<int> counts;
p.generate_random_peptide(config,size);
hgraph hg;
hg.create_graph(config,p);
if (hg.has_two_paths())
{
count++;
hg.print();
cout << endl;
}
}
cout << size << " " << count << "/" << total << "=" << (double)count/(double)total << endl;
}
}
//-------------------------------------------
struct cross_jump {
cross_jump() : idx1(-1), idx2(-1), aa(-1), aa2(-1) {}
int idx1,idx2;
int aa,aa2;
};
struct node_mass {
bool operator< ( const node_mass& other) const
{
return mass<other.mass;
}
int source;
mass_t mass;
int idx;
};
int get_num_paths(Config *config, Peptide& peptide,
mass_t tolerance, bool check_forbidden, bool allow_double,
bool print)
{
const vector<int>& session_aas = config->get_session_aas();
const vector<string>& aa2label = config->get_aa2label();
const vector<mass_t>& aa2mass = config->get_aa2mass();
const mass_t peptide_mass = peptide.get_mass();
vector<mass_t> break_masses;
vector<mass_t> other_masses;
peptide.calc_expected_breakage_masses(config,break_masses);
int i;
for (i=0; i<break_masses.size(); i++)
{
other_masses.push_back(peptide_mass - break_masses[i]+18);
if (print)
cout << i<< " " << setw(8) << left <<break_masses[i] << " " << setw(8) << left << other_masses[i] << endl;
}
vector<cross_jump> over,back;
over.clear();
back.clear();
int length = break_masses.size();
for (i=0; i<break_masses.size(); i++)
{
int j;
for (j=0; j<other_masses.size(); j++)
{
int a;
for (a=0; a<session_aas.size(); a++)
{
const int& aa = session_aas[a];
if (aa == Ile || aa == Lys)
continue;
if (fabs(break_masses[i] + aa2mass[aa] - other_masses[j])<=tolerance)
{
cross_jump cj;
cj.aa = aa;
cj.idx1 = i;
cj.idx2 = j;
over.push_back(cj);
}
else
{
if (allow_double)
{
int b;
for (b=0; b<session_aas.size(); b++)
{
const int& bb = session_aas[b];
if (bb == Ile || bb == Lys)
continue;
if (! config->is_allowed_double_edge(aa,bb))
continue;
if (fabs(break_masses[i] + aa2mass[aa] + aa2mass[bb] - other_masses[j])<=tolerance)
{
cross_jump cj;
cj.aa = aa;
cj.aa2 = bb;
cj.idx1 = i;
cj.idx2 = j;
over.push_back(cj);
}
}
}
}
}
}
}
for (i=0; i<break_masses.size(); i++)
{
int j;
for (j=other_masses.size()-1; j>=0; j--)
{
int a;
for (a=0; a<session_aas.size(); a++)
{
const int& aa = session_aas[a];
if (aa == Ile || aa == Lys)
continue;
if (fabs(break_masses[i] - aa2mass[aa] - other_masses[j])<=tolerance)
{
cross_jump cj;
cj.aa = aa;
cj.idx1 = i;
cj.idx2 = j;
back.push_back(cj);
}
else
{
if (allow_double)
{
int b;
for (b=0; b<session_aas.size(); b++)
{
const int& bb = session_aas[b];
if (bb == Ile || bb == Lys)
continue;
if (fabs(break_masses[i] - aa2mass[aa] - aa2mass[bb] - other_masses[j])<=tolerance)
{
cross_jump cj;
cj.aa = aa;
cj.aa2 = bb;
cj.idx1 = i;
cj.idx2 = j;
back.push_back(cj);
}
}
}
}
}
}
}
// if (over.size()==0 || back.size()==0)
// return;
if (print && over.size()>0 && back.size() >0)
{
int i;
for (i=0; i<over.size(); i++)
{
string aa_label = aa2label[over[i].aa] ;
mass_t aa_mass = aa2mass[over[i].aa];
if (over[i].aa2>=0)
{
aa_label += aa2label[over[i].aa2];
aa_mass += aa2mass[over[i].aa2];
}
cout << break_masses[over[i].idx1] << " -> " << aa_label << " (" << aa_mass
<< ") " << " -> " << other_masses[over[i].idx2] << endl;
}
for (i=0; i<back.size(); i++)
{
string aa_label = aa2label[back[i].aa] ;
mass_t aa_mass = aa2mass[back[i].aa];
if (back[i].aa2>=0)
{
aa_label += aa2label[back[i].aa2];
aa_mass += aa2mass[back[i].aa2];
}
cout << break_masses[back[i].idx1] << " <- " << aa_label << " (" <<
aa_mass << ") " << " <- " << other_masses[back[i].idx2] << endl;
}
}
// create graph
vector<node_mass> nodes;
vector< vector<bool> > edges;
vector< int > break_idxs, other_idxs;
// fill in nodes
for (i=0; i<break_masses.size(); i++)
{
node_mass nm;
nm.idx=i;
nm.mass = break_masses[i];
nm.source=0;
nodes.push_back(nm);
}
for (i=0; i<other_masses.size(); i++)
{
node_mass nm;
nm.idx=i;
nm.mass = other_masses[i];
nm.source=1;
nodes.push_back(nm);
}
sort(nodes.begin(),nodes.end());
int num_nodes = nodes.size();
// fill in edges
edges.resize(nodes.size());
for (i=0; i<nodes.size(); i++)
edges[i].resize(nodes.size(),false);
// make translation vectors
break_idxs.resize(length,-1);
other_idxs.resize(length,-1);
for (i=0; i<nodes.size(); i++)
if (nodes[i].source == 0)
{
break_idxs[nodes[i].idx]=i;
}
else
other_idxs[nodes[i].idx]=i;
// fill in regular edges
for (i=0; i<length-1; i++)
{
edges[break_idxs[i]][break_idxs[i+1]]=true;
edges[other_idxs[i+1]][other_idxs[i]]=true;
}
// fill in crossover edges
for (i=0; i<over.size(); i++)
edges[break_idxs[over[i].idx1]][other_idxs[over[i].idx2]]=true;
for (i=0; i<back.size(); i++)
edges[other_idxs[back[i].idx2]][break_idxs[back[i].idx1]]=true;
// fill forbidden pairs
vector<int> forbidden_pairs;
forbidden_pairs.resize(nodes.size(),-1);
for (i=0; i<break_idxs.size(); i++)
{
int n1=break_idxs[i];
int n2=other_idxs[i];
forbidden_pairs[n1]=n2;
forbidden_pairs[n2]=n1;
}
// print graph
if (print)
{
for (i=0; i<nodes.size(); i++)
cout << i << " " << nodes[i].mass << " " << nodes[i].idx <<" (" << nodes[i].source << ")" << endl;
cout << endl;
cout << " ";
for (i=0; i<nodes.size(); i++)
cout << i % 10;
cout << endl;
for (i=0; i<nodes.size(); i++)
{
int j;
cout << setw(2) << left << i << " ";
for (j=0; j<nodes.size(); j++)
if (edges[i][j])
{
cout << "+";
}
else
cout << " ";
cout << endl;
}
cout << endl;
}
// find number of paths
int num_paths_found=0;
vector<bool> used;
vector<int> out_idxs, prev_idxs;
out_idxs.resize(num_nodes);
for (i=0; i<num_nodes; i++)
out_idxs[i]=i;
used.resize(num_nodes,false);
prev_idxs.resize(num_nodes,-1);
int p=0;
used[0]=true;
while (1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -