📄 rna_profile_alignment.cpp
字号:
color=g2_ink(device_id,0.75,0.75,0.75); g2_pen(device_id,color); g2_rectangle(device_id,center_x-box_size/2,center_y-box_size/2,center_x+box_size/2,center_y+box_size/2);}#endifinline double RNAProfileAlignment::getMlBaseFreq(const BaseProbs &bp) const{ return max(max(max(bp.a,bp.c),bp.g),bp.u);}char RNAProfileAlignment::getMlBase(const BaseProbs &bp) const{ double p = getMlBaseFreq(bp); if(bp.a==p) return ALPHA_BASE_A; if(bp.c==p) return ALPHA_BASE_C; if(bp.g==p) return ALPHA_BASE_G; if(bp.u==p) return ALPHA_BASE_U; assert(false); // you should never get so far ... return 'x'; }
void RNAProfileAlignment::getSeqAli(string &seq,Uint row, Uint i, Uint j) const
{
if(i==0 && j==getMaxLength(0))
seq="";
if(j==0)
return;
// basepair=internal node
if(isPair(i))
{
getSeqAli(seq,row,i+1,noc(i));
getSeqAli(seq,row,rb(i),j-1);
}
else
{
// base=leaf
seq+=m_lb[i].columnStr[row];
getSeqAli(seq,row,rb(i),j-1);
}
}void RNAProfileAlignment::getStructAli(string &s,Uint row) const
{
s=""; map<Uint,Uint> pairs; makePairTable(pairs, row); // iterate through leaves nodes and use information of pairs for(Uint i=0;i<m_size;i++) { RNA_Alphabet c; c=m_lb[i].columnStr[row]; if(isBase(i)) { if(c != '-') { if(pairs.find(i) != pairs.end()) // is base paired ? { Uint j=pairs[i]; if(i<j) s+='('; else s+=')'; } else s+='.'; } else s+='-'; } }
}
void RNAProfileAlignment::makePairTable(map<Uint,Uint> &pairs, Uint row) const{ pair<int,int> *baseIndex; // left and right base RNA_Alphabet c; assert(row<m_numStructures); baseIndex=new pair<int,int>[m_size]; // initialize pairs, all gaps for(int i=size()-1;i>=0;i--) { baseIndex[i].first=-1; baseIndex[i].second=-1; } for(int i=size()-1;i>=0;i--) { c=m_lb[i].columnStr[row]; if(isLeave(i)) { if(c!=ALPHA_GAP) { baseIndex[i].first=i; baseIndex[i].second=i; } } else { // internal node // leftmost and rightmost base bool lmBaseFound=false; for(size_type r=0,h=i+1;r<getMaxLength(i+1);r++,h=rb(h)) { // leftmost base if(!lmBaseFound && baseIndex[h].first != -1) { baseIndex[i].first=baseIndex[h].first; lmBaseFound=true; } // rightmost base if(baseIndex[h].second != -1) { baseIndex[i].second=baseIndex[h].second; } } // report pairing bases if P node if(c==ALPHA_BASEPAIR) { assert(baseIndex[i].first != -1); assert(baseIndex[i].second != -1); assert(baseIndex[i].first < baseIndex[i].second); pairs[baseIndex[i].first]=baseIndex[i].second; pairs[baseIndex[i].second]=baseIndex[i].first; } } } delete[] baseIndex; }void RNAProfileAlignment::filterConsensus(string &structure, deque<double> &pairprob, deque<BaseProbs> &baseprobs, double minFreq) const{ deque<double>::iterator itPP; // deque<BaseProbs>::iterator itBP; string::iterator it; for(itPP=pairprob.begin(),it=structure.begin();itPP!=pairprob.end();itPP++,it++) { if(*itPP<minFreq) { *itPP=0.0; *it='.'; } } /* for(it=structure.begin(),itPP=pairprob.begin(),itBP=baseprobs.begin();it!=structure.end();) { if(itBP->base<minFreq) { structure.erase(it); baseprobs.erase(itBP); pairprob.erase(itPP); it=structure.begin(); itPP=pairprob.begin(); itBP=baseprobs.begin(); } else { it++; itPP++; itBP++; } } */}/* ****************************************** *//* Public functions *//* ****************************************** */void RNAProfileAlignment::getSequenceAlignment(deque<BaseProbs> &baseprob) const{ BaseProbs bp; // generate base strings for(size_type i=0;i<size();i++) { if(isBase(i)) { bp.a=label(i).p[ALPHA_PRO_BASE_A]; bp.c=label(i).p[ALPHA_PRO_BASE_C]; bp.g=label(i).p[ALPHA_PRO_BASE_G]; bp.u=label(i).p[ALPHA_PRO_BASE_U]; bp.gap=label(i).p[ALPHA_PRO_GAP]; bp.base=bp.a+bp.c+bp.g+bp.u; baseprob.push_back(bp); } } }void RNAProfileAlignment::getStructureAlignment(double t,string &s, deque<double> &pairprob) const{ WATCH(DBG_GET_PROFILE_STRUCTURE,"Profile_RNA_Alignment_Forest::getStructureAlignment",size()); s=""; getStructureAlignmentFromCSF(s,pairprob,t,0,getMaxLength(0));}#ifdef HAVE_LIBG2 // This features require the g2 libraryvoid RNAProfileAlignment::squigglePlot(const string &filename, SquigglePlotOptions &options) const{ const double base_fontsize=8; const Uint num_grey_colors=100; const double min_grey_color=1.0; string seq,structure; string base,structname; float *X,*Y,min_X=0,max_X=0,min_Y=0,max_Y=0; Uint i; short *pair_table; int id_PS,id; int ps_grey_colors[num_grey_colors]; int ps_color_red; int ps_color_black; double xpos,ypos; deque<double> pairprob; deque<BaseProbs> baseprobs; getStructureAlignment(options.minPairProb,structure,pairprob); getSequenceAlignment(baseprobs); // filterConsensus(structure,pairprob,baseprobs,0.5); //assert(baseprobs.size() == structure.size()); if(baseprobs.size() != structure.size()) cerr << "Error in resolving consensus structure!" << endl; X = new float[structure.size()]; Y = new float[structure.size()]; pair_table = make_pair_table(structure.c_str()); i = naview_xy_coordinates(pair_table, X, Y); if(i!=structure.size()) cerr << "strange things happening in squigglePlot ..." << endl; // calculate image dimesions for(i=0;i<structure.size();i++) { min_X=min(min_X,X[i]); max_X=max(max_X,X[i]); min_Y=min(min_Y,Y[i]); max_Y=max(max_Y,Y[i]); } // id_PS = g2_open_PS("ali.ps", g2_A4, g2_PS_port); id_PS = g2_open_EPSF((char*)filename.c_str()); id = g2_open_vd(); g2_attach(id,id_PS); // cout << "min_X: " << min_X <<",max_X: " << max_X << ",min_Y: " << min_Y << "max_Y: " << max_Y << endl; g2_set_coordinate_system(id_PS,595/2.0,842/2.0,0.5,0.5); g2_set_line_width(id,0.2); // set colors double intv=min_grey_color/(double)num_grey_colors; for(i=0;i<num_grey_colors;i++) { double grey_color=min_grey_color-i*intv; ps_grey_colors[i]=g2_ink(id_PS,grey_color,grey_color,grey_color); } ps_color_black=g2_ink(id_PS,0,0,0); if(options.greyColors) ps_color_red=g2_ink(id_PS,0,0,0); else ps_color_red=g2_ink(id_PS,1,0,0); // draw sequence g2_set_font_size(id,base_fontsize); for(i=0;i<structure.size();i++) { if(options.mostLikelySequence) { double p=getMlBaseFreq(baseprobs[i]); //base color if(p==1) g2_pen(id,ps_color_red); else g2_pen(id,ps_grey_colors[(int)floor(p*num_grey_colors-1)]); base=getMlBase(baseprobs[i]); xpos=X[i]-base.length()*base_fontsize/2.0; ypos=Y[i]-4; g2_string(id,xpos,ypos,(char*)base.c_str()); } else { drawBaseCircles(id_PS,baseprobs[i],X[i],Y[i]); } // connection to next base if(i<structure.size()-1) { if((1-baseprobs[i].gap)*(1-baseprobs[i+1].gap)==1) g2_pen(id,ps_color_red); else g2_pen(id,ps_grey_colors[(int)floor((1-baseprobs[i].gap)*(1-baseprobs[i+1].gap)*num_grey_colors-1)]); g2_line(id,X[i],Y[i],X[i+1],Y[i+1]); } } // draw pairings // !!! pair_table indexing begins at 1 !!! for(i=0;i<structure.size();i++) { if((unsigned short)pair_table[i+1]>i+1) { // pairs in both structures if(pairprob[i]==1) g2_pen(id,ps_color_red); else g2_pen(id,ps_grey_colors[(int)floor(pairprob[i]*num_grey_colors-1)]); g2_line(id,X[i],Y[i],X[pair_table[i+1]-1],Y[pair_table[i+1]-1]); } } g2_flush(id); g2_close(id); free(pair_table); DELETE(X); DELETE(Y);}#endifvoid RNAProfileAlignment::printSeqAli() const
{
Uint i,l;
deque<string> seqs;
string seq,info; // get alignment rows
for(i=0;i<m_numStructures;i++)
{ getSeqAli(seq,i,0,getMaxLength(0));
seqs.push_back(seq);
}
l=seq.length();
// sequence // if(hasSequence) // { // calculate info line for(i=0;i<l;i++) { bool equal=true; for(Uint r=1;r<m_numStructures;r++) { if((seqs[r])[i]!=(seqs[r-1])[i]) { equal=false; break; } } info += equal ? "*" : " "; } // print it
// sequences
for(i=0;i<l;i+=55)
{
for(Uint r=0;r<m_numStructures;r++)
cout << setw(20) << setfill(' ') << left << m_strNames[r].substr(0,20) << setw(5) << " " << seqs[r].substr(i,55) << endl;
cout << setw(25) << " " << info.substr(i,55) << endl; cout << endl; } // }}
void RNAProfileAlignment::printStrAli() const{ Uint i,l;
deque<string> strs;
string str,info; // get alignment rows
for(i=0;i<m_numStructures;i++)
{
getStructAli(str,i);
strs.push_back(str);
}
l=str.length();
for(i=0;i<l;i++) { bool equal=true; for(Uint r=1;r<m_numStructures;r++) { if((strs[r])[i]!=(strs[r-1])[i]) { equal=false; break; } } info += equal ? "*" : " "; } // structures
for(i=0;i<l;i+=55)
{
for(Uint r=0;r<m_numStructures;r++)
cout << setw(20) << setfill(' ') << left << m_strNames[r].substr(0,20) << setw(5) << " " << strs[r].substr(i,55) << endl;
cout << setw(25) << " " << info.substr(i,55) << endl; cout << endl; }
}void RNAProfileAlignment::printFastaAli(bool noStructure) const
{
string str,seq; // get alignment rows
for(Uint i=0;i<m_numStructures;i++)
{
getStructAli(str,i); getSeqAli(seq,i,0,getMaxLength(0));
cout << ">" << m_strNames[i] << endl; cout << seq << endl; if(!noStructure) cout << str << endl; }
}
void RNAProfileAlignment::printConsensus(double minPairProb) const{ const int mountain_high=10; int j; string seq,structure; deque<BaseProbs> baseprobs; deque<double> pairprob; getSequenceAlignment(baseprobs); getStructureAlignment(minPairProb,structure,pairprob); // build sequence deque<BaseProbs>::const_iterator it; for(it=baseprobs.begin();it!=baseprobs.end();it++) { seq += getMlBase(*it); }// assert(seq.size() == structure.size()); if(seq.size() != structure.size()) cerr << "Error in resolving consensus structure!" << endl; for(Uint i=0;i<seq.length();i+=55) { // show structure frequency mountain for(j=0;j<mountain_high;j++) { cout << setw(20) << setfill(' ') << " "; cout << setw(3) << right << 100 - (100 / mountain_high) * j << "% "; for(int k=0;k<55;k++) { if(i+k<seq.length()) { if(getMlBaseFreq(baseprobs[i+k])>=1-(1/(double)mountain_high)*(double)j) cout << "*"; else cout << " "; } } cout << endl; } cout << setw(25) << " " << seq.substr(i,55) << endl; cout << setw(25) << " " << structure.substr(i,55) << endl; // show structure frequency mountain for(j=0;j<mountain_high;j++) { cout << setw(20) << setfill(' ') << " "; cout << setw(3) << right << (100 / mountain_high) * (j+1) << "% "; for(int k=0;k<55;k++) { if(i+k<seq.length()) { if(pairprob[i+k]>=(1/(double)mountain_high)*(double)j) cout << "*"; else cout << " "; } } cout << endl; } cout << endl; }}void RNAProfileAlignment::addStrNames(const deque<string>& strNames){ deque<string>::const_iterator it; for(it=strNames.begin();it!=strNames.end();it++) m_strNames.push_back(*it);}void RNAProfileAlignment::save(const string &filename){ ofstream s(filename.c_str()); // save the pforest to stream in binary format // first of all save the size s.write(reinterpret_cast<const char*>(&m_size),sizeof(size_type)); s.write(reinterpret_cast<const char*>(&m_numStructures),sizeof(Uint)); // save the arrays for(Uint i=0;i<m_size;i++) { for(int r=0;r<RNA_ALPHABET_SIZE;r++) s.write(reinterpret_cast<const char*>(&m_lb[i].p[r]),sizeof(double)); s.write(reinterpret_cast<const char*>(m_lb[i].columnStr.c_str()),sizeof(char)*m_numStructures); } // for(Uint i=0;i<m_size;i++) // { // s.write(reinterpret_cast<char*>(&m_lb[i]),sizeof(label_type)*m_size); // } // s.write(reinterpret_cast<char*>(m_lb),sizeof(label_type)*m_size); s.write(reinterpret_cast<const char*>(m_rb),sizeof(size_type)*m_size); s.write(reinterpret_cast<const char*>(m_noc),sizeof(size_type)*m_size); for(Uint r=0;r<m_numStructures;r++) { ostringstream ss; ss << setw(20) << setfill(' ') << left << m_strNames[r]; s.write(reinterpret_cast<const char*>(ss.str().c_str()),sizeof(char)*20); } // s.write(reinterpret_cast<char*>(m_sumUpCSF),sizeof(size_type)*m_size); // s.write(reinterpret_cast<char*>(m_rmb),sizeof(size_type)*m_size); // s.write(m_name;
//Uint m_numStructures; //Uint m_numStructuresX; //Uint m_numStructuresY; //deque<string> m_strNames; //bool hasSequence;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -