⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rna_profile_alignment.cpp

📁 ViennaRNA-1.6.1
💻 CPP
📖 第 1 页 / 共 2 页
字号:
  color=g2_ink(device_id,0.75,0.75,0.75);  g2_pen(device_id,color);  g2_rectangle(device_id,center_x-box_size/2,center_y-box_size/2,center_x+box_size/2,center_y+box_size/2);}#endifinline double RNAProfileAlignment::getMlBaseFreq(const BaseProbs &bp) const{  return max(max(max(bp.a,bp.c),bp.g),bp.u);}char RNAProfileAlignment::getMlBase(const BaseProbs &bp) const{  double p = getMlBaseFreq(bp);  if(bp.a==p)    return ALPHA_BASE_A;  if(bp.c==p)    return ALPHA_BASE_C;  if(bp.g==p)    return ALPHA_BASE_G;  if(bp.u==p)    return ALPHA_BASE_U;  assert(false); // you should never get so far ...  return 'x';  }
void RNAProfileAlignment::getSeqAli(string &seq,Uint row, Uint i, Uint j) const
{
	if(i==0 && j==getMaxLength(0))
		seq="";

	if(j==0)
		return;

	// basepair=internal node
	if(isPair(i))
	{
		getSeqAli(seq,row,i+1,noc(i));
		getSeqAli(seq,row,rb(i),j-1);		
	}
	else
	{
		// base=leaf
		seq+=m_lb[i].columnStr[row];
		getSeqAli(seq,row,rb(i),j-1);
	}
}void RNAProfileAlignment::getStructAli(string &s,Uint row) const
{
  s="";  map<Uint,Uint> pairs;  makePairTable(pairs, row);  // iterate through leaves nodes and use information of pairs  for(Uint i=0;i<m_size;i++)    {	  RNA_Alphabet c;	  	  c=m_lb[i].columnStr[row];      if(isBase(i))	  {		if(c != '-')		{			if(pairs.find(i) != pairs.end())	// is base paired ?			{				Uint j=pairs[i];				if(i<j)					s+='(';				else					s+=')';			}			else				s+='.';		}		else			s+='-';	  }    }
}
void RNAProfileAlignment::makePairTable(map<Uint,Uint> &pairs, Uint row) const{	pair<int,int> *baseIndex;		// left and right base	RNA_Alphabet c;	assert(row<m_numStructures);	baseIndex=new pair<int,int>[m_size];	// initialize pairs, all gaps	for(int i=size()-1;i>=0;i--)	  {	    baseIndex[i].first=-1;	    baseIndex[i].second=-1;	  }	for(int i=size()-1;i>=0;i--)	{	        c=m_lb[i].columnStr[row];		if(isLeave(i))		  {			if(c!=ALPHA_GAP)			{			    baseIndex[i].first=i;			    baseIndex[i].second=i;			}		  }		else		{			// internal node			// leftmost and rightmost base			bool lmBaseFound=false;			for(size_type r=0,h=i+1;r<getMaxLength(i+1);r++,h=rb(h))			{					  				// leftmost base				if(!lmBaseFound && baseIndex[h].first != -1)				{					baseIndex[i].first=baseIndex[h].first;					lmBaseFound=true;				}				// rightmost base				if(baseIndex[h].second != -1)				{					baseIndex[i].second=baseIndex[h].second;				}			}			// report pairing bases if P node			if(c==ALPHA_BASEPAIR)			{			        assert(baseIndex[i].first != -1);			        assert(baseIndex[i].second != -1);				assert(baseIndex[i].first < baseIndex[i].second);				pairs[baseIndex[i].first]=baseIndex[i].second;				pairs[baseIndex[i].second]=baseIndex[i].first;			}		}	}	delete[] baseIndex;       }void RNAProfileAlignment::filterConsensus(string &structure, deque<double> &pairprob, deque<BaseProbs> &baseprobs, double minFreq) const{	deque<double>::iterator itPP;	//	deque<BaseProbs>::iterator itBP;	string::iterator it;	for(itPP=pairprob.begin(),it=structure.begin();itPP!=pairprob.end();itPP++,it++)	{		if(*itPP<minFreq)		{			*itPP=0.0;			*it='.';		}	}	/*	for(it=structure.begin(),itPP=pairprob.begin(),itBP=baseprobs.begin();it!=structure.end();)	{		if(itBP->base<minFreq)		{							structure.erase(it);			baseprobs.erase(itBP);			pairprob.erase(itPP);			it=structure.begin();			itPP=pairprob.begin();			itBP=baseprobs.begin();					}		else		{			it++;			itPP++;			itBP++;		}	}	*/}/* ****************************************** *//*             Public functions               *//* ****************************************** */void RNAProfileAlignment::getSequenceAlignment(deque<BaseProbs> &baseprob) const{	BaseProbs bp;	// generate base strings	for(size_type i=0;i<size();i++)	{		if(isBase(i))		{				bp.a=label(i).p[ALPHA_PRO_BASE_A];			bp.c=label(i).p[ALPHA_PRO_BASE_C];			bp.g=label(i).p[ALPHA_PRO_BASE_G];			bp.u=label(i).p[ALPHA_PRO_BASE_U];			bp.gap=label(i).p[ALPHA_PRO_GAP];			bp.base=bp.a+bp.c+bp.g+bp.u;			baseprob.push_back(bp);		}	} }void RNAProfileAlignment::getStructureAlignment(double t,string &s, deque<double> &pairprob) const{	WATCH(DBG_GET_PROFILE_STRUCTURE,"Profile_RNA_Alignment_Forest::getStructureAlignment",size());	s="";	getStructureAlignmentFromCSF(s,pairprob,t,0,getMaxLength(0));}#ifdef HAVE_LIBG2  // This features require the g2 libraryvoid RNAProfileAlignment::squigglePlot(const string &filename, SquigglePlotOptions &options) const{	const double base_fontsize=8;	const Uint num_grey_colors=100;	const double min_grey_color=1.0;	string seq,structure;	string base,structname;	float *X,*Y,min_X=0,max_X=0,min_Y=0,max_Y=0;	Uint i;	short *pair_table;	int id_PS,id;	int ps_grey_colors[num_grey_colors];	int ps_color_red;	int ps_color_black;	double xpos,ypos;	deque<double> pairprob;	deque<BaseProbs> baseprobs;	getStructureAlignment(options.minPairProb,structure,pairprob);	getSequenceAlignment(baseprobs);	//  filterConsensus(structure,pairprob,baseprobs,0.5);	//assert(baseprobs.size() == structure.size());	if(baseprobs.size() != structure.size())		cerr <<  "Error in resolving consensus structure!" << endl;	X = new float[structure.size()];	Y = new float[structure.size()];	pair_table = make_pair_table(structure.c_str());	i = naview_xy_coordinates(pair_table, X, Y);	if(i!=structure.size())		cerr << "strange things happening in squigglePlot ..." << endl;	// calculate image dimesions	for(i=0;i<structure.size();i++)	{		min_X=min(min_X,X[i]);		max_X=max(max_X,X[i]);		min_Y=min(min_Y,Y[i]);		max_Y=max(max_Y,Y[i]);	}	//  id_PS  = g2_open_PS("ali.ps", g2_A4, g2_PS_port);	id_PS  = g2_open_EPSF((char*)filename.c_str());	id     = g2_open_vd();	g2_attach(id,id_PS);	//  cout << "min_X: " << min_X <<",max_X: " << max_X << ",min_Y: " << min_Y << "max_Y: " << max_Y << endl; 	g2_set_coordinate_system(id_PS,595/2.0,842/2.0,0.5,0.5);	g2_set_line_width(id,0.2);	// set colors	double intv=min_grey_color/(double)num_grey_colors;	for(i=0;i<num_grey_colors;i++)	{		double grey_color=min_grey_color-i*intv;		ps_grey_colors[i]=g2_ink(id_PS,grey_color,grey_color,grey_color);	}	ps_color_black=g2_ink(id_PS,0,0,0);	if(options.greyColors)		ps_color_red=g2_ink(id_PS,0,0,0);	else		ps_color_red=g2_ink(id_PS,1,0,0);	// draw sequence	g2_set_font_size(id,base_fontsize);	for(i=0;i<structure.size();i++)	{		if(options.mostLikelySequence)		{			double p=getMlBaseFreq(baseprobs[i]);			//base color			if(p==1)				g2_pen(id,ps_color_red);			else				g2_pen(id,ps_grey_colors[(int)floor(p*num_grey_colors-1)]);			base=getMlBase(baseprobs[i]);			xpos=X[i]-base.length()*base_fontsize/2.0;			ypos=Y[i]-4;			g2_string(id,xpos,ypos,(char*)base.c_str());     		}		else		{			drawBaseCircles(id_PS,baseprobs[i],X[i],Y[i]);		}		// connection to next base		if(i<structure.size()-1)		{			if((1-baseprobs[i].gap)*(1-baseprobs[i+1].gap)==1)				g2_pen(id,ps_color_red);			else				g2_pen(id,ps_grey_colors[(int)floor((1-baseprobs[i].gap)*(1-baseprobs[i+1].gap)*num_grey_colors-1)]);			g2_line(id,X[i],Y[i],X[i+1],Y[i+1]);		}	}	// draw pairings	// !!! pair_table indexing begins at 1 !!!	for(i=0;i<structure.size();i++)	{		if((unsigned short)pair_table[i+1]>i+1)		{	    	    			// pairs in both structures			if(pairprob[i]==1)				g2_pen(id,ps_color_red);			else				g2_pen(id,ps_grey_colors[(int)floor(pairprob[i]*num_grey_colors-1)]);	   	    			g2_line(id,X[i],Y[i],X[pair_table[i+1]-1],Y[pair_table[i+1]-1]);		}	}	g2_flush(id);	g2_close(id);	free(pair_table);	DELETE(X);	DELETE(Y);}#endifvoid RNAProfileAlignment::printSeqAli() const
{
	Uint i,l;
	deque<string> seqs;
	string seq,info;	// get alignment rows
	for(i=0;i<m_numStructures;i++)
	{	  		getSeqAli(seq,i,0,getMaxLength(0));
		seqs.push_back(seq);
	}
	l=seq.length();
	// sequence 	//	if(hasSequence)	//	{		// calculate info line		for(i=0;i<l;i++)		{				bool equal=true;							for(Uint r=1;r<m_numStructures;r++)			{				if((seqs[r])[i]!=(seqs[r-1])[i])				{				equal=false;				break;				}			}			info += equal ? "*" : " ";		  		}  		// print it
		// sequences
		for(i=0;i<l;i+=55)
		{
			for(Uint r=0;r<m_numStructures;r++)
				cout << setw(20) << setfill(' ') << left << m_strNames[r].substr(0,20) << setw(5) << " " << seqs[r].substr(i,55) << endl;
			cout << setw(25) << " " << info.substr(i,55) << endl;			cout << endl;		}		//	}}
void RNAProfileAlignment::printStrAli() const{        Uint i,l;
	deque<string> strs;
	string str,info;	// get alignment rows
	for(i=0;i<m_numStructures;i++)
	{
		getStructAli(str,i);
		strs.push_back(str);
	}
	l=str.length();
	for(i=0;i<l;i++)	  {	    bool equal=true;	    	    for(Uint r=1;r<m_numStructures;r++)	      {		if((strs[r])[i]!=(strs[r-1])[i])		  {		    equal=false;		    break;		  }			}	    	    info += equal ? "*" : " ";		  	  }  		// structures
	for(i=0;i<l;i+=55)
	{
		for(Uint r=0;r<m_numStructures;r++)
			cout << setw(20) << setfill(' ') << left << m_strNames[r].substr(0,20) << setw(5) << " " << strs[r].substr(i,55) << endl;
		cout << setw(25) << " " <<  info.substr(i,55) << endl;		cout << endl;	}
   }void RNAProfileAlignment::printFastaAli(bool noStructure) const
{
	string str,seq;	// get alignment rows
	for(Uint i=0;i<m_numStructures;i++)
	{
		getStructAli(str,i);	  		getSeqAli(seq,i,0,getMaxLength(0));
		cout << ">" << m_strNames[i] << endl;		cout << seq << endl;		if(!noStructure)		  cout << str << endl;	}
}
void RNAProfileAlignment::printConsensus(double minPairProb) const{	const int mountain_high=10;	int j;	string seq,structure;	deque<BaseProbs> baseprobs;	deque<double> pairprob;	getSequenceAlignment(baseprobs);	getStructureAlignment(minPairProb,structure,pairprob);	// build sequence	deque<BaseProbs>::const_iterator it;	for(it=baseprobs.begin();it!=baseprobs.end();it++)	{		seq += getMlBase(*it);	}//	assert(seq.size() == structure.size());	if(seq.size() != structure.size())		cerr <<  "Error in resolving consensus structure!" << endl;	for(Uint i=0;i<seq.length();i+=55)	{		// show structure frequency mountain		for(j=0;j<mountain_high;j++)		{			cout << setw(20) << setfill(' ') << " ";			cout << setw(3) << right << 100 - (100 / mountain_high) * j << "% ";			for(int k=0;k<55;k++)			{				if(i+k<seq.length())				{					if(getMlBaseFreq(baseprobs[i+k])>=1-(1/(double)mountain_high)*(double)j)						cout << "*";					else						cout << " ";				}			}			cout << endl;		}		cout << setw(25) << " " << seq.substr(i,55) << endl;		cout << setw(25) << " " << structure.substr(i,55) << endl;		// show structure frequency mountain		for(j=0;j<mountain_high;j++)		{			cout << setw(20) << setfill(' ') << " ";			cout << setw(3) << right << (100 / mountain_high) * (j+1) << "% ";			for(int k=0;k<55;k++)			{						if(i+k<seq.length())				{					if(pairprob[i+k]>=(1/(double)mountain_high)*(double)j)						cout << "*";					else						cout << " ";				}			}			cout << endl;		}		cout << endl;	}}void RNAProfileAlignment::addStrNames(const deque<string>& strNames){	deque<string>::const_iterator it;	for(it=strNames.begin();it!=strNames.end();it++)		m_strNames.push_back(*it);}void RNAProfileAlignment::save(const string &filename){  ofstream s(filename.c_str());  // save the pforest to stream in binary format    // first of all save the size  s.write(reinterpret_cast<const char*>(&m_size),sizeof(size_type));  s.write(reinterpret_cast<const char*>(&m_numStructures),sizeof(Uint));    // save the arrays  for(Uint i=0;i<m_size;i++)    {    for(int r=0;r<RNA_ALPHABET_SIZE;r++)      s.write(reinterpret_cast<const char*>(&m_lb[i].p[r]),sizeof(double));    s.write(reinterpret_cast<const char*>(m_lb[i].columnStr.c_str()),sizeof(char)*m_numStructures);    }      //  for(Uint i=0;i<m_size;i++)  //  {  //    s.write(reinterpret_cast<char*>(&m_lb[i]),sizeof(label_type)*m_size);  //  }  //  s.write(reinterpret_cast<char*>(m_lb),sizeof(label_type)*m_size);  s.write(reinterpret_cast<const char*>(m_rb),sizeof(size_type)*m_size);  s.write(reinterpret_cast<const char*>(m_noc),sizeof(size_type)*m_size);    for(Uint r=0;r<m_numStructures;r++)    {      ostringstream ss;      ss << setw(20) << setfill(' ') << left <<  m_strNames[r];      s.write(reinterpret_cast<const char*>(ss.str().c_str()),sizeof(char)*20);    }    //  s.write(reinterpret_cast<char*>(m_sumUpCSF),sizeof(size_type)*m_size);  //  s.write(reinterpret_cast<char*>(m_rmb),sizeof(size_type)*m_size);    //  s.write(m_name;
  //Uint m_numStructures;  //Uint m_numStructuresX;  //Uint m_numStructuresY;  //deque<string> m_strNames;  //bool hasSequence;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -