⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pairalign.c

📁 生物序列比对程序clustw的源代码
💻 C
字号:
/* Change int h to int gh everywhere  DES June 1994 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "clustalw.h"#define MIN(a,b) ((a)<(b)?(a):(b))#define MAX(a,b) ((a)>(b)?(a):(b))#define gap(k)  ((k) <= 0 ? 0 : g + gh * (k))#define tbgap(k)  ((k) <= 0 ? 0 : tb + gh * (k))#define tegap(k)  ((k) <= 0 ? 0 : te + gh * (k))/* *	Prototypes */static void add(sint v);static sint calc_score(sint iat, sint jat, sint v1, sint v2);static float tracepath(sint tsb1,sint tsb2);static void forward_pass(char *ia, char *ib, sint n, sint m);static void reverse_pass(char *ia, char *ib);static sint diff(sint A, sint B, sint M, sint N, sint tb, sint te);static void del(sint k);/* *   Global variables */#ifdef MAC#define pwint   short#else#define pwint   int#endifstatic sint		int_scale;extern double   **tmat;extern float    pw_go_penalty;extern float    pw_ge_penalty;extern float	transition_weight;extern sint 	nseqs;extern sint 	max_aa;extern sint 	gap_pos1,gap_pos2;extern sint  	max_aln_length;extern sint 	*seqlen_array;extern sint 	debug;extern sint  	mat_avscore;extern short 	blosum30mt[],pam350mt[],idmat[],pw_usermat[],pw_userdnamat[];extern short    clustalvdnamt[],swgapdnamt[];extern short    gon250mt[];extern short 	def_dna_xref[],def_aa_xref[],pw_dna_xref[],pw_aa_xref[];extern Boolean  dnaflag;extern char 	**seq_array;extern char 	*amino_acid_codes;extern char 	pw_mtrxname[];extern char 	pw_dnamtrxname[];static float 	mm_score;static sint 	print_ptr,last_print;static sint 	*displ;static pwint 	*HH, *DD, *RR, *SS;static sint 	g, gh;static sint   	seq1, seq2;static sint     matrix[NUMRES][NUMRES];static pwint    maxscore;static sint    	sb1, sb2, se1, se2;sint pairalign(sint istart, sint iend, sint jstart, sint jend){  short	 *mat_xref;  static sint    si, sj, i;  static sint    n,m,len1,len2;  static sint    maxres;  static short    *matptr;  static char   c;  static float gscale,ghscale;  displ = (sint *)ckalloc((2*max_aln_length+1) * sizeof(sint));  HH = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));  DD = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));  RR = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));  SS = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));		#ifdef MAC  int_scale = 10;#else  int_scale = 100;#endif  gscale=ghscale=1.0;  if (dnaflag)    {      if (debug>1) fprintf(stdout,"matrix %s\n",pw_dnamtrxname);      if (strcmp(pw_dnamtrxname, "iub") == 0)	{ 	  matptr = swgapdnamt;	  mat_xref = def_dna_xref;	}      else if (strcmp(pw_dnamtrxname, "clustalw") == 0)	{ 	  matptr = clustalvdnamt;	  mat_xref = def_dna_xref;	  gscale=0.6667;	  ghscale=0.751;	}      else	{	  matptr = pw_userdnamat;	  mat_xref = pw_dna_xref;	}      maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);      if (maxres == 0) return((sint)-1);      matrix[0][4]=transition_weight*matrix[0][0];      matrix[4][0]=transition_weight*matrix[0][0];      matrix[2][11]=transition_weight*matrix[0][0];      matrix[11][2]=transition_weight*matrix[0][0];      matrix[2][12]=transition_weight*matrix[0][0];      matrix[12][2]=transition_weight*matrix[0][0];    }  else    {      if (debug>1) fprintf(stdout,"matrix %s\n",pw_mtrxname);      if (strcmp(pw_mtrxname, "blosum") == 0)	{	  matptr = blosum30mt;	  mat_xref = def_aa_xref;	}      else if (strcmp(pw_mtrxname, "pam") == 0)	{	  matptr = pam350mt;	  mat_xref = def_aa_xref;	}      else if (strcmp(pw_mtrxname, "gonnet") == 0)	{	  matptr = gon250mt;	  int_scale /= 10;	  mat_xref = def_aa_xref;	}      else if (strcmp(pw_mtrxname, "id") == 0)	{	  matptr = idmat;	  mat_xref = def_aa_xref;	}      else	{	  matptr = pw_usermat;	  mat_xref = pw_aa_xref;	}      maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);      if (maxres == 0) return((sint)-1);    }  for (si=MAX(0,istart);si<nseqs && si<iend;si++)    {      n = seqlen_array[si+1];      len1 = 0;      for (i=1;i<=n;i++) {	c = seq_array[si+1][i];	if ((c!=gap_pos1) && (c != gap_pos2)) len1++;      }      for (sj=MAX(si+1,jstart+1);sj<nseqs && sj<jend;sj++)	{	  m = seqlen_array[sj+1];	  if(n==0 || m==0) {	    tmat[si+1][sj+1]=1.0;	    tmat[sj+1][si+1]=1.0;	    continue;	  }	  len2 = 0;	  for (i=1;i<=m;i++) {	    c = seq_array[sj+1][i];	    if ((c!=gap_pos1) && (c != gap_pos2)) len2++;	  }	  if (dnaflag) {	    g = 2 * (float)pw_go_penalty * int_scale*gscale;	    gh = pw_ge_penalty * int_scale*ghscale;	  }	  else {	    if (mat_avscore <= 0)              g = 2 * (float)(pw_go_penalty + log((double)(MIN(n,m))))*int_scale;	    else              g = 2 * mat_avscore * (float)(pw_go_penalty +					    log((double)(MIN(n,m))))*gscale;	    gh = pw_ge_penalty * int_scale;	  }	  if (debug>1) fprintf(stdout,"go %d ge %d\n",(pint)g,(pint)gh);	  /*	    align the sequences	  */	  seq1 = si+1;        seq2 = sj+1;        forward_pass(&seq_array[seq1][0], &seq_array[seq2][0],           n, m);        reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]);        last_print = 0;	print_ptr = 1;/*        sb1 = sb2 = 1;        se1 = n-1;        se2 = m-1;*//* use Myers and Miller to align two sequences */        maxscore = diff(sb1-1, sb2-1, se1-sb1+1, se2-sb2+1,         (sint)0, (sint)0); /* calculate percentage residue identity */        mm_score = tracepath(sb1,sb2);		if(len1==0 || len2==0) mm_score=0;		else			mm_score /= (float)MIN(len1,len2);        tmat[si+1][sj+1] = ((float)100.0 - mm_score)/(float)100.0;        tmat[sj+1][si+1] = ((float)100.0 - mm_score)/(float)100.0;if (debug>1){        fprintf(stdout,"Sequences (%d:%d) Aligned. Score: %d CompScore:  %d\n",                           (pint)si+1,(pint)sj+1,                            (pint)mm_score,                            (pint)maxscore/(MIN(len1,len2)*100));}else{        info("Sequences (%d:%d) Aligned. Score:  %d",                                      (pint)si+1,(pint)sj+1,                                       (pint)mm_score);}   }  }   displ=ckfree((void *)displ);   HH=ckfree((void *)HH);   DD=ckfree((void *)DD);   RR=ckfree((void *)RR);   SS=ckfree((void *)SS);  return((sint)1);}static void add(sint v){        if(last_print<0) {                displ[print_ptr-1] = v;                displ[print_ptr++] = last_print;        }        else                last_print = displ[print_ptr++] = v;}static sint calc_score(sint iat,sint jat,sint v1,sint v2){        sint ipos,jpos;		sint ret;        ipos = v1 + iat;        jpos = v2 + jat;        ret=matrix[(int)seq_array[seq1][ipos]][(int)seq_array[seq2][jpos]];	return(ret);}static float tracepath(sint tsb1,sint tsb2){	char c1,c2;    sint  i1,i2,r;    sint i,k,pos,to_do;	sint count;	float score;	char s1[600], s2[600];        to_do=print_ptr-1;        i1 = tsb1;        i2 = tsb2;	pos = 0;	count = 0;        for(i=1;i<=to_do;++i) {	  if (debug>1) fprintf(stdout,"%d ",(pint)displ[i]);	  if(displ[i]==0) {	    c1 = seq_array[seq1][i1];	    c2 = seq_array[seq2][i2];	    	    if (debug>0)	      {		if (c1>max_aa) s1[pos] = '-';		else s1[pos]=amino_acid_codes[c1];		if (c2>max_aa) s2[pos] = '-';		else s2[pos]=amino_acid_codes[c2];	      }	    	    if ((c1!=gap_pos1) && (c1 != gap_pos2) &&		(c1 == c2)) count++;	    ++i1;	    ++i2;	    ++pos;	  }	  else {	    if((k=displ[i])>0) {	      	      if (debug>0)		for (r=0;r<k;r++)		  {		    s1[pos+r]='-';		    if (seq_array[seq2][i2+r]>max_aa) s2[pos+r] = '-';		    else s2[pos+r]=amino_acid_codes[seq_array[seq2][i2+r]];		  }	      	      i2 += k;	      pos += k;	    }	    else {	      	      if (debug>0)		for (r=0;r<(-k);r++)		  {		    s2[pos+r]='-';		    if (seq_array[seq1][i1+r]>max_aa) s1[pos+r] = '-';		    else s1[pos+r]=amino_acid_codes[seq_array[seq1][i1+r]];		  }	      	      i1 -= k;	      pos -= k;	    }	  }        }	if (debug>0) fprintf(stdout,"\n");	if (debug>0) 	  {	    for (i=0;i<pos;i++) fprintf(stdout,"%c",s1[i]);	    fprintf(stdout,"\n");	    for (i=0;i<pos;i++) fprintf(stdout,"%c",s2[i]);	    fprintf(stdout,"\n");	  }	/*	  if (count <= 0) count = 1;	*/	score = 100.0 * (float)count;	return(score);}static void forward_pass(char *ia, char *ib, sint n, sint m){  sint i,j;  pwint f,hh,p,t;  maxscore = 0;  se1 = se2 = 0;  for (i=0;i<=m;i++)    {       HH[i] = 0;       DD[i] = -g;    }  for (i=1;i<=n;i++)     {        hh = p = 0;		f = -g;        for (j=1;j<=m;j++)           {              f -= gh;               t = hh - g - gh;              if (f<t) f = t;              DD[j] -= gh;              t = HH[j] - g - gh;              if (DD[j]<t) DD[j] = t;              hh = p + matrix[(int)ia[i]][(int)ib[j]];              if (hh<f) hh = f;              if (hh<DD[j]) hh = DD[j];              if (hh<0) hh = 0;              p = HH[j];              HH[j] = hh;              if (hh > maxscore)                {                   maxscore = hh;                   se1 = i;                   se2 = j;                }           }     }}static void reverse_pass(char *ia, char *ib){  sint i,j;  pwint f,hh,p,t;  pwint cost;  cost = 0;  sb1 = sb2 = 1;  for (i=se2;i>0;i--)    {       HH[i] = -1;       DD[i] = -1;    }  for (i=se1;i>0;i--)     {        hh = f = -1;        if (i == se1) p = 0;        else p = -1;        for (j=se2;j>0;j--)           {              f -= gh;               t = hh - g - gh;              if (f<t) f = t;              DD[j] -= gh;              t = HH[j] - g - gh;              if (DD[j]<t) DD[j] = t;              hh = p + matrix[(int)ia[i]][(int)ib[j]];              if (hh<f) hh = f;              if (hh<DD[j]) hh = DD[j];              p = HH[j];              HH[j] = hh;              if (hh > cost)                {                   cost = hh;                   sb1 = i;                   sb2 = j;                   if (cost >= maxscore) break;                }           }        if (cost >= maxscore) break;     }}static int diff(sint A,sint B,sint M,sint N,sint tb,sint te){  sint type;  sint midi,midj,i,j;  int midh;  static pwint f, hh, e, s, t;    if(N<=0)  {    if(M>0) {      del(M);    }        return(-(int)tbgap(M));  }    if(M<=1) {    if(M<=0) {      add(N);      return(-(int)tbgap(N));    }        midh = -(tb+gh) - tegap(N);    hh = -(te+gh) - tbgap(N);    if (hh>midh) midh = hh;    midj = 0;    for(j=1;j<=N;j++) {      hh = calc_score(1,j,A,B)	- tegap(N-j) - tbgap(j-1);      if(hh>midh) {	midh = hh;	midj = j;      }    }        if(midj==0) {      del(1);      add(N);    }    else {      if(midj>1)	add(midj-1);      displ[print_ptr++] = last_print = 0;      if(midj<N)	add(N-midj);    }    return midh;  }  /* Divide: Find optimum midpoint (midi,midj) of cost midh */    midi = M / 2;  HH[0] = 0.0;  t = -tb;  for(j=1;j<=N;j++) {    HH[j] = t = t-gh;    DD[j] = t-g;  }    t = -tb;  for(i=1;i<=midi;i++) {    s=HH[0];    HH[0] = hh = t = t-gh;    f = t-g;    for(j=1;j<=N;j++) {      if ((hh=hh-g-gh) > (f=f-gh)) f=hh;      if ((hh=HH[j]-g-gh) > (e=DD[j]-gh)) e=hh;      hh = s + calc_score(i,j,A,B);      if (f>hh) hh = f;      if (e>hh) hh = e;            s = HH[j];      HH[j] = hh;      DD[j] = e;    }  }    DD[0]=HH[0];    RR[N]=0;  t = -te;  for(j=N-1;j>=0;j--) {    RR[j] = t = t-gh;    SS[j] = t-g;  }    t = -te;  for(i=M-1;i>=midi;i--) {    s = RR[N];    RR[N] = hh = t = t-gh;    f = t-g;        for(j=N-1;j>=0;j--) {            if ((hh=hh-g-gh) > (f=f-gh)) f=hh;      if ((hh=RR[j]-g-gh) > (e=SS[j]-gh)) e=hh;      hh = s + calc_score(i+1,j+1,A,B);      if (f>hh) hh = f;      if (e>hh) hh = e;            s = RR[j];      RR[j] = hh;      SS[j] = e;          }  }    SS[N]=RR[N];    midh=HH[0]+RR[0];  midj=0;  type=1;  for(j=0;j<=N;j++) {    hh = HH[j] + RR[j];    if(hh>=midh)      if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {	midh=hh;	midj=j;      }  }    for(j=N;j>=0;j--) {    hh = DD[j] + SS[j] + g;    if(hh>midh) {      midh=hh;      midj=j;      type=2;    }  }    /* Conquer recursively around midpoint  */      if(type==1) {             /* Type 1 gaps  */    diff(A,B,midi,midj,tb,g);    diff(A+midi,B+midj,M-midi,N-midj,g,te);  }  else {    diff(A,B,midi-1,midj,tb,0.0);    del(2);    diff(A+midi+1,B+midj,M-midi-1,N-midj,0.0,te);  }    return midh;       /* Return the score of the best alignment */}static void del(sint k){  if(last_print<0)    last_print = displ[print_ptr-1] -= k;  else    last_print = displ[print_ptr++] = -(k);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -