⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pairalign.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Change int h to int gh everywhere  DES June 1994 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "clustalw.h"

#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))

#define gap(k)  ((k) <= 0 ? 0 : g + gh * (k))
#define tbgap(k)  ((k) <= 0 ? 0 : tb + gh * (k))
#define tegap(k)  ((k) <= 0 ? 0 : te + gh * (k))

/*
*	Prototypes
*/
static void add(sint v);
static sint calc_score(sint iat, sint jat, sint v1, sint v2);
static float tracepath(sint tsb1,sint tsb2);
static void forward_pass(char *ia, char *ib, sint n, sint m);
static void reverse_pass(char *ia, char *ib);
static sint diff(sint A, sint B, sint M, sint N, sint tb, sint te);
static void del(sint k);

/*
 *   Global variables
 */
#ifdef MAC
#define pwint   short
#else
#define pwint   int
#endif
static sint		int_scale;

extern double   **tmat;
extern float    pw_go_penalty;
extern float    pw_ge_penalty;
extern float	transition_weight;
extern sint 	nseqs;
extern sint 	max_aa;
extern sint 	gap_pos1,gap_pos2;
extern sint  	max_aln_length;
extern sint 	*seqlen_array;
extern sint 	debug;
extern sint  	mat_avscore;
extern short 	blosum30mt[],pam350mt[],idmat[],pw_usermat[],pw_userdnamat[];
extern short    clustalvdnamt[],swgapdnamt[];
extern short    gon250mt[];
extern short 	def_dna_xref[],def_aa_xref[],pw_dna_xref[],pw_aa_xref[];
extern Boolean  dnaflag;
extern char 	**seq_array;
extern char 	*amino_acid_codes;
extern char 	pw_mtrxname[];
extern char 	pw_dnamtrxname[];

static float 	mm_score;
static sint 	print_ptr,last_print;
static sint 	*displ;
static pwint 	*HH, *DD, *RR, *SS;
static sint 	g, gh;
static sint   	seq1, seq2;
static sint     matrix[NUMRES][NUMRES];
static pwint    maxscore;
static sint    	sb1, sb2, se1, se2;


sint pairalign(sint istart, sint iend, sint jstart, sint jend)
{
  short	 *mat_xref;
  static sint    si, sj, i;
  static sint    n,m,len1,len2;
  static sint    maxres;
  static short    *matptr;
  static char   c;
  static float gscale,ghscale;

	displ = (sint *)ckalloc((2*max_aln_length+1) * sizeof(sint));
	HH = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
	DD = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
	RR = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
	SS = (pwint *)ckalloc((max_aln_length) * sizeof(pwint));
		
#ifdef MAC
       int_scale = 10;
#else
       int_scale = 100;
#endif
  gscale=ghscale=1.0;
  if (dnaflag)
     {
if (debug>1) fprintf(stdout,"matrix %s\n",pw_dnamtrxname);
       if (strcmp(pw_dnamtrxname, "iub") == 0)
          { 
             matptr = swgapdnamt;
             mat_xref = def_dna_xref;
	  }
       else if (strcmp(pw_dnamtrxname, "clustalw") == 0)
          { 
             matptr = clustalvdnamt;
             mat_xref = def_dna_xref;
             gscale=0.6667;
             ghscale=0.751;
	  }
       else
          {
             matptr = pw_userdnamat;
             mat_xref = pw_dna_xref;
          }
            maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
            if (maxres == 0) return((sint)-1);

            matrix[0][4]=transition_weight*matrix[0][0];
            matrix[4][0]=transition_weight*matrix[0][0];
            matrix[2][11]=transition_weight*matrix[0][0];
            matrix[11][2]=transition_weight*matrix[0][0];
            matrix[2][12]=transition_weight*matrix[0][0];
            matrix[12][2]=transition_weight*matrix[0][0];
    }
  else
    {
if (debug>1) fprintf(stdout,"matrix %s\n",pw_mtrxname);
       if (strcmp(pw_mtrxname, "blosum") == 0)
          {
             matptr = blosum30mt;
             mat_xref = def_aa_xref;
          }
       else if (strcmp(pw_mtrxname, "pam") == 0)
          {
             matptr = pam350mt;
             mat_xref = def_aa_xref;
          }
       else if (strcmp(pw_mtrxname, "gonnet") == 0)
          {
             matptr = gon250mt;
             int_scale /= 10;
             mat_xref = def_aa_xref;
          }
       else if (strcmp(pw_mtrxname, "id") == 0)
          {
             matptr = idmat;
             mat_xref = def_aa_xref;
          }
       else
          {
             matptr = pw_usermat;
             mat_xref = pw_aa_xref;
          }

       maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale);
       if (maxres == 0) return((sint)-1);
    }


  for (si=MAX(0,istart);si<nseqs && si<iend;si++)
   {
     n = seqlen_array[si+1];
     len1 = 0;
     for (i=1;i<=n;i++) {
		c = seq_array[si+1][i];
		if ((c!=gap_pos1) && (c != gap_pos2)) len1++;
     }

     for (sj=MAX(si+1,jstart+1);sj<nseqs && sj<jend;sj++)
      {
        m = seqlen_array[sj+1];
        if(n==0 || m==0) {
		tmat[si+1][sj+1]=1.0;
		tmat[sj+1][si+1]=1.0;
		continue;
	}
		len2 = 0;
		for (i=1;i<=m;i++) {
			c = seq_array[sj+1][i];
			if ((c!=gap_pos1) && (c != gap_pos2)) len2++;
		}

        if (dnaflag) {
           g = 2 * (float)pw_go_penalty * int_scale*gscale;
           gh = pw_ge_penalty * int_scale*ghscale;
        }
        else {
           if (mat_avscore <= 0)
              g = 2 * (float)(pw_go_penalty + log((double)(MIN(n,m))))*int_scale;
           else
              g = 2 * mat_avscore * (float)(pw_go_penalty +
                    log((double)(MIN(n,m))))*gscale;
           gh = pw_ge_penalty * int_scale;
        }

if (debug>1) fprintf(stdout,"go %d ge %d\n",(pint)g,(pint)gh);

/*
   align the sequences
*/
        seq1 = si+1;
        seq2 = sj+1;

        forward_pass(&seq_array[seq1][0], &seq_array[seq2][0],
           n, m);

        reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]);

        last_print = 0;
	print_ptr = 1;
/*
        sb1 = sb2 = 1;
        se1 = n-1;
        se2 = m-1;
*/

/* use Myers and Miller to align two sequences */

        maxscore = diff(sb1-1, sb2-1, se1-sb1+1, se2-sb2+1, 
        (sint)0, (sint)0);
 
/* calculate percentage residue identity */

        mm_score = tracepath(sb1,sb2);

		if(len1==0 || len2==0) mm_score=0;
		else
			mm_score /= (float)MIN(len1,len2);

        tmat[si+1][sj+1] = ((float)100.0 - mm_score)/(float)100.0;
        tmat[sj+1][si+1] = ((float)100.0 - mm_score)/(float)100.0;

if (debug>1)
{
        fprintf(stdout,"Sequences (%d:%d) Aligned. Score: %d CompScore:  %d\n",
                           (pint)si+1,(pint)sj+1, 
                           (pint)mm_score, 
                           (pint)maxscore/(MIN(len1,len2)*100));
}
else
{
        info("Sequences (%d:%d) Aligned. Score:  %d",
                                      (pint)si+1,(pint)sj+1, 
                                      (pint)mm_score);
}

   }
  }
   displ=ckfree((void *)displ);
   HH=ckfree((void *)HH);
   DD=ckfree((void *)DD);
   RR=ckfree((void *)RR);
   SS=ckfree((void *)SS);


  return((sint)1);
}

static void add(sint v)
{

        if(last_print<0) {
                displ[print_ptr-1] = v;
                displ[print_ptr++] = last_print;
        }
        else
                last_print = displ[print_ptr++] = v;
}

static sint calc_score(sint iat,sint jat,sint v1,sint v2)
{
        sint ipos,jpos;
		sint ret;

        ipos = v1 + iat;
        jpos = v2 + jat;

        ret=matrix[(int)seq_array[seq1][ipos]][(int)seq_array[seq2][jpos]];

	return(ret);
}


static float tracepath(sint tsb1,sint tsb2)
{
	char c1,c2;
    sint  i1,i2,r;
    sint i,k,pos,to_do;
	sint count;
	float score;
	char s1[100], s2[100];

        to_do=print_ptr-1;
        i1 = tsb1;
        i2 = tsb2;

	pos = 0;
	count = 0;
        for(i=1;i<=to_do;++i) {

if (debug>1) fprintf(stdout,"%d ",(pint)displ[i]);
                if(displ[i]==0) {
			c1 = seq_array[seq1][i1];
			c2 = seq_array[seq2][i2];

if (debug>0)
{
if (c1>max_aa) s1[pos] = '-';
else s1[pos]=amino_acid_codes[c1];
if (c2>max_aa) s2[pos] = '-';
else s2[pos]=amino_acid_codes[c2];
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -