⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prfalign.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 3 页
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "clustalw.h"
#define ENDALN 127

#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))

/*
 *   Prototypes
 */
static lint 	pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2);
static lint 	prfscore(sint n, sint m);
static sint 	gap_penalty1(sint i, sint j,sint k);
static sint 	open_penalty1(sint i, sint j);
static sint 	ext_penalty1(sint i, sint j);
static sint 	gap_penalty2(sint i, sint j,sint k);
static sint 	open_penalty2(sint i, sint j);
static sint 	ext_penalty2(sint i, sint j);
static void 	padd(sint k);
static void 	pdel(sint k);
static void 	palign(void);
static void 	ptracepath(sint *alen);
static void 	add_ggaps(void);
static char *     add_ggaps_mask(char *mask, int len, char *path1, char *path2);

/*
 *   Global variables
 */
extern double 	**tmat;
extern float 	gap_open, gap_extend;
extern float    transition_weight;
extern sint 	gap_pos1, gap_pos2;
extern sint 	max_aa;
extern sint 	nseqs;
extern sint 	*seqlen_array;
extern sint 	*seq_weight;
extern sint    	debug;
extern Boolean 	neg_matrix;
extern sint 	mat_avscore;
extern short  	blosum30mt[], blosum40mt[], blosum45mt[];
extern short  	blosum62mt2[], blosum80mt[];
extern short  	pam20mt[], pam60mt[];
extern short  	pam120mt[], pam160mt[], pam350mt[];
extern short  	gon40mt[], gon80mt[];
extern short    gon120mt[], gon160mt[], gon250mt[], gon350mt[];
extern short  	clustalvdnamt[],swgapdnamt[];
extern short  	idmat[];
extern short    usermat[];
extern short    userdnamat[];
extern Boolean user_series;
extern UserMatSeries matseries;

extern short	def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];
extern sint		max_aln_length;
extern Boolean	distance_tree;
extern Boolean	dnaflag;
extern char 	mtrxname[];
extern char 	dnamtrxname[];
extern char 	**seq_array;
extern char 	*amino_acid_codes;
extern char     *gap_penalty_mask1,*gap_penalty_mask2;
extern char     *sec_struct_mask1,*sec_struct_mask2;
extern sint     struct_penalties1, struct_penalties2;
extern Boolean  use_ss1, use_ss2;
extern Boolean endgappenalties;

static sint 	print_ptr,last_print;
static sint 		*displ;

static char   	**alignment;
static sint    	*aln_len;
static sint    *aln_weight;
static char   	*aln_path1, *aln_path2;
static sint    	alignment_len;
static sint    	**profile1, **profile2;
static lint 	    *HH, *DD, *RR, *SS;
static lint 	    *gS;
static sint    matrix[NUMRES][NUMRES];
static sint    nseqs1, nseqs2;
static sint    	prf_length1, prf_length2;
static sint    *gaps;
static sint    gapcoef1,gapcoef2;
static sint    lencoef1,lencoef2;
static Boolean switch_profiles;

lint prfalign(sint *group, sint *aligned)
{

  static Boolean found;
  static Boolean negative;
  static Boolean error_given=FALSE;
  static sint    i, j, count = 0;
  static sint  NumSeq;
  static sint    len, len1, len2, is, minlen;
  static sint   se1, se2, sb1, sb2;
  static sint  maxres;
  static sint int_scale;
  static short  *matptr;
  static short	*mat_xref;
  static char   c;
  static lint    score;
  static float  scale;
  static double logmin,logdiff;
  static double pcid;


  alignment = (char **) ckalloc( nseqs * sizeof (char *) );
  aln_len = (sint *) ckalloc( nseqs * sizeof (sint) );
  aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) );

  for (i=0;i<nseqs;i++)
     if (aligned[i+1] == 0) group[i+1] = 0;

  nseqs1 = nseqs2 = 0;
  for (i=0;i<nseqs;i++)
    {
        if (group[i+1] == 1) nseqs1++;
        else if (group[i+1] == 2) nseqs2++;
    }

  if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0);

  if (nseqs2 > nseqs1)
    {
     switch_profiles = TRUE;
     for (i=0;i<nseqs;i++)
       {
          if (group[i+1] == 1) group[i+1] = 2;
          else if (group[i+1] == 2) group[i+1] = 1;
       }
    }
  else
  	switch_profiles = FALSE;

  int_scale = 100;

/*
   calculate the mean of the sequence pc identities between the two groups
*/
        count = 0;
        pcid = 0.0;
	negative=neg_matrix;
        for (i=0;i<nseqs;i++)
          {
             if (group[i+1] == 1)
             for (j=0;j<nseqs;j++)
               if (group[j+1] == 2)
                    {
                       count++;
                       pcid += tmat[i+1][j+1];
                    }
          }

  pcid = pcid/(float)count;
if (debug > 0) fprintf(stdout,"mean tmat %3.1f\n", pcid);


/*
  Make the first profile.
*/
  prf_length1 = 0;
  for (i=0;i<nseqs;i++)
       if (group[i+1] == 1)
		if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1];

  nseqs1 = 0;
if (debug>0) fprintf(stdout,"sequences profile 1:\n");
  for (i=0;i<nseqs;i++)
    {
       if (group[i+1] == 1)
          {
if (debug>0) {
extern char **names;
fprintf(stdout,"%s\n",names[i+1]);
}
             len = seqlen_array[i+1];
             alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) );
             for (j=0;j<len;j++)
               alignment[nseqs1][j] = seq_array[i+1][j+1];
		for(j=len;j<prf_length1;j++)
			alignment[nseqs1][j+1]=gap_pos1;
             alignment[nseqs1][prf_length1+1] = ENDALN;
             aln_len[nseqs1] = prf_length1;
             aln_weight[nseqs1] = seq_weight[i];
             nseqs1++;
          }
    }

/*
  Make the second profile.
*/
  prf_length2 = 0;
  for (i=0;i<nseqs;i++)
       if (group[i+1] == 2)
		if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1];

  nseqs2 = 0;
if (debug>0) fprintf(stdout,"sequences profile 2:\n");
  for (i=0;i<nseqs;i++)
    {
       if (group[i+1] == 2)
          {
if (debug>0) {
extern char **names;
fprintf(stdout,"%s\n",names[i+1]);
}
             len = seqlen_array[i+1];
             alignment[nseqs1+nseqs2] =
                   (char *) ckalloc( (prf_length2+2) * sizeof (char) );
             for (j=0;j<len;j++)
               alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1];
		for(j=len;j<prf_length2;j++)
			alignment[nseqs1+nseqs2][j+1]=gap_pos1;
             alignment[nseqs1+nseqs2][j] = ENDALN;
             aln_len[nseqs1+nseqs2] = prf_length2;
             aln_weight[nseqs1+nseqs2] = seq_weight[i];
             nseqs2++;
          }
    }

  max_aln_length = prf_length1 + prf_length2+2;
  
/*
   calculate real length of profiles - removing gaps!
*/
  len1=0;
  for (i=0;i<nseqs1;i++)
    {
       is=0;
       for (j=0; j<MIN(aln_len[i],prf_length1); j++)
	  {
            c = alignment[i][j];
      	    if ((c !=gap_pos1) && (c != gap_pos2)) is++;
          }
       len1+=is;
    }
  len1/=(float)nseqs1;
   
  len2=0;
  for (i=nseqs1;i<nseqs2+nseqs1;i++)
    {
       is=0;
       for (j=0; j<MIN(aln_len[i],prf_length2); j++)
	  {
            c = alignment[i][j];
      	    if ((c !=gap_pos1) && (c != gap_pos2)) is++;
          }
       len2+=is;
    }
  len2/=(float)nseqs2;

  if (dnaflag)
     {
       scale=1.0;
       if (strcmp(dnamtrxname, "iub") == 0)
	{
            matptr = swgapdnamt;
            mat_xref = def_dna_xref;
	}
       else if (strcmp(dnamtrxname, "clustalw") == 0)
	{
            matptr = clustalvdnamt;
            mat_xref = def_dna_xref;
            scale=0.66;
	}
       else 
        {
           matptr = userdnamat;
           mat_xref = dna_xref;
        }
            maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale);
            if (maxres == 0) return((sint)-1);
/*
            matrix[0][4]=transition_weight*matrix[0][0];
            matrix[4][0]=transition_weight*matrix[0][0];
            matrix[2][11]=transition_weight*matrix[0][0];
            matrix[11][2]=transition_weight*matrix[0][0];
            matrix[2][12]=transition_weight*matrix[0][0];
            matrix[12][2]=transition_weight*matrix[0][0];
*/
/* fix suggested by Chanan Rubin at Compugen */
           matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0]; 
           matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0]; 
           matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0]; 
           matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0]; 
           matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0]; 
           matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0]; 

          gapcoef1 = gapcoef2 = 100.0 * gap_open *scale;
          lencoef1 = lencoef2 = 100.0 * gap_extend *scale;
    }
  else
    {
  	if(len1==0 || len2==0) {
  		logmin=1.0;
  		logdiff=1.0;
  	}  
  	else {
  		minlen = MIN(len1,len2);
 		logmin = 1.0/log10((double)minlen);
 		if (len2<len1)
    	 		logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1));
  		else if (len1<len2)
  	   		logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2));
  		else logdiff=1.0;
		if(logdiff<0.9) logdiff=0.9;
  	}
if(debug>0) fprintf(stdout,"%d %d logmin %f   logdiff %f\n",
(pint)len1,(pint)len2, logmin,logdiff);
       scale=0.75;
       if (strcmp(mtrxname, "blosum") == 0)
        {
           scale=0.75;
           if (negative || distance_tree == FALSE) matptr = blosum40mt;
           else if (pcid > 80.0)
             {
                matptr = blosum80mt;
             }
           else if (pcid > 60.0)
             {
                matptr = blosum62mt2;
             }
           else if (pcid > 40.0)
             {
                matptr = blosum45mt;
             }
           else if (pcid > 30.0)
             {
                scale=0.5;
                matptr = blosum45mt;
             }
           else if (pcid > 20.0)
             {
                scale=0.6;
                matptr = blosum45mt;
             }
           else 
             {
                scale=0.6;
                matptr = blosum30mt;
             }
           mat_xref = def_aa_xref;

        }
       else if (strcmp(mtrxname, "pam") == 0)
        {
           scale=0.75;
           if (negative || distance_tree == FALSE) matptr = pam120mt;
           else if (pcid > 80.0) matptr = pam20mt;
           else if (pcid > 60.0) matptr = pam60mt;
           else if (pcid > 40.0) matptr = pam120mt;
           else matptr = pam350mt;
           mat_xref = def_aa_xref;
        }
       else if (strcmp(mtrxname, "gonnet") == 0)
        {
	   scale/=2.0;
           if (negative || distance_tree == FALSE) matptr = gon250mt;
           else if (pcid > 35.0)
             {
                matptr = gon80mt;
		scale/=2.0;
             }
           else if (pcid > 25.0)
             {
                if(minlen<100) matptr = gon250mt;
                else matptr = gon120mt;
             }
           else
             {
                if(minlen<100) matptr = gon350mt;
		else matptr = gon160mt;
             }
           mat_xref = def_aa_xref;
           int_scale /= 10;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -