⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readmat.c

📁 clustalw1.83.DOS.ZIP,用于多序列比对的软件
💻 C
字号:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "clustalw.h"
#include "matrices.h"


/*
 *   Prototypes
 */
static Boolean commentline(char *line);


/*
 *   Global variables
 */

extern char 	*amino_acid_codes;
extern sint 	gap_pos1, gap_pos2;
extern sint 	max_aa;
extern short 	def_dna_xref[],def_aa_xref[];
extern sint 	mat_avscore;
extern sint 	debug;
extern Boolean  dnaflag;

extern Boolean user_series;
extern UserMatSeries matseries;
extern short usermatseries[MAXMAT][NUMRES][NUMRES];
extern short aa_xrefseries[MAXMAT][NUMRES+1];


void init_matrix(void)
{

   char c1,c2;
   short i, j, maxres;

   max_aa = strlen(amino_acid_codes)-2;
   gap_pos1 = NUMRES-2;          /* code for gaps inserted by clustalw */
   gap_pos2 = NUMRES-1;           /* code for gaps already in alignment */

/*
   set up cross-reference for default matrices hard-coded in matrices.h
*/
   for (i=0;i<NUMRES;i++) def_aa_xref[i] = -1;
   for (i=0;i<NUMRES;i++) def_dna_xref[i] = -1;

   maxres = 0;
   for (i=0;(c1=amino_acid_order[i]);i++)
     {
         for (j=0;(c2=amino_acid_codes[j]);j++)
          {
           if (c1 == c2)
               {
                  def_aa_xref[i] = j;
                  maxres++;
                  break;
               }
          }
         if ((def_aa_xref[i] == -1) && (amino_acid_order[i] != '*'))
            {
                error("residue %c in matrices.h is not recognised",
                                       amino_acid_order[i]);
            }
     }

   maxres = 0;
   for (i=0;(c1=nucleic_acid_order[i]);i++)
     {
         for (j=0;(c2=amino_acid_codes[j]);j++)
          {
           if (c1 == c2)
               {
                  def_dna_xref[i] = j;
                  maxres++;
                  break;
               }
          }
         if ((def_dna_xref[i] == -1) && (nucleic_acid_order[i] != '*'))
            {
                error("nucleic acid %c in matrices.h is not recognised",
                                       nucleic_acid_order[i]);
            }
     }
}

sint get_matrix(short *matptr, short *xref, sint matrix[NUMRES][NUMRES], Boolean neg_flag, sint scale)
{
   sint gg_score = 0;
   sint gr_score = 0;
   sint i, j, k, ix = 0;
   sint ti, tj;
   sint  maxres;
   sint av1,av2,av3,min, max;
/*
   default - set all scores to 0
*/
   for (i=0;i<=max_aa;i++)
      for (j=0;j<=max_aa;j++)
          matrix[i][j] = 0;

   ix = 0;
   maxres = 0;
   for (i=0;i<=max_aa;i++)
    {
      ti = xref[i];
      for (j=0;j<=i;j++)
       {
          tj = xref[j]; 
          if ((ti != -1) && (tj != -1))
            {
               k = matptr[ix];
               if (ti==tj)
                  {
                     matrix[ti][ti] = k * scale;
                     maxres++;
                  }
               else
                  {
                     matrix[ti][tj] = k * scale;
                     matrix[tj][ti] = k * scale;
                  }
               ix++;
            }
       }
    }

   --maxres;

   av1 = av2 = av3 = 0;
   for (i=0;i<=max_aa;i++)
    {
      for (j=0;j<=i;j++)
       {
           av1 += matrix[i][j];
           if (i==j)
              {
                 av2 += matrix[i][j];
              }
           else
              {
                 av3 += matrix[i][j];
              }
       }
    }

   av1 /= (maxres*maxres)/2;
   av2 /= maxres;
   av3 /= ((float)(maxres*maxres-maxres))/2;
  mat_avscore = -av3;

  min = max = matrix[0][0];
  for (i=0;i<=max_aa;i++)
    for (j=1;j<=i;j++)
      {
        if (matrix[i][j] < min) min = matrix[i][j];
        if (matrix[i][j] > max) max = matrix[i][j];
      }
if (debug>1) fprintf(stdout,"maxres %d\n",(pint)max_aa);
if (debug>1) fprintf(stdout,"average mismatch score %d\n",(pint)av3);
if (debug>1) fprintf(stdout,"average match score %d\n",(pint)av2);
if (debug>1) fprintf(stdout,"average score %d\n",(pint)av1);

/*
   if requested, make a positive matrix - add -(lowest score) to every entry
*/
  if (neg_flag == FALSE)
   {

if (debug>1) fprintf(stdout,"min %d max %d\n",(pint)min,(pint)max);
      if (min < 0)
        {
           for (i=0;i<=max_aa;i++)
            {
              ti = xref[i];
              if (ti != -1)
                {
                 for (j=0;j<=max_aa;j++)
                   {
                    tj = xref[j];
/*
                    if (tj != -1) matrix[ti][tj] -= (2*av3);
*/
                    if (tj != -1) matrix[ti][tj] -= min;
                   }
                }
            }
        }
/*
       gr_score = av3;
       gg_score = -av3;
*/

   }



  for (i=0;i<gap_pos1;i++)
   {
      matrix[i][gap_pos1] = gr_score;
      matrix[gap_pos1][i] = gr_score;
      matrix[i][gap_pos2] = gr_score;
      matrix[gap_pos2][i] = gr_score;
   }
  matrix[gap_pos1][gap_pos1] = gg_score;
  matrix[gap_pos2][gap_pos2] = gg_score;
  matrix[gap_pos2][gap_pos1] = gg_score;
  matrix[gap_pos1][gap_pos2] = gg_score;

  maxres += 2;

  return(maxres);
}


sint read_matrix_series(char *filename, short *usermat, short *xref)
{
   FILE *fd = NULL, *matfd = NULL;
   char mat_filename[FILENAMELEN];
   char inline1[1024];
   sint  maxres = 0;
   sint nmat;
   sint n,llimit,ulimit;

   if (filename[0] == '\0')
     {
        error("comparison matrix not specified");
        return((sint)0);
     }
   if ((fd=fopen(filename,"r"))==NULL) 
     {
        error("cannot open %s", filename);
        return((sint)0);
     }

/* check the first line to see if it's a series or a single matrix */
   while (fgets(inline1,1024,fd) != NULL)
     {
        if (commentline(inline1)) continue;
	if(linetype(inline1,"CLUSTAL_SERIES"))
		user_series=TRUE;
	else
		user_series=FALSE;
        break;
     }

/* it's a single matrix */
  if(user_series == FALSE)
    {
	fclose(fd);
   	maxres=read_user_matrix(filename,usermat,xref);
   	return(maxres);
    }

/* it's a series of matrices, find the next MATRIX line */
   nmat=0;
   matseries.nmat=0;
   while (fgets(inline1,1024,fd) != NULL)
     {
        if (commentline(inline1)) continue;
	if(linetype(inline1,"MATRIX"))
	{
		if(sscanf(inline1+6,"%d %d %s",&llimit,&ulimit,mat_filename)!=3)
		{
			error("Bad format in file %s\n",filename);
   			fclose(fd);
			return((sint)0);
		}
		if(llimit<0 || llimit > 100 || ulimit <0 || ulimit>100)
		{
			error("Bad format in file %s\n",filename);
   			fclose(fd);
			return((sint)0);
		}
		if(ulimit<=llimit)
		{
			error("in file %s: lower limit is greater than upper (%d-%d)\n",filename,llimit,ulimit);
   			fclose(fd);
			return((sint)0);
		}
   		n=read_user_matrix(mat_filename,&usermatseries[nmat][0][0],&aa_xrefseries[nmat][0]);
		if(n<=0)
		{
			error("Bad format in matrix file %s\n",mat_filename);
   			fclose(fd);
			return((sint)0);
		}
		matseries.mat[nmat].llimit=llimit;
		matseries.mat[nmat].ulimit=ulimit;
		matseries.mat[nmat].matptr=&usermatseries[nmat][0][0];
		matseries.mat[nmat].aa_xref=&aa_xrefseries[nmat][0];
		nmat++;
	}
    }
   fclose(fd);
   matseries.nmat=nmat;

   maxres=n;
   return(maxres);

}

sint read_user_matrix(char *filename, short *usermat, short *xref)
{
   double f;
   FILE *fd;
   sint  numargs,farg;
   sint i, j, k = 0;
   char codes[NUMRES];
   char inline1[1024];
   char *args[NUMRES+4];
   char c1,c2;
   sint ix1, ix = 0;
   sint  maxres = 0;
   float scale;

   if (filename[0] == '\0')
     {
        error("comparison matrix not specified");
       	return((sint)0);
     }

   if ((fd=fopen(filename,"r"))==NULL) 
   {
       	error("cannot open %s", filename);
       	return((sint)0);
   }
   maxres = 0;
   while (fgets(inline1,1024,fd) != NULL)
     {
        if (commentline(inline1)) continue;
	if(linetype(inline1,"CLUSTAL_SERIES"))
   	{
       		error("in %s - single matrix expected.", filename);
		fclose(fd);
       		return((sint)0);
   	}
/*
   read residue characters.
*/
        k = 0;
        for (j=0;j<strlen(inline1);j++)
          {
             if (isalpha((int)inline1[j])) codes[k++] = inline1[j];
             if (k>NUMRES)
                {
                   error("too many entries in matrix %s",filename);
		   fclose(fd);
                   return((sint)0);
                }
          }
        codes[k] = '\0';
        break;
    }

   if (k == 0) 
     {
        error("wrong format in matrix %s",filename);
  	fclose(fd);
        return((sint)0);
     }

/*
   cross-reference the residues
*/
   for (i=0;i<NUMRES;i++) xref[i] = -1;

   maxres = 0;
   for (i=0;(c1=codes[i]);i++)
     {
         for (j=0;(c2=amino_acid_codes[j]);j++)
           if (c1 == c2)
               {
                  xref[i] = j;
                  maxres++;
                  break;
               }
         if ((xref[i] == -1) && (codes[i] != '*'))
            {
                warning("residue %c in matrix %s not recognised",
                                       codes[i],filename);
            }
     }


/*
   get the weights
*/

   ix = ix1 = 0;
   while (fgets(inline1,1024,fd) != NULL)
     {
        if (inline1[0] == '\n') continue;
        if (inline1[0] == '#' ||
            inline1[0] == '!') break;
        numargs = getargs(inline1, args, (int)(k+1));
        if (numargs < maxres)
          {
             error("wrong format in matrix %s",filename);
  	     fclose(fd);
             return((sint)0);
          }
        if (isalpha(args[0][0])) farg=1;
        else farg=0;

/* decide whether the matrix values are float or decimal */
	scale=1.0;
	for(i=0;i<strlen(args[farg]);i++)
		if(args[farg][i]=='.')
		{
/* we've found a float value */
			scale=10.0;
			break;
		}

        for (i=0;i<=ix;i++)
          {
             if (xref[i] != -1)
               {
                  f = atof(args[i+farg]);
                  usermat[ix1++] = (short)(f*scale);
               }
          }
        ix++;
     }
   if (ix != k+1)
     {
        error("wrong format in matrix %s",filename);
  	fclose(fd);
        return((sint)0);
     }


  maxres += 2;
  fclose(fd);

  return(maxres);
}

int getargs(char *inline1,char *args[],int max)
{

	char	*inptr;
/*
#ifndef MAC
	char	*strtok(char *s1, const char *s2);
#endif
*/
	int	i;

	inptr=inline1;
	for (i=0;i<=max;i++)
	{
		if ((args[i]=strtok(inptr," \t\n"))==NULL)
			break;
		inptr=NULL;
	}

	return(i);
}


static Boolean commentline(char *line)
{
        int i;
 
        if(line[0] == '#') return TRUE;
        for(i=0;line[i]!='\n' && line[i]!=EOS;i++) {
                if(!isspace(line[i]))
			return FALSE;
        }
        return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -