⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropfz2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia *//* $Name: fa35_03_06 $ - $Id: dropfz2.c,v 1.65 2008/02/19 08:50:13 wrp Exp $ *//* 18-Sept-2006 - removed static global variables for alignment *//* 2002/06/23 finally correctly implement fix to translate 'N' to 'X' *//* 1999/11/29 modification by Z. Zhang to translate DNA 'N' as 'X' *//* implements an improved version of the fasty algorithm, see:   W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of   DNA sequences with protein sequences" Genomics 46:24-36*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <ctype.h>#include "defs.h"#include "param.h"#define XTERNAL#include "upam.h"#include "uascii.h"#define NT_N 16/* globals for fasta */#define MAXWINDOW 64#ifndef MAXSAV#define MAXSAV 10#endif#ifndef ALLOCN0static char *verstr="3.5 Sept 2006";#elsestatic char *verstr="3.5an0 Sept 2006";#endifstruct dstruct		/* diagonal structure for saving current run */{			   int     score;	/* hash score of current match */   int     start;	/* start of current match */   int     stop;	/* end of current match */   struct savestr *dmax;   /* location in vmax[] where best score data saved */};struct savestr{   int     score;		/* pam score with segment optimization */   int     score0;		/* pam score of best single segment */   int     gscore;		/* score from global match */   int     dp;			/* diagonal of match */   int     start;		/* start of match in lib seq */   int     stop;		/* end of match in lib seq */};void savemax();void kpsort();struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };struct wgt { int  iii, ii, iv;};struct wgtc {char c2, c3, c4, c5;};typedef struct st_s { int C, I, D;} *st_ptr;struct f_struct {  struct dstruct *diag;  struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */  struct savestr *vptr[MAXSAV];  struct savestr *lowmax;  int ndo;  int noff;  int hmask;			/* hash constants */  int *pamh1;			/* pam based array */  int *pamh2;			/* pam based kfact array */  int *link, *harr;		/* hash arrays */  int kshft;			/* shift width */  int nsav, lowscor;		/* number of saved runs, worst saved run */#ifndef TFAST  unsigned char *aa0x, *aa0v;	/* aa0x - 111122223333 */#else  unsigned char *aa1x, *aa1v;	/* aa1x - 111122223333 */#endif				/* aa1v - computed codons */  struct sx_s *cur;  struct wgt **weight0;  struct wgt **weight1;  struct wgtc **weight_c;  int *waa;  int *res;  int max_res;  st_ptr up, down, tp;};#define DROP_INTERN#include "drop_func.h"static int dmatchx(const unsigned char *aa0, int n0,		   const unsigned char *aa1, int n1,		   int hoff, int window, 		   int **pam2, int gdelval, int ggapval, int gshift,		   struct f_struct *f_str);int shscore(unsigned char *aa0, int n0, int **pam2);int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);int spam (const unsigned char *aa0, const unsigned char *aa1, 	  struct savestr *dmax, int **pam2,	  struct f_struct *f_str);int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);int lx_band(const unsigned char *prot_seq, int len_prot,	    const unsigned char *dna_prot_seq, int len_dna_prot,	    int **pam_matrix, int gopen, int gext,	    int gshift, int start_diag, int width, struct f_struct *f_str);static void update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);extern void w_abort (char *p, char *p1);extern void aagetmap(char *to, int n);/* initialize for fasta *//* modified 30-August-1999 by Zheng Zhang to work with an extended alphabet *//* Assume naa=47, and wgts[47][23] matches both upper and lower caseamoino acids with another amino acid.  And also assume the DNA letterdoes not have upper/lower case difference.  If you also allow DNAsequence to be upper/lower case letters, more needs be changed. Notonly here, but also in the alignment code, the way that pack a codoninto a number between 0-63 need be changed. *//* modified so that if **weightci==NULL, do not fiddle with characters */voidinit_weights(struct wgt ***weighti, struct wgtc ***weightci,	     int **wgts, int gshift, int gsubs, int naa){  int i, j, do_wgtc=0;  int aa, b, a, x, y, z;  int *wwt, e;  struct wgt **weight;  struct wgtc **weightc;  char aacmap[64];  int temp[MAXLC+1][64]; /*change*/  char le[MAXLC+1][64];  if (naa > MAXLC) {    fprintf(stderr,"*** dropfz2.c compilation problem %d > %d ***\n",	   naa, MAXLC);  }  if ((*weighti=(struct wgt **)calloc((size_t)(naa+1),sizeof(struct wgt *)))      ==NULL) {    fprintf(stderr," cannot allocate weights array: %d\n",naa);    exit(1);  }  weight = *weighti;  for (aa=0; aa <= naa; aa++) {    if ((weight[aa]=(struct wgt *)calloc((size_t)256,sizeof(struct wgt)))	==NULL) {      fprintf(stderr," cannot allocate weight[]: %d/%d\n",aa,naa);      exit(1);    }  }  if (weightci !=NULL) {    if ((*weightci=(struct wgtc **)calloc((size_t)(naa+1),					  sizeof(struct wgtc *)))==NULL) {      fprintf(stderr," cannot allocate weight_c array: %d\n",naa);      exit(1);    }    weightc = *weightci;    for (aa=0; aa <= naa; aa++) {      if ((weightc[aa]=(struct wgtc *)calloc((size_t)256,sizeof(struct wgtc)))	  ==NULL) {	fprintf(stderr," cannot allocate weightc[]: %d/%d\n",aa,naa);	exit(1);      }    }    do_wgtc = 1;  }  else do_wgtc = 0;  aagetmap(aacmap,64);  for (aa = 0; aa <= naa; aa++) {	/* change*/    wwt = wgts[aa];			/* pam matrix */    for (i = 0; i < 64; i++) {	/* i iterates through the codons */      x = -1000;		/* large negative */      y = i;      for (j = 0; j < 64; j++) {	/* j iterates through the codons */	z = ((~i & j) | (i & ~j));	b = 0;		/* score = 0 */	if (z % 4) b-= gsubs;	if (z /16) b-= gsubs;	if ((z /4) % 4) b -= gsubs;   	b += wwt[aascii[aacmap[j]]];  /* add the match score for char j*/	if (b > x) {	  x = b;		/* x has the score */	  y = j;		/* y has the character */	}      }      /*	  if (y < 0 || y > 63) printf("%d %d %d %d ",aa, i, x, y); */      temp[aa][i] = x;      le[aa][i] = y;    }    /*            printf("\n"); */  }  for (aa= 0; aa <= naa; aa++) {       wwt = temp[aa];      for (i = 0; i < 256; i++) {          for (x=-100,b = 0; b < 4; b++) {              z = (i/ (1 << ((b+1)*2)))*(1<<(b*2))+(i%(1<<(b*2)));	      if (x < (e=wwt[z])) {		  x = e;		  if (do_wgtc) weightc[aa][i].c4 = aacmap[le[aa][z]];	      }          }          weight[aa][i].iv=x-gshift;          weight[aa][i].iii = wwt[i%64];	  if (do_wgtc) {	    weightc[aa][i].c5 = aacmap[le[aa][i%64]];	    weightc[aa][i].c3 = aacmap[i%64];	  }          x = i %16;          for (y = -100, b = 0; b < 3; b++) {              z = ((x >> (b*2)) << (b*2+2)) + (x % (1 << (b*2)));               for (a = 0; a < 4; a++) {		  if ((e =wwt[z+(a<<(b*2))]) > y) {		      y = e;		      if (do_wgtc) 			weightc[aa][i].c2 = aacmap[le[aa][z+(a<<(b*2))]];		  }              }          }          weight[aa][i].ii = y-gshift;      }  }  /*106=CGGG*/  for (aa = 0; aa <= naa; aa++) {    weight[aa][106].iii = wgts[aa][23]; /* is 23 the code for 'X'?*/    weight[aa][106].iv = weight[aa][106].ii = weight[aa][106].iii-gshift;    if (do_wgtc) {      weightc[aa][106].c5 = weightc[aa][106].c4 = weightc[aa][106].c3	= weightc[aa][106].c2 = 'X';    }  }}voidfree_weights(struct wgt ***weighti0, struct wgt ***weighti1, 	     struct wgtc ***weightci, int naa){  int aa;  struct wgt **weight0;  struct wgt **weight1;  struct wgtc **weightc;  weight0 = *weighti0;  weight1 = *weighti1;  weightc = *weightci;  for (aa=0; aa <= naa; aa++) {free(weight0[aa]);}  for (aa=0; aa <= naa; aa++) {free(weight1[aa]);}  for (aa=0; aa <= naa; aa++) {free(weightc[aa]);}  free(weight0);  free(weight1);  free(weightc);}static voidpre_com(const unsigned char *aa0, int n0, unsigned char *aa0v){  int dnav, i;   dnav = (hnt[aa0[0]]<<2) + hnt[aa0[1]];   for (i=2; i<n0; i++) {     dnav = ((dnav<<2)+hnt[aa0[i]])&255;     if (aa0[i] == NT_N  || aa0[i-1]==NT_N || aa0[i-2] == NT_N)        aa0v[i-2] = 106;     else {       if (dnav == 106/*CGGG*/) dnav = 42/*AGGG*/;       aa0v[i-2]=dnav;     }   }}static voidpre_com_r(const unsigned char *aa0, int n0, unsigned char *aa0v){    int dnav, i, ir;    dnav = (3-hnt[aa0[n0-1]]<<2) + 3-hnt[aa0[n0-2]];    for (i=2, ir=n0-3; i<n0; i++,ir--) {      dnav = ((dnav<<2)+3-hnt[aa0[ir]])&255;       if (aa0[ir] == NT_N || aa0[ir+1]==NT_N || aa0[ir+2] == NT_N) 	aa0v[i-2] = 106;      else {	if (dnav == 106) dnav = 42;	aa0v[i-2]=dnav;      }    }}voidinit_work (unsigned char *aa0, int n0, 	   struct pstruct *ppst,	   struct f_struct **f_arg){   int mhv, phv;   int hmax;   int i0, hv;   int pamfact;   int btemp;   struct f_struct *f_str;   struct bdstr *bss;   /* these used to be globals, but do not need to be */   int ktup, fact, kt1, lkt;   int maxn0;   int *pwaa;   int i, j, q;   struct swstr *ss, *r_ss;   int *waa;   int *res;   int nsq, ip, *hsq, naat;#ifndef TFAST   int last_n0, itemp, dnav;   unsigned char *fd, *fs, *aa0x, *aa0v;   int n0x, n0x3;#endif   if (nt[NT_N] != 'N') {     fprintf(stderr," nt[NT_N] (%d) != 'X' (%c) - recompile\n",NT_N,nt[NT_N]);     exit(1);   }        if (ppst->ext_sq_set) {     nsq = ppst->nsqx; ip = 1;     hsq = ppst->hsqx;   }   else {     nsq = ppst->nsq; ip = 0;     hsq = ppst->hsq;   }   f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));   btemp = 2 * ppst->param_u.fa.bestoff / 3 +      n0 / ppst->param_u.fa.bestscale +      ppst->param_u.fa.bkfact *      (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);   btemp = min (btemp, ppst->param_u.fa.bestmax);   if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;   ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;   if (ppst->param_u.fa.optcut_set != 1)#ifndef TFAST      ppst->param_u.fa.optcut = (btemp*5)/4;#else      ppst->param_u.fa.optcut = (btemp*4)/3;#endif#ifdef OLD_FASTA_GAP   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;#else   ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;#endif   pamfact = ppst->param_u.fa.pamfact;   ktup = ppst->param_u.fa.ktup;   fact = ppst->param_u.fa.scfact * ktup;#ifndef TFAST   /* before hashing, we must set up some space and translate the sequence */   maxn0 = n0 + 2;   if ((aa0x =(unsigned char *)calloc((size_t)maxn0,					     sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);     exit (1);   }   aa0x++;   f_str->aa0x = aa0x;   if ((aa0v =(unsigned char *)calloc((size_t)maxn0,					     sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa0v array %d\n", maxn0);     exit (1);   }   aa0v++;   f_str->aa0v = aa0v;   /* make a precomputed codon number series */   pre_com(aa0, n0, aa0v);   last_n0 = 0;   for (itemp=0; itemp<3; itemp++) {     n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);     /*         for (i=0; i<n0x; i++) {	   fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);	   if ((i%60)==59) fprintf(stderr,"\n");	   }	   fprintf(stderr,"\n");	   */     last_n0 += n0x+1;   }   /*     fprintf(stderr,"\n"); */   n0x = n0;   n0x3 = n0x/3;   /* now switch aa0 and aa0x for hashing functions */   fs = aa0;   aa0 = aa0x;   aa0x = fs;#endif   if (ppst->ext_sq_set) naat = MAXLC;   else naat = MAXUC;   init_weights(&f_str->weight0, NULL,		ppst->pam2[ip],-ppst->gshift,-ppst->gsubs,naat);   init_weights(&f_str->weight1, &f_str->weight_c,		ppst->pam2[0],-ppst->gshift,-ppst->gsubs,naat);   if (pamfact == -1)      pamfact = 0;   else if (pamfact == -2)      pamfact = 1;   for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)      if (hsq[i0] < NMAP && hsq[i0] > mhv)	 mhv = ppst->hsq[i0];   if (mhv <= 0)   {      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);      exit (1);   }   for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;/*      kshft = 2;	*/   kt1 = ktup - 1;   hv = 1;   for (i0 = 0; i0 < ktup; i0++)      hv = hv << f_str->kshft;   hmax = hv;   f_str->hmask = (hmax >> f_str->kshft) - 1;   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate hash array\n");     exit (1);   }   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh1 array\n");     exit (1);   }   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh2 array\n");     exit (1);   }   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate hash link array");     exit (1);   }   for (i0 = 0; i0 < hmax; i0++)      f_str->harr[i0] = -1;   for (i0 = 0; i0 < n0; i0++)      f_str->link[i0] = -1;   /* encode the aa0 array */   phv = hv = 0;   lkt = kt1;   for (i0 = 0; i0 < min(n0,lkt); i0++) {     if (hsq[aa0[i0]] >= NMAP) {       hv=phv=0; lkt = i0+ktup; continue;     }     hv = (hv << f_str->kshft) + ppst->hsq[aa0[i0]];     phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;   }   for (; i0 < n0; i0++) {     if (hsq[aa0[i0]] >= NMAP) {       hv=phv=0;       /* restart hv, phv calculation */       for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {	 if (hsq[aa0[i0]] >= NMAP) {	   hv=phv=0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -