⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropnfa.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 4 页
字号:
/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia *//* $Name: fa35_03_06 $ - $Id: dropnfa.c,v 1.91 2008/02/19 08:50:13 wrp Exp $ *//* 18-Sep-2006 - removed global variables for alignment from nw_align   and bg_align *//* 18-Oct-2005 - converted to use a_res and aln for alignment coordinates *//* 14-May-2003 - modified to return alignment start at 0, rather than   1, for begin:end alignments*//*  implements the fasta algorithm, see:  W. R. Pearson, D. J. Lipman (1988) "Improved tools for biological  sequence comparison" Proc. Natl. Acad. Sci. USA 85:2444-2448  This version uses Smith-Waterman for final protein alignments  W. R. Pearson (1996) "Effective protein sequence comparison"  Methods Enzymol. 266:227-258  26-April-2001 - -DGAP_OPEN redefines -f, as gap open penalty  4-Nov-2001 - modify spam() while(1).*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <math.h>#include "defs.h"#include "param.h"/* this must be consistent with upam.h */#define MAXHASH 32#define NMAP MAXHASH+1/* globals for fasta */#define MAXWINDOW 64#ifndef MAXSAV#define MAXSAV 10#endif#ifndef ALLOCN0static char *verstr="3.5 Sept 2006";#elsestatic char *verstr="3.5an0 Sept 2006";#endifextern void w_abort(char *, char *);int shscore(const unsigned char *aa0, int n0, int **pam2);extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,			double *aa0_f, double **kp);extern void init_karlin_a(struct pstruct *, double *, double **);extern int do_karlin(const unsigned char *, int n1, int **,		     struct pstruct *, double *, double *,		     double *, double *);extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);char *ckalloc(size_t);#ifdef TFASTAextern int aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);#endif#include "dropnfa.h"#define DROP_INTERN#include "drop_func.h"struct swstr { int H, E;};intdmatch (const unsigned char *aa0, int n0,	const unsigned char *aa1, int n1,	int hoff, int window, 	int **pam2, int gdelval, int ggapval,	struct f_struct *f_str);extern int sw_walign (int **pam2p, int n0,		      const unsigned char *aa1, int n1,		      int q, int r,		      struct swstr *ss,		      struct a_res_str *a_res		      );/* initialize for fasta */voidinit_work (unsigned char *aa0, int n0, 	   struct pstruct *ppst,	   struct f_struct **f_arg){  int mhv, phv;  int hmax;  int i0, hv;  int pamfact;  int btemp;  struct f_struct *f_str;  /* these used to be globals, but do not need to be */  int ktup;		/* word size examined */  int fact;		/* factor used to scale ktup match value */  int kt1;		/* ktup-1 */  int lkt;		/* last ktup - initiall kt1, but can be increased			   for hsq >= NMAP */  int maxn0;		/* used in band alignment */  int *pwaa_a, *pwaa_s;		/* pam[aa0[]] profile */  int i, j, e, f;  struct swstr *ss;  int **pam2p;  int *waa;  int nsq, ip, *hsq;  if (ppst->ext_sq_set) {    nsq = ppst->nsqx; ip = 1;    hsq = ppst->hsqx;  }  else {    nsq = ppst->nsq; ip = 0;    hsq = ppst->hsq;  }  f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));#ifndef TFASTA  if((ppst->zsflag%10) == 6) {    f_str->kar_p = NULL;    init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);  }#endif  btemp = 2 * ppst->param_u.fa.bestoff / 3 +    n0 / ppst->param_u.fa.bestscale +    ppst->param_u.fa.bkfact *    (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);  if (ppst->nt_align)    btemp = (btemp*ppst->pam_h)/5;  /* normalize to standard +5/-4 */  btemp = min (btemp, ppst->param_u.fa.bestmax);  if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;  ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;  if (ppst->param_u.fa.optcut_set != 1)#ifndef TFASTA    ppst->param_u.fa.optcut = btemp;#else  ppst->param_u.fa.optcut = (btemp*3)/2;#endif#ifndef OLD_FASTA_GAP  ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;#else  ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;#endif  pamfact = ppst->param_u.fa.pamfact;  ktup = ppst->param_u.fa.ktup;  fact = ppst->param_u.fa.scfact * ktup;  if (pamfact == -1) pamfact = 0;  else if (pamfact == -2) pamfact = 1;  for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)    if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];  if (mhv <= 0) {    fprintf (stderr, " maximum hsq <=0 %d\n", mhv);    exit (1);  }  for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;  /*      kshft = 2;	*/  kt1 = ktup - 1;  hv = 1;  for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;  hmax = hv;  f_str->hmask = (hmax >> f_str->kshft) - 1;  if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {    fprintf (stderr, " *** cannot allocate hash array: hmax: %d hmask: %d\n",	     hmax, f_str->hmask);    exit (1);  }  if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {    fprintf (stderr, " *** cannot allocate pamh1 array nsq=%d\n",nsq);    exit (1);  }  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {    fprintf (stderr, " *** cannot allocate pamh2 array hmax=%d\n",hmax);    exit (1);  }  if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {    fprintf (stderr, " *** cannot allocate hash link array n0=%d",n0);    exit (1);  }  for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;  for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;  /* encode the aa0 array */  phv = hv = 0;  lkt = kt1;  /* restart hv, phv calculation */  for (i0 = 0; i0 < min(lkt,n0); i0++) {    if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ ktup; continue;}    hv = (hv << f_str->kshft) + hsq[aa0[i0]];    phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;  }  for (; i0 < n0; i0++) {    if (hsq[aa0[i0]] >= NMAP) {      hv=phv=0;      /* restart hv, phv calculation */      for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {	if (hsq[aa0[i0]] >= NMAP) {	  hv=phv=0; 	  lkt = i0+ktup;	  continue;	}	hv = (hv << f_str->kshft) + hsq[aa0[i0]];	phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;      }    }    if (i0 >= n0) break;    hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];    f_str->link[i0] = f_str->harr[hv];    f_str->harr[hv] = i0;    if (pamfact) {      f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);      /* this check should always be true, but just in case */      if (hsq[aa0[i0-kt1]]<NMAP)	phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;    }    else f_str->pamh2[hv] = fact * ktup;  }  /* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the     pam2[0][0] is now undefined for consistency with blast  */  if (pamfact)    for (i0 = 1; i0 <= nsq; i0++)      f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;  else    for (i0 = 1; i0 <= nsq; i0++)      f_str->pamh1[i0] = fact;  f_str->ndo = 0;  f_str->noff = n0-1;#ifndef ALLOCN0  if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,						sizeof (struct dstruct)))==NULL) {    fprintf (stderr," *** cannot allocate diagonal arrays: %lu\n",	     MAXDIAG *sizeof (struct dstruct));    exit (1);  };#else  if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,						sizeof (struct dstruct)))==NULL) {    fprintf (stderr," *** cannot allocate diagonal arrays: %ld\n",	     (long)n0*sizeof (struct dstruct));    exit (1);  };#endif#ifdef TFASTA  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,					    sizeof(unsigned char)))      == NULL) {    fprintf (stderr, " *** cannot allocate aa1x array %d\n", ppst->maxlen+2);    exit (1);  }  f_str->aa1x++;#endif  f_str->bss = (struct bdstr *) calloc((size_t)ppst->param_u.fa.optwid*2+4,				       sizeof(struct bdstr));  f_str->bss++;    /* allocate space for the scoring arrays */  maxn0 = n0 + 4;  if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))      == NULL) {    fprintf (stderr, " *** cannot allocate ss array %3d\n", n0);    exit (1);  }  ss++;  ss[n0].H = -1;	/* this is used as a sentinel - normally H >= 0 */  ss[n0].E = 1;  f_str->ss = ss;  /* initialize variable (-S) pam matrix */  if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {    fprintf(stderr," *** cannot allocate waa_s array %3d\n",nsq*n0);    exit(1);  }  /* initialize pam2p[1] pointers */  if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {    fprintf(stderr," *** cannot allocate pam2p[1] array %3d\n",n0);    exit(1);  }  pam2p = f_str->pam2p[1];  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {    fprintf(stderr," *** cannot allocate pam2p[1][] array %3d\n",nsq*n0);    exit(1);  }  for (i=1; i<n0; i++) {    pam2p[i]= pam2p[0] + (i*(nsq+1));  }  /* initialize universal (alignment) matrix */  if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {    fprintf(stderr," *** cannot allocate waa_a struct %3d\n",nsq*n0);    exit(1);  }     /* initialize pam2p[0] pointers */  if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {    fprintf(stderr," *** cannot allocate pam2p[1] array %3d\n",n0);    exit(1);  }  pam2p = f_str->pam2p[0];  if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {    fprintf(stderr," *** cannot allocate pam2p[1][] array %3d\n",nsq*n0);    exit(1);  }  for (i=1; i<n0; i++) {    pam2p[i]= pam2p[0] + (i*(nsq+1));  }  /*      pwaa effectively has a sequence profile --     pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)     pwaa[n0..2n0-1] has pam scores for residue 1 (A)     pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...     thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that     *pwaa++ rapidly moves though the scores of the aa1p[] position     without further indexing     For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have     a different score in each position.  */  if (ppst->pam_pssm) {    pwaa_s = f_str->waa_s;    pwaa_a = f_str->waa_a;    for (e = 0; e <=nsq; e++)	{	/* for each residue in the alphabet */      for (f = 0; f < n0; f++) {	/* for each position in aa0 */	*pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];	*pwaa_a++ = f_str->pam2p[0][f][e]  = ppst->pam2p[0][f][e];      }    }  }  else {	/* initialize scanning matrix */    pwaa_s = f_str->waa_s;    pwaa_a = f_str->waa_a;    for (e = 0; e <=nsq; e++)	/* for each residue in the alphabet */      for (f = 0; f < n0; f++)	{	/* for each position in aa0 */	*pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e];	*pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e];      }  }  f_str->max_res = max(3*n0/2,MIN_RES);  *f_arg = f_str;}/* pstring1 is a message to the manager, currently 512 *//* pstring2 is the same information, but in a markx==10 format */voidget_param (struct pstruct *ppstr, char **pstring1, char *pstring2){#ifndef TFASTA  char *pg_str="FASTA";#else  char *pg_str="TFASTA";#endif  if (!ppstr->param_u.fa.optflag) {     sprintf (pstring1[0], "%s (%s)", pg_str, verstr);     if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1");  }  else {     sprintf (pstring1[0], "%s (%s) [optimized]", pg_str, verstr);  }  sprintf (pstring1[1], #ifdef OLD_FASTA_GAP	   "%s matrix (%d:%d)%s ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d, width: %3d",#else	   "%s matrix (%d:%d)%s ktup: %d\n join: %d, opt: %d, open/ext: %d/%d, width: %3d",#endif	   ppstr->pamfile, ppstr->pam_h,ppstr->pam_l,	   (ppstr->ext_sq_set) ? "xS":"\0",	   ppstr->param_u.fa.ktup, ppstr->param_u.fa.cgap,	   ppstr->param_u.fa.optcut, ppstr->gdelval, ppstr->ggapval,	   ppstr->param_u.fa.optwid);   if (pstring2 != NULL) {     sprintf (pstring2, #ifdef OLD_FASTA_GAP	      "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",#else     "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",#endif	      pg_str,verstr,ppstr->pamfile, ppstr->pam_h,ppstr->pam_l, ppstr->gdelval,              ppstr->ggapval,ppstr->param_u.fa.ktup,ppstr->param_u.fa.optcut,	      ppstr->param_u.fa.cgap);   }}voidclose_work (const unsigned char *aa0, int n0,	    struct pstruct *ppst,	    struct f_struct **f_arg){  struct f_struct *f_str;  f_str = *f_arg;  if (f_str != NULL) {    if (f_str->kar_p!=NULL) free(f_str->kar_p);    f_str->ss--;    f_str->bss--;    /*    free(f_str->res);  */    free(f_str->waa_a);    free(f_str->waa_s);    free(f_str->pam2p[1][0]);    free(f_str->pam2p[1]);    free(f_str->pam2p[0][0]);    free(f_str->pam2p[0]);    free(f_str->ss);    free(f_str->bss);    free(f_str->diag);    free(f_str->link);    free(f_str->pamh2);     free(f_str->pamh1);    free(f_str->harr);    free(f_str);    *f_arg = NULL;  }}#ifdef ALLOCN0void savemax (struct dstruct *, int, struct f_struct *);#elsevoid savemax (struct dstruct *, struct f_struct *);#endifint spam (const unsigned char *, const unsigned char *, struct savestr *,	 int **, int, int, int);int sconn(struct savestr **, int nsave, int cgap, int pgap, int noff);void kpsort(struct savestr **, int);extern intNW_ALIGN(const unsigned char *, const unsigned char *, int, int, 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -