⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropfx.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia *//* $Name: fa35_03_06 $ - $Id: dropfx.c,v 1.74 2008/02/19 08:50:13 wrp Exp $ *//* implements the fastx algorithm, see:   W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of   DNA sequences with protein sequences" Genomics 46:24-36   see dropnfa.c for better variable descriptions and comments*/   /* 18-Sept-2006 - remove global variables used for alignment *//* 22-June-2006 - correct incorrect alignment coordinates generated   after pro_dna() on projected DNA region.  *//* 9-May-2003 -> 3.46 changed lx_band to use projected protein   boundary end.  this fixes some addressing issues on MacOSX, and   speeds up alignment on very long proteins*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <math.h>#include "defs.h"#include "param.h"#define XTERNAL#include "upam.h"/* this must be consistent with upam.h */#define MAXHASH 32#define NMAP MAXHASH+1/* globals for fasta */#define MAXWINDOW 64#ifndef MAXSAV#define MAXSAV 10#endif#ifndef ALLOCN0static char *verstr="3.5 Sept 2006";#elsestatic char *verstr="3.5an0 May 2006";#endifstruct dstruct		/* diagonal structure for saving current run */{			   int     score;	/* hash score of current match */   int     start;	/* start of current match */   int     stop;	/* end of current match */   struct savestr *dmax;   /* location in vmax[] where best score data saved */};struct savestr{   int     score;		/* pam score with segment optimization */   int     score0;		/* pam score of best single segment */   int     gscore;		/* score from global match */   int     dp;			/* diagonal of match */   int     start;		/* start of match in lib seq */   int     stop;		/* end of match in lib seq */};struct swstr { int H, E;};struct bdstr { int CC, DD, CP, DP;};void savemax();void kpsort();struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };struct f_struct {  struct dstruct *diag;  struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */  struct savestr *vptr[MAXSAV];  struct savestr *lowmax;  int ndo;  int noff;  int hmask;			/* hash constants */  int *pamh1;			/* pam based array */  int *pamh2;			/* pam based kfact array */  int *link, *harr;		/* hash arrays */  int kshft;			/* shift width */  int nsav, lowscor;		/* number of saved runs, worst saved run */#ifndef TFAST  unsigned char *aa0x;		/* contains translated codons 111222333*/  unsigned char *aa0y;		/* contains translated codons 123123123*/#else  unsigned char *aa1x;		/* contains translated codons 111222333 */  unsigned char *aa1y;		/* contains translated codons 123123123 */#endif  struct sx_s *cur;  int *waa0;  int *waa1;  int *res;  int max_res;};#define DROP_INTERN#include "drop_func.h"static int dmatchx(const unsigned char *aa0, int n0,		   const unsigned char *aa1, int n1,		   int hoff, int window, 		   int **pam2, int gdelval, int ggapval, int gshift,		   struct f_struct *f_str);int shscore(unsigned char *aa0, int n0, int **pam2);int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);int spam (const unsigned char *aa0, const unsigned char *aa1, 	  struct savestr *dmax, int **pam2,	  struct f_struct *f_str);int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);int lx_band(const unsigned char *prot_seq, int len_prot,	    const unsigned char *dna_prot_seq, int len_dna_prot,	    int **pam_matrix, int gopen, int gext,	    int gshift, int start_diag, int width, struct f_struct *f_str);static voidupdate_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);extern void w_abort (char *p, char *p1);/* initialize for fasta */voidinit_work (unsigned char *aa0, int n0, 	   struct pstruct *ppst,	   struct f_struct **f_arg){   int mhv, phv;   int hmax;   int i0, hv;   int pamfact;   int btemp;   struct f_struct *f_str;   int ktup;		/* word size examined */   int fact;		/* factor used to scale ktup match value */   int kt1;		/* ktup-1 */   int lkt;		/* last ktup - initiall kt1, but can be increased			   for hsq >= NMAP */   int maxn0;   int *pwaa;   int i, j, q;   struct swstr *ss, *r_ss;   int *waa;   int *res;   int nsq, ip, *hsq;#ifndef TFAST   int last_n0, itemp;   unsigned char *fd, *fs, *aa0x, *aa0y, *aa0s;   int n0x, n0x3;#endif  if (ppst->ext_sq_set) {    nsq = ppst->nsqx; ip = 1;    hsq = ppst->hsqx;  }  else {    nsq = ppst->nsq; ip = 0;    hsq = ppst->hsq;  }   f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));   btemp = 2 * ppst->param_u.fa.bestoff / 3 +      n0 / ppst->param_u.fa.bestscale +      ppst->param_u.fa.bkfact *      (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);   btemp = min (btemp, ppst->param_u.fa.bestmax);   if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;   ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;   if (ppst->param_u.fa.optcut_set != 1)#ifndef TFAST      ppst->param_u.fa.optcut = (btemp*5)/4;#else      ppst->param_u.fa.optcut = (btemp*4)/3;#endif#ifdef OLD_FASTA_GAP   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;#else   ppst->param_u.fa.pgap = ppst->gdelval;#endif   pamfact = ppst->param_u.fa.pamfact;   ktup = ppst->param_u.fa.ktup;   fact = ppst->param_u.fa.scfact * ktup;   if (pamfact == -1)      pamfact = 0;   else if (pamfact == -2)      pamfact = 1;   for (i0 = 1, mhv = -1; i0 <=nsq; i0++)      if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];   if (mhv <= 0) {      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);      exit (1);   }   for (f_str->kshft = 0; mhv > 0; mhv /= 2)      f_str->kshft++;/*      kshft = 2;	*/   kt1 = ktup - 1;   hv = 1;   for (i0 = 0; i0 < ktup; i0++) {     hv = hv << f_str->kshft;   }   hmax = hv;   f_str->hmask = (hmax >> f_str->kshft) - 1;   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate hash array\n");     exit (1);   }   if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh1 array\n");     exit (1);   }   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh2 array\n");     exit (1);   }   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate hash link array");     exit (1);   }#ifdef TFAST   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,					     sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);     exit (1);   }   f_str->aa1x++;   if ((f_str->aa1y =(unsigned char *)calloc((size_t)ppst->maxlen+2,					     sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa1y array %d\n", ppst->maxlen+2);     exit (1);   }   f_str->aa1y++;#else	/* FASTX */   maxn0 = n0 + 2;   if ((aa0x =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);     exit (1);   }   aa0x++;   f_str->aa0x = aa0x;   if ((aa0y =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa0y array %d\n", maxn0);     exit (1);   }   aa0y++;   f_str->aa0y = aa0y;   last_n0 = 0;   for (itemp=0; itemp<3; itemp++) {     n0x = saatran(aa0,&aa0x[last_n0],n0,itemp);     /*       for (i=0; i<n0x; i++) {       fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);       if ((i%60)==59) fprintf(stderr,"\n");       }       fprintf(stderr,"\n");     */     last_n0 += n0x+1;   }   /*        fprintf(stderr,"\n");   */   for (itemp=0, fs=aa0x; itemp <3; itemp++,fs++) {     for (fd = &aa0y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;     *fd=EOSEQ;   }   /* now switch aa0 and aa0x for hashing functions */   /* this seems dangerous in threaded code, but only the pointer is changed,      not the data itself */   fs = aa0;   aa0 = aa0x;   aa0x = fs;					 #endif   for (i0 = 0; i0 < hmax; i0++)      f_str->harr[i0] = -1;   for (i0 = 0; i0 < n0; i0++)      f_str->link[i0] = -1;   /* encode the aa0 array */   phv = hv = 0;   lkt = kt1;   for (i0 = 0; i0 < min(lkt,n0); i0++) {     if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt=i0+ktup; continue;}     hv = (hv << f_str->kshft) + hsq[aa0[i0]];     phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;   }   for (; i0 < n0; i0++) {     if (hsq[aa0[i0]] >= NMAP) {       hv=phv=0;        lkt = i0+ktup;       /* restart hv, phv calculation */       for (; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {	 if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ktup; continue;}	 hv = (hv << f_str->kshft) + hsq[aa0[i0]];	 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;       }     }     if (i0 >= n0) break;     hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];     f_str->link[i0] = f_str->harr[hv];     f_str->harr[hv] = i0;     if (pamfact) {       f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);       /* this check should always be true, but just in case */       if (hsq[aa0[i0-kt1]]<NMAP)	 phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;     }     else f_str->pamh2[hv] = fact * ktup;   }#ifndef TFAST   /* done hashing, now switch aa0, aa0x back */   fs = aa0;   aa0 = aa0x;   aa0x = fs;#endif/* this has been modified from 0..<nsq to 1..<=nsq because the   pam2[0][0] is now undefined for consistency with blast*/   if (pamfact)      for (i0 = 1; i0 <= nsq; i0++)	 f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;   else      for (i0 = 1; i0 <= nsq; i0++)	 f_str->pamh1[i0] = fact;   f_str->ndo = 0;	/* used to save time on diagonals with long queries */#ifndef ALLOCN0   if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,						 sizeof (struct dstruct)))==NULL) {      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",	      (long) MAXDIAG *sizeof (struct dstruct));      exit (1);     };#else   if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,					      sizeof (struct dstruct)))==NULL) {      fprintf (stderr," cannot allocate diagonal arrays: %ld\n",	      (long)n0*sizeof (struct dstruct));      exit (1);     };#endif   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);     exit(1);   }   pwaa = waa;   for (i=0; i<=nsq; i++) {     for (j=0;j<n0; j++) {       *pwaa = ppst->pam2[ip][i][aa0[j]];       pwaa++;     }   }   f_str->waa0 = waa;   if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {     fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);     exit(1);   }   pwaa = waa;   for (i=0; i<=nsq; i++) {     for (j=0;j<n0; j++) {       *pwaa = ppst->pam2[0][i][aa0[j]];       pwaa++;     }   }   f_str->waa1 = waa;#ifndef TFAST   maxn0 = max(2*n0,MIN_RES);#else   /* maxn0 needs to be large enough to accomodate introns      for TFASTX.  For all other functions, it will be      more reasonable. */   maxn0 = max(4*n0,MIN_RES);#endif   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);     exit(1);   }   f_str->res = res;   f_str->max_res = maxn0;   *f_arg = f_str;}/* pstring1 is a message to the manager, currently 512 *//* pstring2 is the same information, but in a markx==10 format */voidget_param (struct pstruct *ppst, char **pstring1, char *pstring2){#ifndef TFAST  char *pg_str="FASTX";#else  char *pg_str="TFASTX";#endif  if (!ppst->param_u.fa.optflag) {    sprintf (pstring1[0], "%s (%s)",pg_str,verstr);  }  else {    sprintf (pstring1[0], "%s (%s) [optimized]",pg_str,verstr);  }#ifdef OLD_FASTA_GAP  sprintf (pstring1[1], "%s matrix (%d:%d:%d)%s ktup: %d\n join: %d, gap-pen: %d/%d, shift: %d width: %3d",#else  sprintf (pstring1[1], "%s matrix (o=%d:%d:%d:%d)%s ktup: %d\n join: %d, open/ext: %d/%d, shift: %d width: %3d",#endif	   ppst->pamfile, ppst->pam_h,ppst->pam_l,ppst->pam_xx,ppst->pam_xm,	   (ppst->ext_sq_set) ? "xS":"\0",	   ppst->param_u.fa.ktup, ppst->param_u.fa.cgap,	   ppst->gdelval, ppst->ggapval, ppst->gshift,	   ppst->param_u.fa.optwid);  if (ppst->param_u.fa.iniflag) strcat(pstring1[0]," init1");  if (pstring2 != NULL) {#ifdef OLD_FASTA_GAP    sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",#else     sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\; pg_open_ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",#endif	      pg_str,verstr,ppst->pamfile, ppst->pam_h,ppst->pam_l, 	      (ppst->ext_sq_set) ? "xS":"\0", ppst->gdelval,              ppst->ggapval,ppst->param_u.fa.ktup,ppst->param_u.fa.optcut,	      ppst->param_u.fa.cgap);   }}voidclose_work (const unsigned char *aa0, int n0,	    struct pstruct *ppst,	    struct f_struct **f_arg){  struct f_struct *f_str;  f_str = *f_arg;  if (f_str != NULL) {    free(f_str->cur);#ifndef TFAST    f_str->aa0y--;    free(f_str->aa0y);    f_str->aa0x--;    free(f_str->aa0x);#else    f_str->aa1y--;    free(f_str->aa1y);    f_str->aa1x--;    free(f_str->aa1x);#endif    free(f_str->res);    free(f_str->waa1);    free(f_str->waa0);    free(f_str->diag);    free(f_str->link);    free(f_str->pamh2);     free(f_str->pamh1);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -