⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropfs2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 3 页
字号:
/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia *//*  $Name: fa35_03_06 $ - $Id: dropfs2.c,v 1.45 2008/02/19 08:50:13 wrp Exp $ *//* changed to return 2.0, rather than -1.0, for failure *//* Feb 4, 2005 - modifications to allow searches with ktup=2 for very   long queries.  This is a temporary solution to savemax(), spam()   which do not preserve exact matches   do_fasts() has been modified to allow higher maxsav for do_walign   than for do_work (2*nsegs, 6*nsegs) *//* this code implements the "fasts" algorithm, which compares a set of   protein fragments to a protein sequence.  Comma's are used to separate   the sequence fragments, which need not be the same length.   The expected input is:   >mgstm1   MGDAPDFD,   MILGYW,   MLLEYTDS   The fragments do not need to be in the correct order (which is   presumably unknown from the peptide sequencing.*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <math.h>#include "defs.h"#include "param.h"#include "tatstats.h"#define EOSEQ 0#define ESS 49#define NMAP_X 23 /* for 'X' */#define NMAP_Z 24 /* for '*' */#define MAXHASH 32#define NMAP MAXHASH+1static char *verstr="4.32 Feb 2007";#define DROP_INTERN#include "drop_func.h"int shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq);static void update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum);extern void aancpy(char *to, char *from, int count, struct pstruct *ppst);#ifdef TFASTextern int aatran(const unsigned char *ntseq, unsigned char *aaseq, const int maxs, const int frame);#endifvoid savemax(struct dstruct *, struct f_struct *, int maxsav, int exact,int t_end);int spam(const unsigned char *, const unsigned char *, int, struct savestr *, int **, struct f_struct *);int sconn(struct savestr **v,	  int nsave,	  struct f_struct *,	  struct rstruct *,	  struct pstruct *,	  const unsigned char *aa0, int n0,	  const unsigned char *aa1, int n1,	  int opt_prob);void kpsort(struct savestr **, int);void kssort(struct savestr **, int);	/* sort by score */int sconn_a(unsigned char *, int, 	    const unsigned char *, int,	    struct f_struct *, 	    struct a_res_str *,	    struct pstruct *);void kpsort(struct savestr **, int);/* initialize for fasta */voidinit_work (unsigned char *aa0, const int n0, 	   struct pstruct *ppst,	   struct f_struct **f_arg	   ){   int mhv, phv;   int hmax, nsegs;   int i0, ib, hv, old_hv;   int pamfact;   struct f_struct *f_str;   /* these used to be globals, but do not need to be */   int ktup, fact, kt1;   int maxn0;   int stmp;	/* temporary score */   int i, j, q;   int tat_size;   int *res;   unsigned char *query;   int k, l, m, n, N, length, index;   double *tatprobptr;   f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));   ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;   ktup = ppst->param_u.fa.ktup;   if ( ktup  > 2 ) {     ktup = ppst->param_u.fa.ktup = 2;   }   fact = ppst->param_u.fa.scfact;   /* fasts3 cannot work with lowercase symbols as low complexity;      thus, NMAP must be disabled; this depends on aascii['X']  */   if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}   if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}   /* this does not work in a threaded environment */   /*    else {fprintf(stderr," cannot find 'X'==NMAP\n");} */   for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)      if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv)  mhv = ppst->hsq[i0];   if (mhv <= 0) {      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);      exit (1);   }   for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;/*      kshft = 2;	*/   kt1 = ktup-1;   hv = 1;   for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;   hmax = hv;   f_str->hmask = (hmax >> f_str->kshft) - 1;   if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);     exit (1);   }   if ((f_str->aa0ti = (int *) calloc(n0+1, sizeof(int))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0ti array; %d\n",n0+1);     exit (1);   }   if ((f_str->aa0b = (int *) calloc(n0+1, sizeof(int))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0b array; %d\n",n0+1);     exit (1);   }   if ((f_str->aa0e = (int *) calloc(n0+1, sizeof(int))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0e array; %d\n",n0+1);     exit (1);   }   if ((f_str->aa0i = (int *) calloc(n0+1, sizeof(int))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0i array; %d\n",n0+1);     exit (1);   }   if ((f_str->aa0s = (int *) calloc(n0+1, sizeof(int))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0s array; %d\n",n0+1);     exit (1);   }   if ((f_str->aa0l = (int *) calloc(n0+1, sizeof(int))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0l array; %d\n",n0+1);     exit (1);   }   if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n",	      hmax, f_str->hmask);     exit (1);   }   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh1 array\n");     exit (1);   }   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh2 array\n");     exit (1);   }   if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate hash link array");     exit (1);   }   /* for FASTS/FASTM, we want to know when we get to the end of a peptide,      so we can ensure that we set the end and restart */   if ((f_str->l_end = (int *) calloc (n0, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate link end array");     exit (1);   }   for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;   for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;   for (i0 = 0; i0 < n0; i0++) f_str->l_end[i0] = 0;   /* count the number of peptides */   nsegs = 1;   for (i0 = 0; i0 < n0; i0++) {     if (aa0[i0] == ESS || aa0[i0] == 0) nsegs++;   }   /* allocate space for peptides offsets, nm_u */   if ((f_str->nmoff = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {     fprintf(stderr, " cannot allocat nmoff array: %d\n", nsegs);     exit(1);   }   if ((f_str->nm_u = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {     fprintf(stderr, " cannot allocat nm_u array: %d\n", nsegs);     exit(1);   }   phv = hv = 0;   f_str->nmoff[0] = 0;   f_str->nm0 = 1;   /* encode the aa0 array */   if (kt1 > 0) {     hv = ppst->hsq[aa0[0]];     phv = ppst->pam2[0][aa0[0]][aa0[0]];   }   for (i0=kt1 ; i0 < n0; i0++) {     if (aa0[i0] == ESS || aa0[i0] == 0) {       /*       fprintf(stderr," converted %d to 0\n",aa0[i0]); */       aa0[i0] = EOSEQ;	/* set ESS to 0 */       f_str->nmoff[f_str->nm0++] = i0+1;        f_str->l_end[i0-1] = 1;       phv = hv = 0;       if (kt1 > 0) {	 i0++;	 hv = ppst->hsq[aa0[i0]];	 phv = ppst->pam2[0][aa0[i0]][aa0[i0]];       }       continue;     }     hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];     f_str->link[i0] = f_str->harr[hv];     f_str->harr[hv] = i0;     f_str->pamh2[hv] = (phv += ppst->pam2[0][aa0[i0]][aa0[i0]]);     phv -= ppst->pam2[0][aa0[i0 - kt1]][aa0[i0 - kt1]];   }   f_str->l_end[n0-1] = 1;   f_str->nmoff[f_str->nm0] = n0+1;   /*#ifdef DEBUG   fprintf(stderr, ">>%s\n",qtitle);   for (j=0; j<f_str->nm0; j++) {     for (i=f_str->nmoff[j]; i < f_str->nmoff[j+1]-1; i++) {       fprintf(stderr,"%c",ppst->sq[aa0[i]]);     }     fprintf(stderr," %d\n",aa0[i]);   }   for (j=1; j<=ppst->nsq; j++) {     fprintf(stderr, "%c %d\n", ppst->sq[j], f_str->harr[j]);   }   for (j=0; j<=n0; j++) {     fprintf(stderr, "%c %d\n", ppst->sq[aa0[j]], f_str->link[j]);   }#endif   */   /* build an integer array of the max score that can be achieved      from that position - use in savemax to mark some segments as      fixed */   /* setup aa0b[], aa0e[], which specify the begining and end of each      segment */   stmp = 0;   q = -1;   for (ib = i0 = 0; i0 < n0; i0++) {     f_str->aa0l[i0] = i0 - q;     if (aa0[i0]==EOSEQ) {       f_str->aa0b[i0] = -1;       f_str->aa0e[i0] = -1;       f_str->aa0i[i0] = -1;       f_str->aa0l[i0] = -1;       q = i0;       if (i0 > 0)f_str->aa0s[i0-1] = stmp;       stmp = 0;       ib++;     }     else {       stmp += ppst->pam2[0][aa0[i0]][aa0[i0]];     }     f_str->aa0b[i0] =  f_str->nmoff[ib];     f_str->aa0e[i0] =  f_str->nmoff[ib+1]-2;     f_str->aa0i[i0] =  ib;     /*     fprintf(stderr,"%2d %c: %2d %2d %2d\n",i0,ppst->sq[aa0[i0]],	     f_str->aa0b[i0],f_str->aa0e[i0],f_str->aa0i[i0]);     */   }   f_str->aa0s[n0-1]=stmp;	/* save last best possible score */   /* maxsav - maximum number of peptide alignments saved in search */   /* maxsav_w - maximum number of peptide alignments saved in      alignment */   f_str->maxsav = max(MAXSAV,2*f_str->nm0);   f_str->maxsav_w = max(MAXSAV,6*f_str->nm0);   if ((f_str->vmax = (struct savestr *)	calloc(f_str->maxsav_w,sizeof(struct savestr)))==NULL) {     fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav_w);     exit(1);   }   if ((f_str->vptr = (struct savestr **)	calloc(f_str->maxsav_w,sizeof(struct savestr *)))==NULL) {     fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav_w);     exit(1);   }   if ((f_str->sarr = (struct slink *)	calloc(f_str->maxsav_w,sizeof(struct slink)))==NULL) {     fprintf(stderr, "Couldn't allocate sarr[%d].\n",f_str->maxsav_w);     exit(1);   }   /* Tatusov Statistics Setup */   /* initialize priors array. */   if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {     fprintf(stderr, "Couldn't allocate priors array.\n");     exit(1);   }   calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);   /* pre-calculate the Tatusov probability array for each full segment */   if(ppst->zsflag >= 1 && ppst->zsflag <= 3 && f_str->nm0 <= 10) {     tat_size = (1<<f_str->nm0) -1;     f_str->dotat = 1;     f_str->tatprobs = (struct tat_str **) malloc((size_t)tat_size*sizeof(struct tat_str *));     if (f_str->tatprobs == NULL) {       fprintf (stderr, " cannot allocate tatprobs array: %ld\n",		tat_size * sizeof(struct tat_str *));       exit (1);     }     f_str->intprobs = (double **) malloc((size_t)tat_size * sizeof(double *));     if(f_str->intprobs == NULL) {       fprintf(stderr, "Couldn't allocate intprobs array.\n");       exit(1);     }     for(k = 0, l = f_str->nm0 ; k < l ; k++) {       query = &(aa0[f_str->nmoff[k]]);       length = f_str->nmoff[k+1] - f_str->nmoff[k] - 1;       /* this segment alone */       index = (1 << k) - 1;       generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), NULL);       /* integrate the probabilities */       N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;       tatprobptr = (double *) calloc(N+1, sizeof(double));       if(tatprobptr == NULL) {	 fprintf(stderr, "Couldn't calloc tatprobptr.\n");	 exit(1);       }       f_str->intprobs[index] = tatprobptr;       for (i = 0; i <= N ; i++ ) {	 tatprobptr[i] = f_str->tatprobs[index]->probs[i];	 for (j = i + 1 ; j <= N ; j++ ) {	   tatprobptr[i] += f_str->tatprobs[index]->probs[j];	 }       }       /* this segment built on top of all other subcombinations */       for(i = 0, j = (1 << k) - 1 ; i < j ; i++) {	 index = (1 << k) + i;	 generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), f_str->tatprobs[i]);	 /* integrate the probabilities */	 N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;	 tatprobptr = (double *) calloc(N+1, sizeof(double));	 if(tatprobptr == NULL) {	   fprintf(stderr, "Couldn't calloc tatprobptr.\n");	   exit(1);	 }	 f_str->intprobs[index] = tatprobptr;       	 for (m = 0; m <= N ; m++ ) {	   tatprobptr[m] = f_str->tatprobs[index]->probs[m];	   for (n = m + 1 ; n <= N ; n++ ) {	     tatprobptr[m] += f_str->tatprobs[index]->probs[n];	   }	 }       }     }   } else {     f_str->dotat = 0;     f_str->shuff_cnt = ppst->shuff_node;   }   /* End of Tatusov Statistics Setup */   /*   for (i0=1; i0<=ppst->nsq; i0++) {     fprintf(stderr," %c: %2d ",ppst->sq[i0],f_str->harr[i0]);     hv = f_str->harr[i0];     while (hv >= 0) {       fprintf(stderr," %2d",f_str->link[hv]);       hv = f_str->link[hv];     }     fprintf(stderr,"\n");   }   *//* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the   pam2[0][0] is now undefined for consistency with blast*/   for (i0 = 1; i0 <= ppst->nsq; i0++)     f_str->pamh1[i0] = ppst->pam2[0][i0][i0];   f_str->ndo = 0;   f_str->noff = n0-1;   if (f_str->diag==NULL)      f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,					      sizeof (struct dstruct));   if (f_str->diag == NULL) {      fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",	      (long) MAXDIAG * (long) (sizeof (struct dstruct)));      exit (1);   }#ifdef TFAST   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,					     sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);     exit (1);   }   f_str->aa1x++;#endif   maxn0 = max(3*n0/2,MIN_RES);   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);     exit(1);   }   f_str->res = res;   f_str->max_res = maxn0;   *f_arg = f_str;}/* pstring1 is a message to the manager, currently 512 *//* pstring2 is the same information, but in a markx==10 format */voidget_param (struct pstruct *ppstr, char **pstring1, char *pstring2){#ifdef FASTS#ifndef TFAST  char *pg_str="FASTS";#else  char *pg_str="TFASTS";#endif#endif#ifdef FASTM#ifndef TFAST  char *pg_str="FASTM";#else  char *pg_str="TFASTM";#endif#endif  sprintf (pstring1[0], "%s (%s)",pg_str,verstr);  sprintf (pstring1[1], "%s matrix (%d:%d) ktup=%d",	   ppstr->pamfile, ppstr->pam_h,ppstr->pam_l, ppstr->param_u.fa.ktup);  if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1");  if (pstring2 != NULL) {    sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\; pg_gap-pen: %d %d\n; pg_ktup: %d\n",	     pg_str,verstr,ppstr->pamfile, ppstr->pam_h,ppstr->pam_l, ppstr->gdelval,	     ppstr->ggapval,ppstr->param_u.fa.ktup);   }}voidclose_work (const unsigned char *aa0, const int n0,	    struct pstruct *ppst,	    struct f_struct **f_arg){  struct f_struct *f_str;  int i, j;  f_str = *f_arg;  if (f_str != NULL) {    free(f_str->res);#ifdef TFAST    free(f_str->aa1x - 1); /* because f_str->aa1x got ++'ed when allocated! */#endif    free(f_str->diag);    free(f_str->l_end);    free(f_str->link);    free(f_str->pamh2);     free(f_str->pamh1);    free(f_str->harr);    free(f_str->vmax);    free(f_str->vptr);    free(f_str->sarr);    free(f_str->aa0i);    free(f_str->aa0e);    free(f_str->aa0b);    free(f_str->aa0ti);    free(f_str->aa0t);    free(f_str->nmoff);    free(f_str->nm_u);    if(f_str->dotat) {      for(i = 0, j = (1 << f_str->nm0) - 1 ; i < j ; i++) {	free(f_str->tatprobs[i]->probs);	free(f_str->tatprobs[i]);	free(f_str->intprobs[i]);      }      free(f_str->tatprobs);      free(f_str->intprobs);    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -