⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dropff2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 3 页
字号:
/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia *//*  - dropffa.c,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp *//* this code implements the "fastf" algorithm, which is designed to   deconvolve mixtures of protein sequences derived from mixed-peptide   Edman sequencing.  The expected input is:   >test | 40001 90043 | mgstm1   MGCEN,   MIDYP,   MLLAY,   MLLGY   Where the ','s indicate the length/end of the sequencing cycle   data.  Thus, in this example, the sequence is from a mixture of 4   peptides, M was found in the first position, G,I, and L(2) at the second,   C,D, L(2) at the third, etc.   Because the sequences are derived from mixtures, there need not be   any partial sequence "MGCEN", the actual deconvolved sequence might be   "MLDGN".*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <ctype.h>#include "defs.h"#include "param.h"#include "structs.h"#include "tatstats.h"#define EOSEQ 0 #define ESS 49#define MAXHASH 32#define NMAP MAXHASH+1#define NMAP_X 23	/* re-code NMAP for 'X' */#define NMAP_Z 24	/* re-code NMAP for '*' */#ifndef MAXSAV#define MAXSAV 10#endif#define DROP_INTERN#include "drop_func.h"static char *verstr="4.21 May 2006 (ajm/wrp)";int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);#ifdef TFASTextern int aatran(const unsigned char *ntseq, unsigned char *aaseq, 		  const int maxs, const int frame);#endifstruct hlstr { int next, pos;};void savemax(struct dstruct *, struct f_struct *);static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *,	    int **, struct f_struct *);static int m1_spam(unsigned char *, int,		   const unsigned char *, int,		   struct savestr *, int **, int, struct f_struct *);int sconn(struct savestr **v, int nsave, int cgap,	  struct f_struct *, struct rstruct *, struct pstruct *,	  const unsigned char *aa0, int n0,	  const unsigned char *aa1, int n1,	  int opt_prob);void kpsort(struct savestr **, int);void kssort(struct savestr **, int);void kpsort(struct savestr **, int);intsconn_a(unsigned char *, int, int, struct f_struct *,	struct a_res_str *);/* initialize for fasta */voidinit_work (unsigned char *aa0, int n0, 	   struct pstruct *ppst,	   struct f_struct **f_arg){   int mhv, phv;   int hmax;   int i0, ii0, hv;   struct f_struct *f_str;   int maxn0;   int i, j, q;   struct savestr *vmptr;   int *res;   f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct));   if(f_str == NULL) {     fprintf(stderr, "Couldn't calloc f_str\n");     exit(1);   }   ppst->sw_flag = 0;   /* fastf3 cannot work with lowercase symbols as low complexity;      thus, NMAP must be disabled; this depends on aascii['X']  */   if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}   if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}   /*   this does not work for share ppst structs, as in threads */   /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */   for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)      if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];   if (mhv <= 0) {      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);      exit (1);   }   for (f_str->kshft = 0; mhv > 0; mhv /= 2)      f_str->kshft++;/*      kshft = 2;	*/   hmax = hv = (1 << f_str->kshft);   f_str->hmask = (hmax >> f_str->kshft) - 1;   if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {     fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1);     exit (1);   }   for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i];   aa0 = f_str->aa0;   if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {     fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);     exit (1);   }   f_str->aa0ix = 0;   if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) {     fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n",	      hmax,f_str->hmask);     exit (1);   }   if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh1 array\n");     exit (1);   }   if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {     fprintf (stderr, " cannot allocate pamh2 array\n");     exit (1);   }   if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) {     fprintf (stderr, " cannot allocate hash link array");     exit (1);   }   for (i0 = 0; i0 < hmax; i0++) {      f_str->harr[i0].next = -1;      f_str->harr[i0].pos = -1;   }   for (i0 = 0; i0 < n0; i0++) {      f_str->link[i0].next = -1;      f_str->link[i0].pos = -1;   }   /* encode the aa0 array */   /*     this code has been modified to allow for mixed peptide sequences      aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL      the 'NULL' character resets the hash position counter, to indicate that      any of several residues can be in the same position.      We also need to keep track of the number of times this has happened, so that      we can redivide the sequence later      i0 counts through the sequence      ii0 counts through the hashed sequence      */   f_str->nm0 = 1;   f_str->nmoff = -1;   phv = hv = 0;   for (i0= ii0 = 0; i0 < n0; i0++, ii0++) {     /* reset the counter and start hashing again */     if (aa0[i0] == ESS || aa0[i0] == 0) {       aa0[i0] = 0;	/* set ESS to 0 */       /*       fprintf(stderr," converted ',' to 0\n");*/       i0++;	/* skip over the blank */       f_str->nm0++;       if (f_str->nmoff < 0) f_str->nmoff = i0;       phv = hv = 0;       ii0 = 0;     }     hv = ppst->hsq[aa0[i0]];     f_str->link[i0].next = f_str->harr[hv].next;     f_str->link[i0].pos = f_str->harr[hv].pos;     f_str->harr[hv].next = i0;     f_str->harr[hv].pos = ii0;     f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]];   }   if (f_str-> nmoff < 0) f_str->nmoff = n0;#ifdef DEBUG   /*   fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0);   */#endif/*#ifdef DEBUG   fprintf(stderr," hmax: %d\n",hmax);   for ( hv=0; hv<hmax; hv++)       fprintf(stderr,"%2d %c %3d %3d\n",hv,	       (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ',	       f_str->harr[hv].pos,f_str->harr[hv].next);   fprintf(stderr,"----\n");   for ( hv=0; hv<n0; hv++)       fprintf(stderr,"%2d: %3d %3d\n",hv,	       f_str->link[hv].pos,f_str->link[hv].next);#endif*/   f_str->maxsav = MAXSAV;   if ((f_str->vmax = (struct savestr *)	calloc(MAXSAV,sizeof(struct savestr)))==NULL) {     fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav);     exit(1);   }   if ((f_str->vptr = (struct savestr **)	calloc(MAXSAV,sizeof(struct savestr *)))==NULL) {     fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav);     exit(1);   }   for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {     vmptr->used = (int *) calloc(n0, sizeof(int));     if(vmptr->used == NULL) {       fprintf(stderr, "Couldn't alloc vmptr->used\n");       exit(1);     }   }/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the   pam2[0][0] is now undefined for consistency with blast*/   for (i0 = 1; i0 <= ppst->nsq; i0++)     f_str->pamh1[i0] = ppst->pam2[0][i0][i0];   ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3;   if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4)     ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4;   f_str->ndo = 0;   f_str->noff = n0-1;   if (f_str->diag==NULL)      f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,					      sizeof (struct dstruct));   if (f_str->diag == NULL)   {      fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",	      (long) MAXDIAG * (long) (sizeof (struct dstruct)));      exit (1);   }#ifdef TFAST   if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,					     sizeof(unsigned char)))       == NULL) {     fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);     exit (1);   }   f_str->aa1x++;#endif   /* allocate space for the scoring arrays */   maxn0 = n0 + 4;   maxn0 = max(3*n0/2,MIN_RES);   if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {     fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);     exit(1);   }   f_str->res = res;   f_str->max_res = maxn0;   /* Tatusov Statistics Setup */   /* initialize priors array. */   if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {     fprintf(stderr, "Couldn't allocate priors array.\n");     exit(1);   }   calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);   f_str->dotat = 0;   f_str->shuff_cnt = ppst->shuff_node;   /* End of Tatusov Statistics Setup */   *f_arg = f_str;}/* pstring1 is a message to the manager, currently 512 *//* pstring2 is the same information, but in a markx==10 format */voidget_param (struct pstruct *ppstr, char **pstring1, char *pstring2){#ifndef TFAST  char *pg_str="FASTF";#else  char *pg_str="TFASTF";#endif  sprintf (pstring1[0], "%s (%s)",pg_str,verstr);  sprintf (pstring1[1], "%s matrix (%d:%d) join: %d",	   ppstr->pamfile, ppstr->pam_h,ppstr->pam_l,ppstr->param_u.fa.cgap);  if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1");  if (pstring2 != NULL) {    sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\; pg_join: %d\n",	     pg_str,verstr, ppstr->pamfile, ppstr->pam_h,ppstr->pam_l,	     ppstr->param_u.fa.cgap);   }}voidclose_work (const unsigned char *aa0, const int n0,	    struct pstruct *ppst,	    struct f_struct **f_arg){  struct f_struct *f_str;  struct savestr *vmptr;  f_str = *f_arg;  if (f_str != NULL) {    for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)      free(vmptr->used);    free(f_str->res);#ifdef TFAST    free(f_str->aa1x - 1); /* allocated, then aa1x++'ed */#endif    free(f_str->diag);    free(f_str->link);    free(f_str->pamh2);     free(f_str->pamh1);    free(f_str->harr);    free(f_str->aa0t);    free(f_str->aa0);    free(f_str->priors);    free(f_str->vmax);    free(f_str->vptr);    free(f_str);    *f_arg = NULL;  }}int do_fastf (unsigned char *aa0, int n0,	      const unsigned char *aa1, int n1,	      struct pstruct *ppst, struct f_struct *f_str,	      struct rstruct *rst, int *hoff, int opt_prob){   int     nd;		/* diagonal array size */   int     lhval;   int     kfact;   register struct dstruct *dptr;   register int tscor;   register struct dstruct *diagp;   struct dstruct *dpmax;   register int lpos;   int     tpos, npos;   struct savestr *vmptr;   int     scor, tmp;   int     im, ib, nsave;   int     cmps ();		/* comparison routine for ksort */   int *hsq;   hsq = ppst->hsq;   if (n1 < 1) {     rst->score[0] = rst->score[1] = rst->score[2] = 0;     rst->escore = 1.0;     rst->segnum = 0;     rst->seglen = 0;     return 1;   }   if (n0+n1+1 >= MAXDIAG) {     fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);     rst->score[0] = rst->score[1] = rst->score[2] = -1;     rst->escore = 2.0;     rst->segnum = 0;     rst->seglen = 0;     return -1;   }   nd = n0 + n1;   dpmax = &f_str->diag[nd];   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) {      dptr->stop = -1;      dptr->dmax = NULL;      dptr++->score = 0;   }   /* initialize the saved segment structures */   for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {      vmptr->score = 0;      memset(vmptr->used, 0, n0 * sizeof(int));   }   f_str->lowmax = f_str->vmax;   f_str->lowscor = 0;   /* start hashing */   diagp = &f_str->diag[f_str->noff];   for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) {     if (hsq[aa1[lpos]]>=NMAP) {       lpos++ ; diagp++;       while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}       if (lpos >= n1) break;       lhval = 0;     }     lhval = hsq[aa1[lpos]];     for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next;	  tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) {       /* tscor gets position of end of current lpos diag run */       if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {	 tscor++;		 /* move forward one */	 if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */	                             /* includes implicit -1 mismatch penalty */	   scor = dptr->score;	     /* current score of this run */	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 &&	       f_str->lowscor < scor)	/* if updating tscor makes run worse, */	     savemax (dptr, f_str);     /* save it */	   if ((tscor += scor) >= kfact) {  /* add to current run if continuing */	     				    /* is better than restart (kfact) */	       dptr->score = tscor;	       dptr->stop = lpos;	     }	     else {	       dptr->score = kfact;	/* starting over is better */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -