📄 dropfs2.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 3 页
字号:
    free(f_str->priors);    free(f_str);    *f_arg = NULL;  }}void do_fasts (const unsigned char *aa0, const int n0,	       const unsigned char *aa1, const int n1,	       struct pstruct *ppst, struct f_struct *f_str,	       struct rstruct *rst, int *hoff, int opt_prob,	       int maxsav){   int     nd;		/* diagonal array size */   int     lhval;   int     kfact;   register struct dstruct *dptr;   register int tscor;   register struct dstruct *diagp;   struct dstruct *dpmax;   register int lpos;   int     tpos;   struct savestr *vmptr, *vmaxmax;   int     scor, tmp;   int     im, ib, nsave, i;   int     cmps ();		/* comparison routine for ksort */   int ktup;   int doffset;   vmaxmax = &f_str->vmax[maxsav];   ktup = ppst->param_u.fa.ktup;   if (n1 < ktup) {     rst->score[0] = rst->score[1] = rst->score[2] = 0;     rst->escore = 1.0;     rst->segnum = 0;     rst->seglen = 0;     return;   }   if (n0+n1+1 >= MAXDIAG) {     fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);     rst->score[0] = rst->score[1] = rst->score[2] = -1;     rst->escore = 2.0;     rst->segnum = 0;     rst->seglen = 0;     return;   }   nd = n0 + n1;   dpmax = &f_str->diag[nd];   for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)   {      dptr->stop = -1;      dptr->dmax = NULL;      dptr++->score = 0;   }   for (vmptr = f_str->vmax; vmptr < vmaxmax; vmptr++) {      vmptr->score = 0;      vmptr->exact = 0;   }   f_str->lowmax = f_str->vmax;   f_str->lowscor = 0;   /* start hashing */   diagp = &f_str->diag[f_str->noff];   for (lhval=lpos=0; lpos < n1; lpos++, diagp++) {     if (ppst->hsq[aa1[lpos]]>=NMAP) {	/* skip residue */       lpos++ ; diagp++;       while (lpos < n1 && ppst->hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}       if (lpos >= n1) break;       lhval = 0;     }     lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];     for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {       dptr = &diagp[-tpos];       if (f_str->l_end[tpos]) {	 if (dptr->score + f_str->pamh1[aa0[tpos]] == f_str->aa0s[tpos]) {	   dptr->stop = lpos;	   dptr->score = f_str->aa0s[tpos];	   savemax(dptr, f_str, maxsav, 1, tpos);	   dptr->dmax = NULL;	 }	 else if (dptr->score + f_str->pamh1[aa0[tpos]] > f_str->aa0s[tpos]) {	   /*	   fprintf(stderr,"exact match score too high: %d:%d %d < %d + %d - %d:%d - %d > %d\n",		   tpos, lpos, f_str->aa0s[tpos],dptr->score, f_str->pamh1[aa0[tpos]],		   dptr->start, dptr->stop,		   dptr->stop - dptr->start, f_str->aa0l[tpos]);	   */	   dptr->stop = lpos;	   dptr->start = lpos - f_str->aa0l[tpos];	   dptr->score = f_str->aa0s[tpos];	   savemax(dptr, f_str, maxsav, 1, tpos);	   dptr->dmax = NULL;	 }       }       else if ((tscor = dptr->stop) >= 0) {	 tscor++;	/* tscor is stop of current, increment it */	 if ((tscor -= lpos) <= 0) {  /* tscor, the end of the current					 match, is before lpos, so there					 is a mismatch - this is also the					 mismatch cost */	   tscor *= 2;	   scor = dptr->score;	/* save the run score on the diag */	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 	       && f_str->lowscor < scor) {	     /* if what we will get (tscor + kfact) is < 0 and the		score is better than the worst savemax() score, save		it */	     savemax (dptr, f_str, maxsav,0,-1);	   }	   /* if extending is better than starting over, extend */	   if ((tscor += scor) >= kfact) {	     dptr->score = tscor;	     dptr->stop = lpos;	     if (f_str->l_end[tpos]) {	       if (dptr->score == f_str->aa0s[tpos]) {		 savemax(dptr, f_str, maxsav,1,tpos);		 dptr->dmax = NULL;	       }	       else if (dptr->score > f_str->lowscor)		 savemax(dptr, f_str, maxsav,0,tpos);	     }	   }	   else {     /* otherwise, start new */	     dptr->score = kfact;	     dptr->start = dptr->stop = lpos;	   }	 } 	 else { /* tscor is after lpos, so extend one residue */	   dptr->score += f_str->pamh1[aa0[tpos]];	   dptr->stop = lpos;	   if (f_str->l_end[tpos]) {	     if (dptr->score == f_str->aa0s[tpos]) {	       savemax(dptr, f_str, maxsav,1,tpos);	       dptr->dmax = NULL;	     }	     else if (dptr->score > f_str->lowscor)	       savemax(dptr, f_str, maxsav,0,tpos);	   }	 }       }       else {	/* start new */	 dptr->score = f_str->pamh2[lhval];	 dptr->start = dptr->stop = lpos;       }     }				/* end tpos */   }				/* end lpos */   for (dptr = f_str->diag; dptr < dpmax;) {     if (dptr->score > f_str->lowscor) savemax (dptr, f_str, maxsav,0,-1);     dptr->stop = -1;     dptr->dmax = NULL;     dptr++->score = 0;   }   f_str->ndo = nd;/*        at this point all of the elements of aa1[lpos]        have been searched for elements of aa0[tpos]        with the results in diag[dpos]*/   for (nsave=0, vmptr=f_str->vmax; vmptr< vmaxmax; vmptr++) {      if (vmptr->score > 0) {	/*	fprintf(stderr,"%c 0: %4d-%4d  1: %4d-%4d  dp: %d score: %d",		(vmptr->exact ? 'x' : ' '),		f_str->noff+vmptr->start-vmptr->dp,		f_str->noff+vmptr->stop-vmptr->dp,		vmptr->start,vmptr->stop,		vmptr->dp,vmptr->score);	*/	vmptr->score = spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);	/*	fprintf(stderr,"  sscore: %d %d-%d\n",vmptr->score,vmptr->start,vmptr->stop);	*/	if (vmptr->score > 0) f_str->vptr[nsave++] = vmptr;      }   }   if (nsave <= 0) {     rst->score[0] = rst->score[1] = rst->score[2] = 0;     rst->escore = 1.0;     rst->segnum = 0;     rst->seglen = 0;     f_str->nsave = 0;     return;   }   /*   fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);   for (ib=0; ib<nsave; ib++) {     fprintf(stderr,"%c 0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",	     f_str->vptr[ib]->exact ? 'x' : ' ',	     f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,	     f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,	     f_str->vptr[ib]->start,f_str->vptr[ib]->stop,	     f_str->vptr[ib]->dp,f_str->vptr[ib]->score);   }   fprintf(stderr,"---\n");   */   kssort(f_str->vptr,nsave);   /* make certain each seg is used only once */   for (ib=0; ib<f_str->nm0; ib++) f_str->nm_u[ib]=0;   for (ib=0; ib < nsave; ib++) {     doffset = f_str->vptr[ib]->dp - f_str->noff;     tpos=f_str->aa0i[f_str->vptr[ib]->start - doffset];     if (f_str->nm_u[tpos] == 0) {       f_str->nm_u[tpos]=1;     } else {       f_str->vptr[ib]->score = -1;     }   }   kssort(f_str->vptr,nsave);   for (ib = nsave-1; ib >= 0; ib--) {     if (f_str->vptr[ib]->score > -1) break;   }   nsave = ib+1;#ifdef DEBUG   /*   for (ib = 0; ib < nsave; ib++) {     if (f_str->vptr[ib]->score > 1000) {       fprintf(stderr," score[%d] too high: %d\n",ib, f_str->vptr[ib]->score);       for (i=0; i< 10; i++) {	 fprintf(stderr, "%c:%d ",ppst->sq[aa1[i]],aa1[i]);       }       fprintf(stderr,"\n");       f_str->vptr[ib]->score = 0;     }   }   */#endif   scor = sconn (f_str->vptr, nsave, 		 f_str, rst, ppst, aa0, n0, aa1, n1,		 opt_prob);   if (rst->escore < 0.0) rst->escore = 2.0;   kssort(f_str->vptr,nsave);   /* here we should use an nsave that is consistent with sconn and nm0 */   f_str->nsave = nsave;   if (nsave > f_str->nm0) f_str->nsave = f_str->nm0;   rst->score[1] = f_str->vptr[0]->score;   rst->score[0] = rst->score[2] = max(scor, f_str->vptr[0]->score);}void do_work (const unsigned char *aa0, const int n0,	      const unsigned char *aa1, const int n1,	      int frame,	      struct pstruct *ppst, struct f_struct *f_str,	      int qr_flg, struct rstruct *rst){  int opt_prob;  int hoff, n10, i;  if (qr_flg==1 && f_str->shuff_cnt <= 0) {    rst->escore = 2.0;    rst->score[0]=rst->score[1]=rst->score[2]= -1;    return;  }  if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14 ) opt_prob=1;  else opt_prob = 0;  if (ppst->zsflag == 2 || ppst->zsflag == 12) opt_prob = 0;  if (qr_flg) {    opt_prob=1;    /*    if (frame==1) */      f_str->shuff_cnt--;  }  if (n1 < ppst->param_u.fa.ktup) {    rst->score[0] = rst->score[1] = rst->score[2] = -1;    rst->escore = 2.0;    return;  }#ifdef TFAST  n10=aatran(aa1,f_str->aa1x,n1,frame);  if (ppst->debug_lib)    for (i=0; i<n10; i++)      if (f_str->aa1x[i]>ppst->nsq) {	fprintf(stderr,		"residue[%d/%d] %d range (%d)\n",i,n1,		f_str->aa1x[i],ppst->nsq);	f_str->aa1x[i]=0;	n10=i-1;      }  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);#else	/* FASTA */  do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);#endif  rst->comp = rst->H = -1.0;}void do_opt (const unsigned char *aa0, const int n0,	     const unsigned char *aa1, const int n1,	     int frame,	     struct pstruct *ppst, struct f_struct *f_str,	     struct rstruct *rst){  int lag, tscore, hoff, n10;#ifdef TFAST  n10=aatran(aa1,f_str->aa1x,n1,frame);  do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1, f_str->maxsav);#else	/* FASTA */  do_fasts(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff, 1, f_str->maxsav);#endif}/* modify savemax() so that full length 100% matches are marked   so that they cannot be removed - if we have a 100% match, mark "exact"   modify savemax() to split alignments that include a comma*//* savemax(dptr, f_str, maxsav) takes a current diagonal run (saved in dptr),   and places it in the set of runs to be saved (in  f_str->vmax[])*/void savemax (struct dstruct *dptr, struct f_struct *f_str, int maxsav,	 int exact, int tpos){  register int dpos;	/* position along the diagonal, -n0 .. n1 */  int i, j, lowj;  register struct savestr *vmptr;  struct savestr *vmaxmax;  vmaxmax = &f_str->vmax[maxsav];  dpos = (int) (dptr - f_str->diag);	/* current diagonal *//* check to see if this is the continuation of a run that is already saved *//* if we are at the end of the query, save it regardless *//*  if (t_end > 0 && t_end < dptr->stop - dptr->start) {return;} */  if ((vmptr = dptr->dmax) != NULL	/* have an active run */      && vmptr->dp == dpos &&		/* on the correct diagonal */      vmptr->start == dptr->start) {	/* and it starts at the same place */    vmptr->stop = dptr->stop;	/* update the end of the match in vmax[] */    if (exact == 1) {    /*      fprintf(stderr,"have cont exact match: %d - %d:%d %d:%d = %d\n",	      dptr->score, dptr->start, dptr->stop,	      vmptr->start, vmptr->stop, dptr->stop - dptr->start+1);    */      exact = 1;    }/* if the score is worse, don't update, return - if the score gets bad   enough, it will restart in the diagonal scan */    if ((i = dptr->score) <= vmptr->score) { return;} /* score is better, update */    vmptr->score = i;    vmptr->exact = exact;/* if the score is not the worst, return */    if (vmptr != f_str->lowmax) { return;}  }  else {	/* not a continuation */    /* save in the lowest place */    /*    fprintf(stderr," Replacing: %d - %d:%d => %d - %d:%d",	    f_str->lowmax->score, f_str->lowmax->start, f_str->lowmax->stop,	    dptr->score, dptr->start, dptr->stop);    */    vmptr = f_str->lowmax;    /*    if (exact == 1) {      fprintf(stderr,"have new exact match: %d - %d:%d = %d\n",	      dptr->score, dptr->start, dptr->stop, dptr->stop - dptr->start+1);    }    */    vmptr->exact = exact;    i = vmptr->score = dptr->score;   /* 'i' is used as a bound */    vmptr->dp = dpos;    vmptr->start = dptr->start;    vmptr->stop = dptr->stop;    dptr->dmax = vmptr;  }  /* rescan the list for the worst score */  for (vmptr = f_str->vmax;  vmptr < &f_str->vmax[maxsav] ; vmptr++) {    if (vmptr->score < i && !vmptr->exact) {      i = vmptr->score;      f_str->lowmax = vmptr;    }  }  f_str->lowscor = i;}/* this version of spam scans the diagonal to find the best local score,   then resets the boundaries for a global alignment and re-scans *//* NOOVERHANG allows one to score any overhanging alignment as zero.   Useful for SAGE alignments.  Normally, one allows overhangs because   of the possibility of partial sequences.*/#undef NOOVERHANG/*    May, 2005 - spam() has an intesting bug that occurs when two   peptides match in order, separated by one position (the comma).  In   this case, spam() splits the match, and only returns the better of   the two matches.  So, if spam splits an alignment at a comma, it   needs the ability to insert the missing match.*/int spam (const unsigned char *aa0, const unsigned char *aa1,int n1,	  struct savestr *dmax, int **pam2,	  struct f_struct *f_str){   int     lpos, doffset;   int     tot, mtot;   struct {     int  start, stop, score;   } curv, maxv;   register const unsigned char *aa0p, *aa1p;   curv.start = dmax->start;   aa1p = &aa1[dmax->start];   doffset = dmax->dp - f_str->noff;   aa0p = &aa0[dmax->start - doffset];   tot = curv.score = maxv.score = 0;   for (lpos = dmax->start; lpos <= dmax->stop; lpos++) {     tot += pam2[*aa0p++][*aa1p++];     if (tot > curv.score) {       curv.stop = lpos;	/* here, curv.stop is actually curv.max */       curv.score = tot;      }      else if (tot < 0) {	if (curv.score > maxv.score) {	  maxv.start = curv.start;	  maxv.stop = curv.stop;	  maxv.score = curv.score;	}	tot = curv.score = 0;	curv.start = lpos+1;      }   }   if (curv.score > maxv.score) {     maxv.start = curv.start;     maxv.stop = curv.stop;     maxv.score = curv.score;   }   if (maxv.score <= 0) return 0;   /* now, reset the boundaries of the alignment using aa0b[]      and aa0e[], which specify the residues that start and end      the segment */         maxv.start = f_str->aa0b[maxv.stop-doffset] + doffset;   if (maxv.start < 0) {     maxv.start = 0;#ifdef NOOVERHANG     return 0;#endif   }   maxv.stop = f_str->aa0e[maxv.stop-doffset] + doffset;   if (maxv.stop > n1) {     maxv.stop = n1-1;#ifdef NOOVERHANG     return 0;#endif   }   aa1p = &aa1[lpos = maxv.start];   aa0p = &aa0[lpos - doffset];   for (tot=0; lpos <= maxv.stop; lpos++) {     tot += pam2[*aa0p++][*aa1p++];   }   maxv.score = tot;/*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)		printf(" new region: %3d %3d %3d %3d\n",maxv.start,			dmax->start,maxv.stop,dmax->stop);*/   dmax->start = maxv.start;   dmax->stop = maxv.stop;   return maxv.score;}int sconn (struct savestr **v, int n, 	   struct f_struct *f_str,	   struct rstruct *rst, struct pstruct *ppst,	   const unsigned char *aa0, int n0,	   const unsigned char *aa1, int n1, int opt_prob){   int     i, si, cmpp ();   struct slink *start, *sl, *sj, *so, *sarr;   int     lstart, ltmp, tstart, plstop, ptstop, ptstart, tstop;   double  tatprob;   int     dotat;   sarr = f_str->sarr;   /*  sort the score left to right in lib pos */   kpsort (v, n);   start = NULL;   rst->score[0] = 0;   rst->escore = 2.0;/*  for the remaining runs, see if they fit *//*  lstart/lstop -> start/stop in library sequence    tstart/tstop -> start/stop in query sequence    plstart/plstop ->*/
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -