core_algorithms.c

来自「这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer」· C语言 代码 · 共 1,965 行 · 第 1/5 页

C
1,965
字号
    for (k = 1; k <= hmm->M; k++) {				/* match state */      mmx[cur][k] = -INFTY;      if ((sc = mmx[prv][k-1] + hmm->tsc[TMM][k-1]) > -INFTY)	{ mmx[cur][k] = sc; mtr[cur][k] = mtr[prv][k-1]; }      if ((sc = imx[prv][k-1] + hmm->tsc[TIM][k-1]) > mmx[cur][k])	{ mmx[cur][k] = sc; mtr[cur][k] = itr[prv][k-1]; }      if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k])	{ mmx[cur][k] = sc; mtr[cur][k] = i-1; }      if ((sc = dmx[prv][k-1] + hmm->tsc[TDM][k-1]) > mmx[cur][k])	{ mmx[cur][k] = sc; mtr[cur][k] = dtr[prv][k-1]; }      if (hmm->msc[dsq[i]][k] != -INFTY)	mmx[cur][k] += hmm->msc[dsq[i]][k];      else	mmx[cur][k] = -INFTY;				/* delete state */      dmx[cur][k] = -INFTY;      if ((sc = mmx[cur][k-1] + hmm->tsc[TMD][k-1]) > -INFTY)	{ dmx[cur][k] = sc; dtr[cur][k] = mtr[cur][k-1]; }      if ((sc = dmx[cur][k-1] + hmm->tsc[TDD][k-1]) > dmx[cur][k])	{ dmx[cur][k] = sc; dtr[cur][k] = dtr[cur][k-1]; }				/* insert state */      if (k < hmm->M) {	imx[cur][k] = -INFTY;	if ((sc = mmx[prv][k] + hmm->tsc[TMI][k]) > -INFTY)	  { imx[cur][k] = sc; itr[cur][k] = mtr[prv][k]; }	if ((sc = imx[prv][k] + hmm->tsc[TII][k]) > imx[cur][k])	  { imx[cur][k] = sc; itr[cur][k] = itr[prv][k]; }	if (hmm->isc[dsq[i]][k] != -INFTY)	  imx[cur][k] += hmm->isc[dsq[i]][k];	else	  imx[cur][k] = -INFTY;      }    }    /* Now the special states. Order is important here.     * remember, C and J emissions are zero score by definition,     */				/* N state */    xmx[cur][XMN] = -INFTY;    if ((sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY)      xmx[cur][XMN] = sc;				/* E state */    xmx[cur][XME] = -INFTY;    for (k = 1; k <= hmm->M; k++)      if ((sc =  mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME])	{ xmx[cur][XME] = sc; etr[i] = mtr[cur][k]; }				/* J state */    xmx[cur][XMJ] = -INFTY;    if ((sc = xmx[prv][XMJ] + hmm->xsc[XTJ][LOOP]) > -INFTY)      { xmx[cur][XMJ] = sc; xtr[cur][XMJ] = xtr[prv][XMJ]; }    if ((sc = xmx[cur][XME]   + hmm->xsc[XTE][LOOP]) > xmx[cur][XMJ])      { xmx[cur][XMJ] = sc; xtr[cur][XMJ] = i; }				/* B state */    xmx[cur][XMB] = -INFTY;    if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY)      { xmx[cur][XMB] = sc; btr[i] = 0; }    if ((sc = xmx[cur][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[cur][XMB])      { xmx[cur][XMB] = sc; btr[i] = xtr[cur][XMJ]; }				/* C state */    xmx[cur][XMC] = -INFTY;    if ((sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY)      { xmx[cur][XMC] = sc; xtr[cur][XMC] = xtr[prv][XMC]; }    if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC])      { xmx[cur][XMC] = sc; xtr[cur][XMC] = i; }  }				/* T state (not stored) */  sc = xmx[cur][XMC] + hmm->xsc[XTC][MOVE];  /*****************************************************************   * Collapsed traceback stage.    * xtr[L%2][XMC] contains the position j of the previous E   * etr[j]        contains the position i of the previous B   * btr[i]        contains the position j of the previous E, or 0   * continue until btr[i] = 0.   *****************************************************************/  curralloc = 2;		/* minimum: no hits */  P7AllocTrace(curralloc, &tr);  /* Init of collapsed trace. Back to front; we ReverseTrace() later.   */  tpos = 0;  tr->statetype[tpos] = STT;  tr->pos[tpos]       = 0;  i                   = xtr[L%2][XMC];  while (i > 0)    {      curralloc += 2;      P7ReallocTrace(tr, curralloc);      tpos++;      tr->statetype[tpos] = STE;      tr->pos[tpos] = i;      i = etr[i];      tpos++;      tr->statetype[tpos] = STB;      tr->pos[tpos] = i;      i = btr[i];    }  tpos++;  tr->statetype[tpos] = STS;  tr->pos[tpos]       = 0;  tr->tlen = tpos + 1;  P7ReverseTrace(tr);    FreePlan7Matrix(mx);  FreePlan7Matrix(tmx);  free(btr);  free(etr);  *ret_tr = tr;  return Scorify(sc);}/* Function: P7WeeViterbi() * Date:     SRE, Wed Mar  4 08:24:04 1998 [St. Louis] * * Purpose:  Hirschberg/Myers/Miller linear memory alignment. *           See [Hirschberg75,MyM-88a] for the idea of the algorithm. *           Adapted to HMM implementation.  *            *           Requires that you /know/ that there's only *           one hit to the model in the sequence: either *           because you're forcing single-hit, or you've *           previously called P7ParsingViterbi to parse *           the sequence into single-hit segments. The reason *           for this is that a cyclic model (a la Plan7) *           defeats the nice divide and conquer trick. *           (I think some trickery with propagated trace pointers *           could get around this but haven't explored it.) *           This is implemented by ignoring transitions *           to/from J state. * * Args:     dsq    - sequence in digitized form *           L      - length of dsq *           hmm    - the model *           ret_tr - RETURN: traceback. * * Returns:  Score of the optimal Viterbi alignment. */floatP7WeeViterbi(unsigned char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr){  struct p7trace_s *tr;         /* RETURN: traceback */  int          *kassign;        /* 0..L+1, alignment of seq positions to model nodes */  char         *tassign;        /* 0..L+1, alignment of seq positions to state types */  int          *endlist;        /* stack of end points on sequence to work on */  int          *startlist;      /* stack of start points on sequence to work on */  int          lpos;            /* position in endlist, startlist */  int          k1, k2, k3;	/* start, mid, end in model      */  char         t1, t2, t3;	/* start, mid, end in state type */  int          s1, s2, s3;	/* start, mid, end in sequence   */  float        sc;		/* score of segment optimal alignment */  float        ret_sc;		/* optimal score over complete seq */  int          tlen;		/* length needed for trace */  int          i, k, tpos;	/* index in sequence, model, trace */  /* Initialize.   */  kassign   = MallocOrDie (sizeof(int) * (L+1));  tassign   = MallocOrDie (sizeof(char)* (L+1));  endlist   = MallocOrDie (sizeof(int) * (L+1));  startlist = MallocOrDie (sizeof(int) * (L+1));  lpos = 0;  startlist[lpos] = 1;  endlist[lpos]   = L;  kassign[1]      = 1;  kassign[L]      = hmm->M;  tassign[1]      = STS;	/* temporary boundary condition! will become N or M */  tassign[L]      = STT;	/* temporary boundary condition! will become M or C */  /* Recursive divide-and-conquer alignment.   */  while (lpos >= 0)    {				/* Pop a segment off the stack */      s1 = startlist[lpos];      k1 = kassign[s1];      t1 = tassign[s1];      s3 = endlist[lpos];      k3 = kassign[s3];      t3 = tassign[s3];      lpos--;				/* find optimal midpoint of segment */      sc = get_wee_midpt(hmm, dsq, L, k1, t1, s1, k3, t3, s3, &k2, &t2, &s2);      kassign[s2] = k2;      tassign[s2] = t2;                               /* score is valid on first pass */      if (t1 == STS && t3 == STT) ret_sc = sc;				/* push N-terminal segment on stack */      if (t2 != STN && (s2 - s1 > 1 || (s2 - s1 == 1 && t1 == STS)))	{	  lpos++;	  startlist[lpos] = s1;	  endlist[lpos]   = s2;	}				/* push C-terminal segment on stack */      if (t2 != STC && (s3 - s2 > 1 || (s3 - s2 == 1 && t3 == STT)))	{          lpos++;          startlist[lpos] = s2;          endlist[lpos]   = s3;	}      if (t2 == STN)	{			/* if we see STN midpoint, we know the whole N-term is STN */	  for (; s2 >= s1; s2--) {	    kassign[s2] = 1;	    tassign[s2] = STN;	  }	}      if (t2 == STC)	{			/* if we see STC midpoint, we know whole C-term is STC */	  for (; s2 <= s3; s2++) {	    kassign[s2] = hmm->M;	    tassign[s2] = STC;	  }	}    }  /*****************************************************************   * Construct a traceback structure from kassign/tassign by interpolating   * necessary states.    * Trace allocation is as follows. We clearly need L emitting states.   * We also need nonemitting states as follows:   * STS,STN,STB,STE,STC,STT = 6   * STD: count k2-k1-1 in kassign M->M's   * Also, count N->M's and M->C's (potential wing unfoldings)...   *   ...and be careful to check wing unfoldings when there aren't   *      any emitting N or C flanks! (bugfix, 2.1.1b)   *****************************************************************/   tlen = L + 6;  for (i = 1; i < L; i++)    {      if (tassign[i] == STM && tassign[i+1] == STM)	tlen += kassign[i+1] - kassign[i] - 1;      if (tassign[i] == STN && tassign[i+1] == STM)	tlen += kassign[i+1] - 1;      if (tassign[i] == STM && tassign[i+1] == STC)	tlen += hmm->M - kassign[i];    }  if (tassign[1] == STM) tlen += kassign[1] - 1;   if (tassign[L] == STM) tlen += hmm->M - kassign[L];  P7AllocTrace(tlen, &tr);  tr->statetype[0] = STS;  tr->nodeidx[0]   = 0;  tr->pos[0]       = 0;  tr->statetype[1] = STN;  tr->nodeidx[1]   = 0;  tr->pos[1]       = 0;  tpos = 2;    for (i = 1; i <= L; i++)    {      switch(tassign[i]) {      case STM:				/* check for first match state */	if (tr->statetype[tpos-1] == STN) {	  tr->statetype[tpos] = STB;	  tr->nodeidx[tpos]   = 0;	  tr->pos[tpos]       = 0;	  tpos++;				/* check for wing unfolding */	  if (Prob2Score(hmm->begin[kassign[i]], hmm->p1) + INTSCALE <= hmm->bsc[kassign[i]])	    for (k = 1; k < kassign[i]; k++) {	      tr->statetype[tpos] = STD;	      tr->nodeidx[tpos]   = k;	      tr->pos[tpos]       = 0;	      tpos++;	    }	}				/* do the match state itself */	tr->statetype[tpos] = STM;	tr->nodeidx[tpos]   = kassign[i];	tr->pos[tpos]       = i;	tpos++;				/* do any deletes necessary 'til next match */	if (i < L && tassign[i+1] == STM && kassign[i+1] - kassign[i] > 1)	  for (k = kassign[i] + 1; k < kassign[i+1]; k++)	    {	      tr->statetype[tpos] = STD;	      tr->nodeidx[tpos]   = k;	      tr->pos[tpos]       = 0;	      tpos++;	    }				/* check for last match state */	if (i == L || tassign[i+1] == STC) {				/* check for wing unfolding */	  if (Prob2Score(hmm->end[kassign[i-1]], 1.) + INTSCALE <=  hmm->esc[kassign[i-1]])	    for (k = kassign[i]+1; k <= hmm->M; k++)	      {		tr->statetype[tpos] = STD;		tr->nodeidx[tpos]   = k;		tr->pos[tpos]       = 0;		tpos++;	      }				/* add on the end state */	  tr->statetype[tpos] = STE;	  tr->nodeidx[tpos]   = 0;	  tr->pos[tpos]       = 0; 	  tpos++;				/* and a nonemitting C state */	  tr->statetype[tpos] = STC;	  tr->nodeidx[tpos]   = 0;	  tr->pos[tpos]       = 0; 	  tpos++;	}	break;	      case STI:	tr->statetype[tpos] = STI;	tr->nodeidx[tpos]   = kassign[i];	tr->pos[tpos]       = i;	tpos++;	break;      case STN:	tr->statetype[tpos] = STN;	tr->nodeidx[tpos]   = 0;	tr->pos[tpos]       = i;	tpos++;	break;      case STC:	tr->statetype[tpos] = STC;	tr->nodeidx[tpos]   = 0;	tr->pos[tpos]       = i; 	tpos++;	break;      default: Die("Bogus state %s", Statetype(tassign[i]));      }    }				/* terminate the trace */  tr->statetype[tpos] = STT;  tr->nodeidx[tpos]   = 0;  tr->pos[tpos]       = 0;   tr->tlen = tpos+1;  *ret_tr = tr;  free(kassign);  free(tassign);  free(startlist);  free(endlist);  return ret_sc;}/* Function: Plan7ESTViterbi() *  * Purpose:  Frameshift-tolerant alignment of protein model to cDNA EST. *            *  */floatPlan7ESTViterbi(unsigned char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx){  struct dpmatrix_s *mx;  int **xmx;  int **mmx;  int **imx;  int **dmx;  int   i,k;  int   sc;  int   codon;    /* Allocate a DP matrix with 0..L rows, 0..M+1 columns.   */   mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx);  /* Initialization of the zero row (DNA sequence of length 0)   * Note that xmx[i][stN] = 0 by definition for all i,   *    and xmx[i][stT] = xmx[i][stC], so neither stN nor stT need   *    to be calculated in DP matrices.   */  xmx[0][XMN] = 0;		                     /* S->N, p=1            */  xmx[0][XMB] = hmm->xsc[XTN][MOVE];                 /* S->N->B, no N-tail   */  xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;  /* need seq to get here */  for (k = 0; k <= hmm->M; k++)    mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;      /* need seq to get here */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?