core_algorithms.c

来自「这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer」· C语言 代码 · 共 1,965 行 · 第 1/5 页

C
1,965
字号
  /* Initialization of the first row (DNA sequence of length 1);   * only N state can make this nucleotide.   */  xmx[1][XMN] = xmx[0][XMN] + hmm->xsc[XTN][LOOP];  xmx[1][XMB] = xmx[1][XMN] + hmm->xsc[XTN][MOVE];   xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;  /* need 2 nt to get here */  for (k = 0; k <= hmm->M; k++)    mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;      /* need 2 nt to get into model */  /* Recursion. Done as a pull.   * Note some slightly wasteful boundary conditions:     *    tsc[0] = -INFTY for all eight transitions (no node 0)   *    D_M and I_M are wastefully calculated (they don't exist)   */  for (i = 2; i <= L; i++) {    mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY;				/* crude calculation of lookup value for codon */    if (i > 2) {      if (dsq[i-2] < 4 && dsq[i-1] < 4 && dsq[i] < 4)	codon = dsq[i-2] * 16 + dsq[i-1] * 4 + dsq[i];      else	codon = 64;		/* ambiguous codon; punt */    }    for (k = 1; k <= hmm->M; k++) {				/* match state */      if (i > 2) {	mmx[i][k]  = mmx[i-3][k-1] + hmm->tsc[TMM][k-1];	if ((sc = imx[i-3][k-1] + hmm->tsc[TIM][k-1]) > mmx[i][k])	  mmx[i][k] = sc;	if ((sc = xmx[i-3][XMB] + hmm->bsc[k]) > mmx[i][k])	  mmx[i][k] = sc;	if ((sc = dmx[i-3][k-1] + hmm->tsc[TDM][k-1]) > mmx[i][k])	  mmx[i][k] = sc;	mmx[i][k] += hmm->dnam[codon][k];      }				/* -1 frameshifts into match state */      if ((sc = mmx[i-2][k-1] + hmm->tsc[TMM][k-1] + hmm->dna2) > mmx[i][k])	mmx[i][k] = sc;      if ((sc = imx[i-2][k-1] + hmm->tsc[TIM][k-1] + hmm->dna2) > mmx[i][k])	mmx[i][k] = sc;      if ((sc = xmx[i-2][XMB] + hmm->bsc[k] + hmm->dna2) > mmx[i][k])	mmx[i][k] = sc;      if ((sc = dmx[i-2][k-1] + hmm->tsc[TDM][k-1] + hmm->dna2) > mmx[i][k])	mmx[i][k] = sc;      				/* +1 frameshifts into match state */      if (i > 3) {	if ((sc = mmx[i-4][k-1] + hmm->tsc[TMM][k-1] + hmm->dna4) > mmx[i][k])	  mmx[i][k] = sc;	if ((sc = imx[i-4][k-1] + hmm->tsc[TIM][k-1] + hmm->dna4) > mmx[i][k])	  mmx[i][k] = sc;	if ((sc = xmx[i-4][XMB] + hmm->bsc[k] + hmm->dna4) > mmx[i][k])	  mmx[i][k] = sc;	if ((sc = dmx[i-4][k-1] + hmm->tsc[TDM][k-1] + hmm->dna4) > mmx[i][k])	  mmx[i][k] = sc;      }      				/* delete state */      dmx[i][k]  = mmx[i][k-1] + hmm->tsc[TMD][k-1];      if ((sc = dmx[i][k-1] + hmm->tsc[TDD][k-1]) > dmx[i][k])	dmx[i][k] = sc;				/* insert state */      if (i > 2) {	imx[i][k] = mmx[i-3][k] + hmm->tsc[TMI][k];	if ((sc = imx[i-3][k] + hmm->tsc[TII][k]) > imx[i][k])	  imx[i][k] = sc;	imx[i][k] += hmm->dnai[codon][k];      }				/* -1 frameshifts into insert state */      if ((sc = mmx[i-2][k] + hmm->tsc[TMI][k] + hmm->dna2) > imx[i][k])	imx[i][k] = sc;      if ((sc = imx[i-2][k] + hmm->tsc[TII][k] + hmm->dna2) > imx[i][k])	imx[i][k] = sc;				/* +1 frameshifts into insert state */      if (i > 4) {	if ((sc = mmx[i-4][k] + hmm->tsc[TMI][k] + hmm->dna4) > imx[i][k])	  imx[i][k] = sc;	if ((sc = imx[i-4][k] + hmm->tsc[TII][k] + hmm->dna4) > imx[i][k])	  imx[i][k] = sc;      }    }    /* Now the special states. Order is important here.     * remember, C and J emissions are zero score by definition,     */				/* N state: +1 nucleotide */    xmx[i][XMN] = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP];                                /* E state: collect from M's, and last D  */    xmx[i][XME] = dmx[i][hmm->M];    /* transition prob from last D = 1.0 */    for (k = 1; k <= hmm->M; k++)      if ((sc =  mmx[i][k] + hmm->esc[k]) > xmx[i][XME])        xmx[i][XME] = sc;                                /* J state: +1 nucleotide */    xmx[i][XMJ] = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP];    if ((sc = xmx[i][XME]   + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ])      xmx[i][XMJ] = sc;                                /* B state: collect from N,J */    xmx[i][XMB] = xmx[i][XMN] + hmm->xsc[XTN][MOVE];    if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB])      xmx[i][XMB] = sc;				/* C state: +1 nucleotide */    xmx[i][XMC] = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP];    if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC])      xmx[i][XMC] = sc;  }  sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE];  if (ret_mx != NULL) *ret_mx = mx;  else                FreePlan7Matrix(mx);  return Scorify(sc);            /* the total Viterbi score. */}/* Function: get_wee_midpt() * Date:     SRE, Wed Mar  4 08:27:11 1998 [St. Louis] * * Purpose:  The heart of the divide and conquer algorithm *           for P7WeeViterbi(). This function is called *           recursively to find successive optimal midpoints *           in the alignment matrix. See P7WeeViterbi() for *           further comments on the assumptions of this algorithm. *            * Args:     hmm   - the model, set up for integer scores *           dsq   - the sequence, digitized *           L     - length of the sequence *           k1    - model node to start with, 1..M *           t1    - state type to start with, STM | STI | STN | STC; STS to start *           s1    - sequence position to start with, 1..L; 1 to start *           k3    - model node to end with, 1..M *           t3    - state type to end with, STM | STI | STN | STC; STT to start *           s3    - sequence position to end with, 1..L; L to start *          ret_k2 - RETURN: optimal midpoint, node position in model *          ret_t2 - RETURN: optimal midpoint, state type  *          ret_s2 - RETURN: optimal midpoint, sequence position * * Returns: score of optimal alignment, in bits.  */static floatget_wee_midpt(struct plan7_s *hmm, unsigned char *dsq, int L, 	      int k1, char t1, int s1,	      int k3, char t3, int s3,	      int *ret_k2, char *ret_t2, int *ret_s2){  struct dpmatrix_s *fwd;  struct dpmatrix_s *bck;  int        **xmx;             /* convenience ptr into special states */  int        **mmx;             /* convenience ptr into match states   */  int        **imx;             /* convenience ptr into insert states  */  int        **dmx;             /* convenience ptr into delete states  */  int          k2;  char         t2;  int          s2;  int          cur, prv, nxt;	/* current, previous, next row index (0 or 1)*/  int          i,k;		/* indices for seq, model */  int          sc;		/* integer score */  int          max;		/* maximum integer score */  int          start;		/* s1 to start at (need, for STS special case) */   /* Choose our midpoint.   * Special cases: s1, s3 adjacent and t1 == STS: s2 = s1   *                s1, s3 adjacent and t3 == STT: s2 = s3   *                (where we must replace STS, STT eventually)   */  s2 = s1 + (s3-s1) / 2;  if (s3-s1 == 1 && t1 == STS) s2 = s1;  if (s3-s1 == 1 && t3 == STT) s2 = s3;  /* STS is a special case. STS aligns to row zero by convention,   * but we'll be passed s1=1, t1=STS. We have to init on row   * zero then start DP on row 1.   */  start = (t1 == STS) ? 0 : s1;  /* Allocate our forward two rows.   * Initialize row zero.   */  fwd = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx);  cur = start%2;  xmx[cur][XMN] = xmx[cur][XMB] = -INFTY;  xmx[cur][XME] = xmx[cur][XMC] = -INFTY;    for (k = k1; k <= k3; k++)    mmx[cur][k] = imx[cur][k] = dmx[cur][k] = -INFTY;        /* Where to put our zero for our start point...   * (only possible to start on an emitting state; J disallowed)   */  switch (t1) {  case STM: mmx[cur][k1]  = 0; break;  case STI: imx[cur][k1]  = 0; break;  case STN: xmx[cur][XMN] = 0; break;  case STC: xmx[cur][XMC] = 0; break;  case STS: xmx[cur][XMN] = 0; break;  default:  Die("you can't init get_wee_midpt with a %s\n", Statetype(t1));  }  /* Still initializing.   * Deal with pulling horizontal matrix moves in initial row.   * These are any transitions to nonemitters:   *    STM-> E, D   *    STI-> none   *    STN-> B   *    STC-> (T, but we never observe this in the forward pass of a d&c)   *    STE-> C   *    STS-> (N, already implied by setting xmx[cur][XMN] = 0)   *    STB-> M   */   if (t1 == STM)    {      for (k = k1+1; k <= k3; k++)	{				/* transits into STD */	  dmx[cur][k] = -INFTY;	  if ((sc = mmx[cur][k-1] + hmm->tsc[TMD][k-1]) > -INFTY)	    dmx[cur][k] = sc;	  if ((sc = dmx[cur][k-1] + hmm->tsc[TDD][k-1]) > dmx[cur][k])	    dmx[cur][k] = sc;	}				/* transit into STE */      xmx[cur][XME] = -INFTY;      if ((sc = mmx[cur][k1] + hmm->esc[k1]) > -INFTY)	xmx[cur][XME] = sc;    }				/* transit into STB from STN */  xmx[cur][XMB] = -INFTY;  if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY)    xmx[cur][XMB] = sc;				/* transit into STC from STE */  xmx[cur][XMC] = -INFTY;  if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > -INFTY)    xmx[cur][XMC] = sc;    /* Done initializing.   * Start recursive DP; sweep forward to chosen s2 midpoint. Done as a pull.   */  for (i = start+1; i <= s2; i++) {    cur = i % 2;    prv = !cur;    mmx[cur][k1] = imx[cur][k1] = dmx[cur][k1] = -INFTY;    /* Insert state in column k1, and B->M transition in k1.     */    if (k1 < hmm->M) {      imx[cur][k1] = -INFTY;      if ((sc = mmx[prv][k1] + hmm->tsc[TMI][k1]) > -INFTY)	imx[cur][k1] = sc;      if ((sc = imx[prv][k1] + hmm->tsc[TII][k1]) > imx[cur][k1])	imx[cur][k1] = sc;      if (hmm->isc[dsq[i]][k1] != -INFTY)	imx[cur][k1] += hmm->isc[dsq[i]][k1];      else	imx[cur][k1] = -INFTY;    }    if ((sc = xmx[prv][XMB] + hmm->bsc[k1]) > -INFTY)      mmx[cur][k1] = sc;    if (hmm->msc[dsq[i]][k1] != -INFTY)      mmx[cur][k1] += hmm->msc[dsq[i]][k1];    else      mmx[cur][k1] = -INFTY;    /* Main chunk of recursion across model positions     */    for (k = k1+1; k <= k3; k++) {				/* match state */      mmx[cur][k]  = -INFTY;      if ((sc = mmx[prv][k-1] + hmm->tsc[TMM][k-1]) > -INFTY)	mmx[cur][k] = sc;      if ((sc = imx[prv][k-1] + hmm->tsc[TIM][k-1]) > mmx[cur][k])	mmx[cur][k] = sc;      if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k])	mmx[cur][k] = sc;      if ((sc = dmx[prv][k-1] + hmm->tsc[TDM][k-1]) > mmx[cur][k])	mmx[cur][k] = sc;      if (hmm->msc[dsq[i]][k] != -INFTY)	mmx[cur][k] += hmm->msc[dsq[i]][k];      else	mmx[cur][k] = -INFTY;				/* delete state */      dmx[cur][k] = -INFTY;      if (k < hmm->M) {	if ((sc = mmx[cur][k-1] + hmm->tsc[TMD][k-1]) > -INFTY)	  dmx[cur][k] = sc;	if ((sc = dmx[cur][k-1] + hmm->tsc[TDD][k-1]) > dmx[cur][k])	  dmx[cur][k] = sc;      }				/* insert state */      imx[cur][k] = -INFTY;      if (k < hmm->M) {	if ((sc = mmx[prv][k] + hmm->tsc[TMI][k]) > -INFTY)	  imx[cur][k] = sc;	if ((sc = imx[prv][k] + hmm->tsc[TII][k]) > imx[cur][k])	  imx[cur][k] = sc;	if (hmm->isc[dsq[i]][k] != -INFTY)	  imx[cur][k] += hmm->isc[dsq[i]][k];	else	  imx[cur][k] = -INFTY;      }    }				/* N state */    xmx[cur][XMN] = -INFTY;    if ((sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY)      xmx[cur][XMN] = sc;				/* E state */    xmx[cur][XME] = -INFTY;    for (k = k1; k <= k3 && k <= hmm->M; k++)      if ((sc =  mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME])	xmx[cur][XME] = sc;				/* B state */    xmx[cur][XMB] = -INFTY;    if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY)      xmx[cur][XMB] = sc;				/* C state */    xmx[cur][XMC] = -INFTY;    if ((sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY)      xmx[cur][XMC] = sc;    if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC])      xmx[cur][XMC] = sc;  }  /* Row s2%2 in fwd matrix now contains valid scores from s1 (start) to s2,   * with J transitions disallowed (no cycles through model).    */  /*****************************************************************   * Backwards pass.   *****************************************************************/   /* Allocate our backwards two rows. Init last row.   */  bck = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx);  nxt = s3%2;  xmx[nxt][XMN] = xmx[nxt][XMB] = -INFTY;  xmx[nxt][XME] = xmx[nxt][XMC] = -INFTY;    for (k = k1; k <= k3 + 1; k++)    mmx[nxt][k] = imx[nxt][k] = dmx[nxt][k] = -INFTY;        cur = !nxt;  mmx[cur][k3+1] = imx[cur][k3+1] = dmx[cur][k3+1] = -INFTY;        /* Where to put the zero for our end point on last row.   */  switch (t3) {  case STM: mmx[nxt][k3]  = 0; break;  case STI: imx[nxt][k3]  = 0; break;  case STN: xmx[nxt][XMN] = 0; break;  case STC: xmx[nxt][XMC] = 0; break;   /* must be an emitting C */  case STT: xmx[nxt][XMC] = hmm->xsc[XTC][MOVE];  break; /* C->T implied */  default:  Die("you can't init get_wee_midpt with a %s\n", Statetype(t3));  }  /* Still initializing.   * In the case t3==STT, there are a few horizontal moves possible    * on row s3, because STT isn't an emitter. All other states are    * emitters, so their connections have to be to the previous row s3-1.   */  if (t3 == STT)     {				/* E->C */      xmx[nxt][XME] = xmx[nxt][XMC] + hmm->xsc[XTE][MOVE];				/* M->E */      for (k = k3; k >= k1; k--) {	mmx[nxt][k] = xmx[nxt][XME] + hmm->esc[k];	if (s3 != s2)	  mmx[nxt][k] += hmm->msc[dsq[s3]][k];      }    }  /* Start recursive DP; sweep backwards to chosen s2 midpoint.   * Done as a pull. M, I scores at current row do /not/ include   * emission scores. Be careful of integer underflow.   */  for (i = s3-1; i >= s2; i--) {				/* note i < L, so i+1 is always a legal index */    cur = i%2;    nxt = !cur;				/* C pulls from C (T is special cased) */    xmx[cur][XMC] = -INFTY;    if ((sc = xmx[nxt][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY)      xmx[cur][XMC] = sc;				/* B pulls from M's */    xmx[cur][XMB] = -INFTY;    for (k = k1; k <= k3; k++)      if ((sc = mmx[nxt][k] + hmm->bsc[k]) > xmx[cur][XMB])	xmx[cur][XMB] = sc;				/* E pulls from C (J disallowed) */    xmx[cur][XME] = -INFTY;    if ((sc = xmx[cur][

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?