📄 core_algorithms.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
   *    STS-> (N, already implied by setting xmx[cur][XMN] = 0)   *    STB-> M   */   if (t1 == STM)    {      for (k = k1+1; k <= k3; k++)	{				/* transits into STD */	  dmx[cur][k] = -INFTY;	  if ((sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > -INFTY)	    dmx[cur][k] = sc;	  if ((sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k])	    dmx[cur][k] = sc;	}				/* transit into STE */      xmx[cur][XME] = -INFTY;      if ((sc = mmx[cur][k1] + hmm->esc[k1]) > -INFTY)	xmx[cur][XME] = sc;    }				/* transit into STB from STN */  xmx[cur][XMB] = -INFTY;  if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY)    xmx[cur][XMB] = sc;				/* transit into STC from STE */  xmx[cur][XMC] = -INFTY;  if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > -INFTY)    xmx[cur][XMC] = sc;    /* Done initializing.   * Start recursive DP; sweep forward to chosen s2 midpoint. Done as a pull.   */  for (i = start+1; i <= s2; i++) {    cur = i % 2;    prv = !cur;    mmx[cur][k1] = imx[cur][k1] = dmx[cur][k1] = -INFTY;    /* Insert state in column k1, and B->M transition in k1.     */    if (k1 < hmm->M) {      imx[cur][k1] = -INFTY;      if ((sc = mmx[prv][k1] + hmm->tsc[k1][TMI]) > -INFTY)	imx[cur][k1] = sc;      if ((sc = imx[prv][k1] + hmm->tsc[k1][TII]) > imx[cur][k1])	imx[cur][k1] = sc;      if (hmm->isc[(int) dsq[i]][k1] != -INFTY)	imx[cur][k1] += hmm->isc[(int) dsq[i]][k1];      else	imx[cur][k1] = -INFTY;    }    if ((sc = xmx[prv][XMB] + hmm->bsc[k1]) > -INFTY)      mmx[cur][k1] = sc;    if (hmm->msc[(int) dsq[i]][k1] != -INFTY)      mmx[cur][k1] += hmm->msc[(int) dsq[i]][k1];    else      mmx[cur][k1] = -INFTY;    /* Main chunk of recursion across model positions     */    for (k = k1+1; k <= k3; k++) {				/* match state */      mmx[cur][k]  = -INFTY;      if ((sc = mmx[prv][k-1] + hmm->tsc[k-1][TMM]) > -INFTY)	mmx[cur][k] = sc;      if ((sc = imx[prv][k-1] + hmm->tsc[k-1][TIM]) > mmx[cur][k])	mmx[cur][k] = sc;      if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k])	mmx[cur][k] = sc;      if ((sc = dmx[prv][k-1] + hmm->tsc[k-1][TDM]) > mmx[cur][k])	mmx[cur][k] = sc;      if (hmm->msc[(int) dsq[i]][k] != -INFTY)	mmx[cur][k] += hmm->msc[(int) dsq[i]][k];      else	mmx[cur][k] = -INFTY;				/* delete state */      dmx[cur][k] = -INFTY;      if (k < hmm->M) {	if ((sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > -INFTY)	  dmx[cur][k] = sc;	if ((sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k])	  dmx[cur][k] = sc;      }				/* insert state */      imx[cur][k] = -INFTY;      if (k < hmm->M) {	if ((sc = mmx[prv][k] + hmm->tsc[k][TMI]) > -INFTY)	  imx[cur][k] = sc;	if ((sc = imx[prv][k] + hmm->tsc[k][TII]) > imx[cur][k])	  imx[cur][k] = sc;	if (hmm->isc[(int) dsq[i]][k] != -INFTY)	  imx[cur][k] += hmm->isc[(int) dsq[i]][k];	else	  imx[cur][k] = -INFTY;      }    }				/* N state */    xmx[cur][XMN] = -INFTY;    if ((sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY)      xmx[cur][XMN] = sc;				/* E state */    xmx[cur][XME] = -INFTY;    for (k = k1; k <= k3 && k <= hmm->M; k++)      if ((sc =  mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME])	xmx[cur][XME] = sc;				/* B state */    xmx[cur][XMB] = -INFTY;    if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY)      xmx[cur][XMB] = sc;				/* C state */    xmx[cur][XMC] = -INFTY;    if ((sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY)      xmx[cur][XMC] = sc;    if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC])      xmx[cur][XMC] = sc;  }  /* Row s2%2 in fwd matrix now contains valid scores from s1 (start) to s2,   * with J transitions disallowed (no cycles through model).    */  /*****************************************************************   * Backwards pass.   *****************************************************************/   /* Allocate our backwards two rows. Init last row.   */  bck = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx);  nxt = s3%2;  xmx[nxt][XMN] = xmx[nxt][XMB] = -INFTY;  xmx[nxt][XME] = xmx[nxt][XMC] = -INFTY;    for (k = k1; k <= k3 + 1; k++)    mmx[nxt][k] = imx[nxt][k] = dmx[nxt][k] = -INFTY;        cur = !nxt;  mmx[cur][k3+1] = imx[cur][k3+1] = dmx[cur][k3+1] = -INFTY;        /* Where to put the zero for our end point on last row.   */  switch (t3) {  case STM: mmx[nxt][k3]  = 0; break;  case STI: imx[nxt][k3]  = 0; break;  case STN: xmx[nxt][XMN] = 0; break;  case STC: xmx[nxt][XMC] = 0; break;   /* must be an emitting C */  case STT: xmx[nxt][XMC] = hmm->xsc[XTC][MOVE];  break; /* C->T implied */  default:  Die("you can't init get_wee_midpt with a %s\n", Statetype(t3));  }  /* Still initializing.   * In the case t3==STT, there are a few horizontal moves possible    * on row s3, because STT isn't an emitter. All other states are    * emitters, so their connections have to be to the previous row s3-1.   */  if (t3 == STT)     {				/* E->C */      xmx[nxt][XME] = xmx[nxt][XMC] + hmm->xsc[XTE][MOVE];				/* M->E */      for (k = k3; k >= k1; k--) {	mmx[nxt][k] = xmx[nxt][XME] + hmm->esc[k];	if (s3 != s2)	  mmx[nxt][k] += hmm->msc[(int)dsq[s3]][k];      }    }  /* Start recursive DP; sweep backwards to chosen s2 midpoint.   * Done as a pull. M, I scores at current row do /not/ include   * emission scores. Be careful of integer underflow.   */  for (i = s3-1; i >= s2; i--) {				/* note i < L, so i+1 is always a legal index */    cur = i%2;    nxt = !cur;				/* C pulls from C (T is special cased) */    xmx[cur][XMC] = -INFTY;    if ((sc = xmx[nxt][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY)      xmx[cur][XMC] = sc;				/* B pulls from M's */    xmx[cur][XMB] = -INFTY;    for (k = k1; k <= k3; k++)      if ((sc = mmx[nxt][k] + hmm->bsc[k]) > xmx[cur][XMB])	xmx[cur][XMB] = sc;				/* E pulls from C (J disallowed) */    xmx[cur][XME] = -INFTY;    if ((sc = xmx[cur][XMC] + hmm->xsc[XTE][MOVE]) > -INFTY)      xmx[cur][XME] = sc;				/* N pulls from B, N */    xmx[cur][XMN] = -INFTY;    if ((sc = xmx[cur][XMB] + hmm->xsc[XTN][MOVE]) > -INFTY)      xmx[cur][XMN] = sc;    if ((sc = xmx[nxt][XMN] + hmm->xsc[XTN][LOOP]) > xmx[cur][XMN])      xmx[cur][XMN] = sc;    /* Main recursion across model     */    for (k = k3; k >= k1; k--)  {				/* special case k == M */      if (k == hmm->M) {	mmx[cur][k] = xmx[cur][XME]; /* p=1 transition to E by definition */	dmx[cur][k] = -INFTY;	/* doesn't exist */	imx[cur][k] = -INFTY;	/* doesn't exist */	if (i != s2)	  mmx[cur][k] += hmm->msc[(int)dsq[i]][k];	continue;		      }    	/* below this k < M, so k+1 is a legal index */				/* pull into match state */      mmx[cur][k] = -INFTY;      if ((sc = xmx[cur][XME] + hmm->esc[k]) > -INFTY)	mmx[cur][k] = sc;       if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TMM]) > mmx[cur][k])	mmx[cur][k] = sc;       if ((sc = imx[nxt][k] + hmm->tsc[k][TMI]) > mmx[cur][k])	mmx[cur][k] = sc;       if ((sc = dmx[cur][k+1] + hmm->tsc[k][TMD]) > mmx[cur][k])	mmx[cur][k] = sc;      if (i != s2) 	mmx[cur][k] += hmm->msc[(int)dsq[i]][k];				/* pull into delete state */      dmx[cur][k] = -INFTY;      if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TDM]) > -INFTY)	dmx[cur][k] = sc;      if ((sc = dmx[cur][k+1] + hmm->tsc[k][TDD]) > dmx[cur][k])	dmx[cur][k] = sc;				/* pull into insert state */      imx[cur][k] = -INFTY;      if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TIM]) > -INFTY)	imx[cur][k] = sc;      if ((sc = imx[nxt][k] + hmm->tsc[k][TII]) > imx[cur][k])	imx[cur][k] = sc;      if (i != s2)	imx[cur][k] += hmm->isc[(int)dsq[i]][k];          }  }     /*****************************************************************   * DP complete; we have both forward and backward passes. Now we   * look across the s2 row and find the optimal emitting state.   *****************************************************************/    cur = s2%2;  max = -INFTY;  for (k = k1; k <= k3; k++)    {      if ((sc = fwd->mmx[cur][k] + bck->mmx[cur][k]) > max)	{ k2 = k; t2 = STM; max = sc; }      if ((sc = fwd->imx[cur][k] + bck->imx[cur][k]) > max)	{ k2 = k; t2 = STI; max = sc; }    }  if ((sc = fwd->xmx[cur][XMN] + bck->xmx[cur][XMN]) > max)    { k2 = 1;        t2 = STN; max = sc; }  if ((sc = fwd->xmx[cur][XMC] + bck->xmx[cur][XMC]) > max)    { k2 = hmm->M;   t2 = STC; max = sc; }  /*****************************************************************   * Garbage collection, return.   *****************************************************************/    FreePlan7Matrix(fwd);  FreePlan7Matrix(bck);  *ret_k2 = k2;  *ret_t2 = t2;  *ret_s2 = s2;  return Scorify(max);}/* Function: P7ViterbiAlignAlignment() * Date:     SRE, Sat Jul  4 13:39:00 1998 [St. Louis] * * Purpose:  Align a multiple alignment to an HMM without *           changing the multiple alignment itself. *           Adapted from P7Viterbi(). *            *           Heuristic; not a guaranteed optimal alignment. *           Guaranteeing an optimal alignment appears difficult. *           [cryptic note to myself:] In paths connecting to I* metastates, *           recursion breaks down; if there is a gap in the *           previous column for a given seq, we can't determine what state the *           I* metastate corresponds to for this sequence, unless we *           look back in the DP matrix. The lookback would either involve *           recursing back to the previous M* metastate (giving a *           O(MN^2) algorithm instead of O(MN)) or expanding the I* *           metastate into 3^nseq separate I* metastates to keep track *           of which of three states each seq is in. Since the second *           option blows up exponentially w/ nseq, it is not attractive. *           If the first option were used, the correct algorithm would be related to  *           modelmakers.c:Maxmodelmaker(), but somewhat more difficult. *            *           The heuristic approach here is to calculate a "consensus" *           sequence from the alignment, and align the consensus to the HMM. *           Some hackery is employed, weighting transitions and emissions *           to make things work (re: con and mocc arrays). * * Args:     aseq  - aligned sequences *           ainfo - info for aseqs (includes alen, nseq, wgt) *           hmm   - model to align to         * * Returns:  Traceback. Caller must free with P7FreeTrace(). *           pos[] contains alignment columns, indexed 1..alen. *           statetype[] contains metastates M*, etc. as STM, etc. */struct p7trace_s *P7ViterbiAlignAlignment(MSA *msa, struct plan7_s *hmm){  struct dpmatrix_s *mx;        /* Viterbi calculation lattice (two rows) */  struct dpshadow_s *tb;        /* shadow matrix of traceback pointers */  struct p7trace_s  *tr;        /* RETURN: traceback */  int  **xmx, **mmx, **imx, **dmx;  char **xtb, **mtb, **itb, **dtb;  float **con;                  /* [1..alen][0..Alphabet_size-1], consensus counts */  float  *mocc;                 /* fractional occupancy of a column; used to weight transitions */  int     i;			/* counter for columns */  int     k;			/* counter for model positions */  int     idx;			/* counter for seqs */  int     sym;			/* counter for alphabet symbols */  int     sc;			/* temp variable for holding score */  float   denom;		/* total weight of seqs; used to "normalize" counts */  int     cur, prv;  /* The "consensus" is a counts matrix, [1..alen][0..Alphabet_size-1].   * Gaps are not counted explicitly, but columns with lots of gaps get   * less total weight because they have fewer counts.   */				/* allocation */  con  = MallocOrDie(sizeof(float *) * (msa->alen+1));  mocc = MallocOrDie(sizeof(float)   * (msa->alen+1));  for (i = 1; i <= msa->alen; i++) {    con[i] = MallocOrDie(sizeof(float) * Alphabet_size);    FSet(con[i], Alphabet_size, 0.0);  }  mocc[0] = -9999.;				/* initialization */				/* note: aseq is off by one, 0..alen-1 */				/* "normalized" to have a max total count of 1 per col */  denom = FSum(msa->wgt, msa->nseq);  for (i = 1; i <= msa->alen; i++)    {      for (idx = 0; idx < msa->nseq; idx++)	if (! isgap(msa->aseq[idx][i-1]))	  P7CountSymbol(con[i], SYMIDX(msa->aseq[idx][i-1]), msa->wgt[idx]);      FScale(con[i], Alphabet_size, 1./denom);      mocc[i] = FSum(con[i], Alphabet_size);    }    /* Allocate a DP matrix with 2 rows, 0..M columns,   * and a shadow matrix with 0,1..alen rows, 0..M columns.   */   mx = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx);  tb = AllocShadowMatrix(msa->alen+1, hmm->M, &xtb, &mtb, &itb, &dtb);  /* Initialization of the zero row.   */  xmx[0][XMN] = 0;		                     /* S->N, p=1            */  xtb[0][XMN] = STS;  xmx[0][XMB] = hmm->xsc[XTN][MOVE];                 /* S->N->B, no N-tail   */  xtb[0][XMB] = STN;  xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;  /* need seq to get here */  tb->esrc[0] = 0;  xtb[0][XMC] = xtb[0][XMJ] = STBOGUS;    for (k = 0; k <= hmm->M; k++) {    mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;      /* need seq to get here */    mtb[0][k] = itb[0][k] = dtb[0][k] = STBOGUS;  }    /* Recursion. Done as a pull.   * Note some slightly wasteful boundary conditions:     *    tsc[0] = -INFTY for all eight transitions (no node 0)   *    D_M and I_M are wastefully calculated (they don't exist)   */  for (i = 1; i <= msa->alen; i++) {    cur = i % 2;    prv = ! cur;    mmx[cur][0] = imx[cur][0] = dmx[cur][0] = -INFTY;    mtb[i][0]   = itb[i][0]   = dtb[i][0]   = STBOGUS;    for (k = 1; k <= hmm->M; k++) {				/* match state */      mmx[cur][k]  = -INFTY;      mtb[i][k]    = STBOGUS;      if (mmx[prv][k-1] > -INFTY && hmm->tsc[k-1][TMM] > -INFTY &&	  (sc = mmx[prv][k-1] + hmm->tsc[k-1][TMM]) > mmx[cur][k])	{ mmx[cur][k] = sc; mtb[i][k] = STM; }      if (imx[prv][k-1] > -INFTY && hmm->tsc[k-1][TIM] > -INFTY &&	  (sc = imx[prv][k-1] + hmm->tsc[k-1][TIM] * mocc[i-1]) > mmx[cur][k])	{ mmx[cur][k] = sc; mtb[i][k] = STI; }      if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k])	{ mmx[cur][k] = sc; mtb[i][k] = STB; }
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -