📄 core_algorithms.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* core_algorithms.c * SRE, Mon Nov 11 15:58:52 1996 * CVS $Id: core_algorithms.c,v 1.21 2001/05/14 17:58:44 eddy Exp $ *  * Simple and robust "research" implementations of Forward, Backward, * and Viterbi for Plan7. */#include "structs.h"#include "config.h"#include "funcs.h"#include "squid.h"#include <string.h>#include <assert.h>static float get_wee_midpt(struct plan7_s *hmm, char *dsq, int L, 			   int k1, char t1, int s1,			   int k3, char t3, int s3,			   int *ret_k2, char *ret_t2, int *ret_s2);/* Function: AllocPlan7Matrix() *  * Purpose:  Allocate a dynamic programming matrix for standard Forward, *           Backward, or Viterbi, with scores kept as scaled log-odds *           integers. Keeps 2D arrays compact in RAM in an attempt  *           to maximize cache hits. Sets up individual ptrs to the *           four matrix components as a convenience. *            * Args:     rows  - number of rows to allocate; typically L+1 *           M     - size of model *           xmx, mmx, imx, dmx  *                 - RETURN: ptrs to four mx components as a convenience *                  * Return:   mx *           mx is allocated here. Caller frees with FreeDPMatrix(mx). */struct dpmatrix_s *AllocPlan7Matrix(int rows, int M, int ***xmx, int ***mmx, int ***imx, int ***dmx){  struct dpmatrix_s *mx;  int i;  mx         = (struct dpmatrix_s *) MallocOrDie (sizeof(struct dpmatrix_s));  mx->xmx    = (int **) MallocOrDie (sizeof(int *) * rows);  mx->mmx    = (int **) MallocOrDie (sizeof(int *) * rows);  mx->imx    = (int **) MallocOrDie (sizeof(int *) * rows);  mx->dmx    = (int **) MallocOrDie (sizeof(int *) * rows);  mx->xmx[0] = (int *)  MallocOrDie (sizeof(int) * (rows*5));  mx->mmx[0] = (int *)  MallocOrDie (sizeof(int) * (rows*(M+2)));  mx->imx[0] = (int *)  MallocOrDie (sizeof(int) * (rows*(M+2)));  mx->dmx[0] = (int *)  MallocOrDie (sizeof(int) * (rows*(M+2)));  for (i = 1; i < rows; i++)    {      mx->xmx[i] = mx->xmx[0] + (i*5);       mx->mmx[i] = mx->mmx[0] + (i*(M+2));      mx->imx[i] = mx->imx[0] + (i*(M+2));      mx->dmx[i] = mx->dmx[0] + (i*(M+2));    }  if (xmx != NULL) *xmx = mx->xmx;  if (mmx != NULL) *mmx = mx->mmx;  if (imx != NULL) *imx = mx->imx;  if (dmx != NULL) *dmx = mx->dmx;  return mx;}/* Function: FreePlan7Matrix() *  * Purpose:  Free a dynamic programming matrix allocated by AllocPlan7Matrix(). *  * Return:   (void) */voidFreePlan7Matrix(struct dpmatrix_s *mx){  free (mx->xmx[0]);  free (mx->mmx[0]);  free (mx->imx[0]);  free (mx->dmx[0]);  free (mx->xmx);  free (mx->mmx);  free (mx->imx);  free (mx->dmx);  free (mx);}/* Function: AllocShadowMatrix() *  * Purpose:  Allocate a dynamic programming traceback pointer matrix for  *           a Viterbi algorithm.  *            * Args:     rows  - number of rows to allocate; typically L+1 *           M     - size of model *           xtb, mtb, itb, dtb  *                 - RETURN: ptrs to four mx components as a convenience *                  * Return:   mx *           mx is allocated here. Caller frees with FreeDPMatrix(mx). */struct dpshadow_s *AllocShadowMatrix(int rows, int M, char ***xtb, char ***mtb, char ***itb, char ***dtb){  struct dpshadow_s *tb;  int i;  tb         = (struct dpshadow_s *) MallocOrDie (sizeof(struct dpshadow_s));  tb->xtb    = (char **) MallocOrDie (sizeof(char *) * rows);  tb->mtb    = (char **) MallocOrDie (sizeof(char *) * rows);  tb->itb    = (char **) MallocOrDie (sizeof(char *) * rows);  tb->dtb    = (char **) MallocOrDie (sizeof(char *) * rows);  tb->esrc   = (int *)   MallocOrDie (sizeof(int)  * rows);  tb->xtb[0] = (char *)  MallocOrDie (sizeof(char) * (rows*5));  tb->mtb[0] = (char *)  MallocOrDie (sizeof(char) * (rows*(M+2)));  tb->itb[0] = (char *)  MallocOrDie (sizeof(char) * (rows*(M+2)));  tb->dtb[0] = (char *)  MallocOrDie (sizeof(char) * (rows*(M+2)));  for (i = 1; i < rows; i++)    {      tb->xtb[i] = tb->xtb[0] + (i*5);       tb->mtb[i] = tb->mtb[0] + (i*(M+2));      tb->itb[i] = tb->itb[0] + (i*(M+2));      tb->dtb[i] = tb->dtb[0] + (i*(M+2));    }  if (xtb != NULL) *xtb = tb->xtb;  if (mtb != NULL) *mtb = tb->mtb;  if (itb != NULL) *itb = tb->itb;  if (dtb != NULL) *dtb = tb->dtb;  return tb;}/* Function: FreeShadowMatrix() *  * Purpose:  Free a dynamic programming matrix allocated by AllocShadowMatrix(). *  * Return:   (void) */voidFreeShadowMatrix(struct dpshadow_s *tb){  free (tb->xtb[0]);  free (tb->mtb[0]);  free (tb->itb[0]);  free (tb->dtb[0]);  free (tb->esrc);  free (tb->xtb);  free (tb->mtb);  free (tb->itb);  free (tb->dtb);  free (tb);}/* Function: P7ViterbiSize() * Date:     SRE, Fri Mar  6 15:13:20 1998 [St. Louis] * * Purpose:  Returns the ballpark predicted memory requirement for a  *           P7Viterbi() alignment, in MB.  *            *           Currently L must fit in an int (< 2 GB), but we have *           to deal with LM > 2 GB - e.g. watch out for overflow, do *           the whole calculation in floating point. Bug here detected *           in 2.1.1 by David Harper, Sanger Centre. *                                     * Args:     L  - length of sequence *           M  - length of HMM        * * Returns:  # of MB */intP7ViterbiSize(int L, int M){  float Mbytes;  /* We're excessively precise here, but it doesn't cost   * us anything to be pedantic. The four terms are:   *   1. the matrix structure itself;   *   2. the O(NM) main matrix (this dominates!)   *   3. ptrs into the rows of the matrix   *   4. storage for 5 special states. (xmx)   */  Mbytes =  (float) sizeof(struct dpmatrix_s);   Mbytes += 3. * (float) (L+1) * (float) (M+2) * (float) sizeof(int);   Mbytes += 4. * (float) (L+1) * (float) sizeof(int *);   Mbytes += 5. * (float) (L+1) * (float) sizeof(int);  Mbytes /= 1048576.;  return (int) Mbytes;}/* Function: P7SmallViterbiSize() * Date:     SRE, Fri Mar  6 15:20:04 1998 [St. Louis] * * Purpose:  Returns the ballpark predicted memory requirement for *           a P7SmallViterbi() alignment, in MB.  *            *           P7SmallViterbi() is a wrapper, calling both P7ParsingViterbi() *           and P7WeeViterbi(). P7ParsingViterbi() typically dominates *           the memory requirement, so the value returned *           is the P7ParsingViterbi() number. *      *           We don't (yet) worry about overflow issues like we did with *           P7ViterbiSize(). We'll have many other 32-bit int issues in the *           code if we overflow here. * * Args:     L - length of sequence *           M - length of HMM    *                * Returns:  # of MB */intP7SmallViterbiSize(int L, int M){  return ((2 * sizeof(struct dpmatrix_s) +	   12 * (M+2) * sizeof(int)      + /* 2 matrices w/ 2 rows */ 	   16 * sizeof(int *)            + /* ptrs into rows of matrix */	   20 * sizeof(int)              + /* 5 special states     */	   2  * (L+1) * sizeof(int))       /* traceback indices    */      	  / 1000000);}/* Function: P7WeeViterbiSize() * Date:     SRE, Fri Mar  6 15:40:42 1998 [St. Louis] * * Purpose:  Returns the ballpark predicted memory requirement for *           a P7WeeViterbi() alignment, in MB.  * * Args:     L - length of sequence *           M - length of HMM    *                * Returns:  # of MB  */intP7WeeViterbiSize(int L, int M){  return ((2 * sizeof(struct dpmatrix_s) +	   12 * (M+2) * sizeof(int)      + /* 2 matrices w/ 2 rows */ 	   16 * sizeof(int *)            + /* ptrs into rows of matrix */	   20 * sizeof(int)              + /* 5 special states      */	   2  * (L+2) * sizeof(int) +      /* stacks for starts/ends (overkill) */      	   (L+2) * sizeof(int) +           /* k assignments to seq positions */	   (L+2) * sizeof(char))           /* state assignments to seq pos */	  / 1000000);}/* Function: P7Forward() *  * Purpose:  The Forward dynamic programming algorithm. *           The scaling issue is dealt with by working in log space *           and calling ILogsum(); this is a slow but robust approach. *            * Args:     dsq    - sequence in digitized form *           L      - length of dsq *           hmm    - the model *           ret_mx - RETURN: dp matrix; pass NULL if it's not wanted *            * Return:   log P(S|M)/P(S|R), as a bit score. */floatP7Forward(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx){  struct dpmatrix_s *mx;  int **xmx;  int **mmx;  int **imx;  int **dmx;  int   i,k;  int   sc;  /* Allocate a DP matrix with 0..L rows, 0..M-1 columns.   */   mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx);  /* Initialization of the zero row.   * Note that xmx[i][stN] = 0 by definition for all i,   *    and xmx[i][stT] = xmx[i][stC], so neither stN nor stT need   *    to be calculated in DP matrices.   */  xmx[0][XMN] = 0;		                     /* S->N, p=1            */  xmx[0][XMB] = hmm->xsc[XTN][MOVE];                 /* S->N->B, no N-tail   */  xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;  /* need seq to get here */  for (k = 0; k <= hmm->M; k++)    mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;      /* need seq to get here */  /* Recursion. Done as a pull.   * Note some slightly wasteful boundary conditions:     *    tsc[0] = -INFTY for all eight transitions (no node 0)   *    D_M and I_M are wastefully calculated (they don't exist)   */  for (i = 1; i <= L; i++)    {      mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY;      for (k = 1; k < hmm->M; k++)	{	  mmx[i][k]  = ILogsum(ILogsum(mmx[i-1][k-1] + hmm->tsc[k-1][TMM],				     imx[i-1][k-1] + hmm->tsc[k-1][TIM]),			      ILogsum(xmx[i-1][XMB] + hmm->bsc[k],				     dmx[i-1][k-1] + hmm->tsc[k-1][TDM]));	  mmx[i][k] += hmm->msc[(int) dsq[i]][k];	  dmx[i][k]  = ILogsum(mmx[i][k-1] + hmm->tsc[k-1][TMD],			      dmx[i][k-1] + hmm->tsc[k-1][TDD]);	  imx[i][k]  = ILogsum(mmx[i-1][k] + hmm->tsc[k][TMI],			      imx[i-1][k] + hmm->tsc[k][TII]);	  imx[i][k] += hmm->isc[(int) dsq[i]][k];	}      mmx[i][hmm->M] = ILogsum(ILogsum(mmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TMM],				   imx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TIM]),			       ILogsum(xmx[i-1][XMB] + hmm->bsc[hmm->M-1],				   dmx[i-1][hmm->M-1] + hmm->tsc[hmm->M-1][TDM]));      mmx[i][hmm->M] += hmm->msc[(int) dsq[i]][hmm->M];      /* Now the special states.       * remember, C and J emissions are zero score by definition       */      xmx[i][XMN] = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP];      xmx[i][XME] = -INFTY;      for (k = 1; k <= hmm->M; k++)	xmx[i][XME] = ILogsum(xmx[i][XME], mmx[i][k] + hmm->esc[k]);      xmx[i][XMJ] = ILogsum(xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP],			   xmx[i][XME]   + hmm->xsc[XTE][LOOP]);      xmx[i][XMB] = ILogsum(xmx[i][XMN] + hmm->xsc[XTN][MOVE],			    xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]);      xmx[i][XMC] = ILogsum(xmx[i-1][XMC] + hmm->xsc[XTC][LOOP],			    xmx[i][XME] + hmm->xsc[XTE][MOVE]);    }			      sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE];  if (ret_mx != NULL) *ret_mx = mx;  else                FreePlan7Matrix(mx);  return Scorify(sc);		/* the total Forward score. */}      /* Function: P7Viterbi() *  * Purpose:  The Viterbi dynamic programming algorithm.  *           Identical to Forward() except that max's *           replace sum's.  *            * Args:     dsq    - sequence in digitized form *           L      - length of dsq *           hmm    - the model *           ret_tr - RETURN: traceback; pass NULL if it's not wanted *            * Return:   log P(S|M)/P(S|R), as a bit score */floatP7Viterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr){  struct dpmatrix_s *mx;  struct p7trace_s  *tr;  int **xmx;  int **mmx;  int **imx;  int **dmx;  int   i,k;  int   sc;  /* Allocate a DP matrix with 0..L rows, 0..M-1 columns.   */   mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx);  /* Initialization of the zero row.   */  xmx[0][XMN] = 0;		                     /* S->N, p=1            */  xmx[0][XMB] = hmm->xsc[XTN][MOVE];                 /* S->N->B, no N-tail   */  xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;  /* need seq to get here */  for (k = 0; k <= hmm->M; k++)    mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;      /* need seq to get here */  /* Recursion. Done as a pull.   * Note some slightly wasteful boundary conditions:     *    tsc[0] = -INFTY for all eight transitions (no node 0)
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -