fast_algorithms.c

来自「这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer」· C语言 代码 · 共 1,087 行 · 第 1/3 页

C
1,087
字号
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-2003 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* fast_algorithms.c * SRE, Sun Nov 10 08:54:48 2002 [AA 3080, Denver to StL] * CVS $Id: fast_algorithms.c,v 1.9 2003/10/02 16:39:41 eddy Exp $ *  * Optimized routines to replace slower implementations in core_algorithms.c. *  * The routines in core_algorithms.c are designed for clarity * and maintainability, not for speed. Implementations here * are designed for speed, not clarity. If you're trying to  * understand the code, or optimize for a specific platform, * you are probably better off looking at core_algorithms.c. *  * P7Viterbi() is the key function to target optimization to. * The implementation in core_algorithms.c is currently ifdef'ed  * out of the code. The implementation that is used by default  * is here, in fast_algorithms.c. A third implementation, from * Erik Lindahl at Stanford, is Mac/Altivec specific. *  * Which implementation is used is controlled by ifdef's. The * default implementation uses a fast implementation of  * P7Viterbi() from here. Other options (mutually exclusive): *  * -DSLOW *   enable original core_algorithms.c code: slower than default, *   but might be easier to follow, for someone trying *   to understand the DP code. * -DALTIVEC *   enable Erik Lindahl's Altivec code for Macintosh OSX */#include "config.h"#include "squidconf.h"#include "structs.h"#include "funcs.h"#include "squid.h"/* the DEFAULT P7Viterbi() is portably optimized; code follows: */#if !defined ALTIVEC && !defined SLOW/* Function: P7Viterbi() - portably optimized version * Incept:   SRE, Fri Nov 15 13:14:33 2002 [St. Louis] *  * Purpose:  The Viterbi dynamic programming algorithm.  *           Derived from core_algorithms.c:P7Viterbi(). *            * Args:     dsq    - sequence in digitized form *           L      - length of dsq *           hmm    - the model *           mx     - re-used DP matrix *           ret_tr - RETURN: traceback; pass NULL if it's not wanted *            * Return:   log P(S|M)/P(S|R), as a bit score */floatP7Viterbi(unsigned char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s *mx, struct p7trace_s **ret_tr){  struct p7trace_s  *tr;  int **xmx;  int **mmx;  int **imx;  int **dmx;  int   i,k;  int   sc;  int  *mc, *dc, *ic;        /* pointers to rows of mmx, dmx, imx */  int  *ms, *is;             /* pointers to msc[i], isc[i] */  int  *mpp, *mpc, *ip;      /* ptrs to mmx[i-1], mmx[i], imx[i-1] */  int  *bp;		     /* ptr into bsc[] */  int  *ep;                  /* ptr into esc[] */  int   xmb;		     /* value of xmx[i-1][XMB] */  int   xme;                 /* max for xmx[i][XME] */  int  *dpp;                 /* ptr into dmx[i-1] (previous row) */  int  *tpmm, *tpmi, *tpmd, *tpim, *tpii, *tpdm, *tpdd; /* ptrs into tsc */  int   M;    /* Make sure we have space for a DP matrix with 0..L rows, 0..M-1 columns.   */   ResizePlan7Matrix(mx, L, hmm->M, &xmx, &mmx, &imx, &dmx);  /* Initialization of the zero row.   */  xmx[0][XMN] = 0;		                     /* S->N, p=1            */  xmx[0][XMB] = hmm->xsc[XTN][MOVE];                 /* S->N->B, no N-tail   */  xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY;  /* need seq to get here */  for (k = 0; k <= hmm->M; k++)    mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY;      /* need seq to get here */  /* Initializations that help icc vectorize.   */  M        = hmm->M;  /* Recursion. Done as a pull.   * Note some slightly wasteful boundary conditions:     *    tsc[0] = -INFTY for all eight transitions (no node 0)   *    D_M and I_M are wastefully calculated (they don't exist)   */  tpmm  = hmm->tsc[TMM];  tpim  = hmm->tsc[TIM];  tpdm  = hmm->tsc[TDM];  tpmd  = hmm->tsc[TMD];  tpdd  = hmm->tsc[TDD];  tpmi  = hmm->tsc[TMI];  tpii  = hmm->tsc[TII];  bp    = hmm->bsc;  for (i = 1; i <= L; i++) {    mc    = mmx[i];        dc    = dmx[i];    ic    = imx[i];    mpp   = mmx[i-1];    dpp   = dmx[i-1];    ip    = imx[i-1];    xmb   = xmx[i-1][XMB];    ms    = hmm->msc[dsq[i]];    is    = hmm->isc[dsq[i]];    mc[0] = -INFTY;    dc[0] = -INFTY;    ic[0] = -INFTY;    for (k = 1; k <= M; k++) {      mc[k] = mpp[k-1]   + tpmm[k-1];      if ((sc = ip[k-1]  + tpim[k-1]) > mc[k])  mc[k] = sc;      if ((sc = dpp[k-1] + tpdm[k-1]) > mc[k])  mc[k] = sc;      if ((sc = xmb  + bp[k])         > mc[k])  mc[k] = sc;       mc[k] += ms[k];      if (mc[k] < -INFTY) mc[k] = -INFTY;        dc[k] = dc[k-1] + tpdd[k-1];      if ((sc = mc[k-1] + tpmd[k-1]) > dc[k]) dc[k] = sc;      if (dc[k] < -INFTY) dc[k] = -INFTY;        if (k < M) {	ic[k] = mpp[k] + tpmi[k];	if ((sc = ip[k] + tpii[k]) > ic[k]) ic[k] = sc; 	ic[k] += is[k];	if (ic[k] < -INFTY) ic[k] = -INFTY;       }    }    /* Now the special states. Order is important here.     * remember, C and J emissions are zero score by definition,     */				/* N state */    xmx[i][XMN] = -INFTY;    if ((sc = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY)      xmx[i][XMN] = sc;				/* E state */    xme = -INFTY;    mpc = mmx[i];    ep  = hmm->esc;    for (k = 1; k <= hmm->M; k++)      if ((sc =  mpc[k] + ep[k]) > xme) xme = sc;     xmx[i][XME] = xme;				/* J state */    xmx[i][XMJ] = -INFTY;    if ((sc = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]) > -INFTY)      xmx[i][XMJ] = sc;    if ((sc = xmx[i][XME]   + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ])      xmx[i][XMJ] = sc;				/* B state */    xmx[i][XMB] = -INFTY;    if ((sc = xmx[i][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY)      xmx[i][XMB] = sc;    if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB])      xmx[i][XMB] = sc;				/* C state */    xmx[i][XMC] = -INFTY;    if ((sc = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY)      xmx[i][XMC] = sc;    if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC])      xmx[i][XMC] = sc;  }				/* T state (not stored) */  sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE];  if (ret_tr != NULL) {    P7ViterbiTrace(hmm, dsq, L, mx, &tr);    *ret_tr = tr;  }  return Scorify(sc);		/* the total Viterbi score. */}#endif /*default P7Viterbi, used when ALTIVEC and SLOW are not defined*/#ifdef ALTIVEC/*################################################################ * The Altivec port, for Macintosh PowerPC. * Erik Lindahl, Stanford University, 2002. *  * Replaces the following functions: *    AllocPlan7Body()      plan7.c                (data alignment on 16-byte boundaries) *    CreatePlan7Matrix()   core_algorithms.c      (data alignment on 16-byte boundaries) *    ResizePlan7Matrix()   core_algorithms.c      (data alignment on 16-byte boundaries) *    P7Viterbi()           core_algorithms.c      (total recode, w/ Altivec instructions) ################################################################*/    voidAllocPlan7Body(struct plan7_s *hmm, int M) {  int k, x;  hmm->M = M;  hmm->rf     = MallocOrDie ((M+2) * sizeof(char));  hmm->cs     = MallocOrDie ((M+2) * sizeof(char));  hmm->ca     = MallocOrDie ((M+2) * sizeof(char));  hmm->map    = MallocOrDie ((M+1) * sizeof(int));  hmm->t      = MallocOrDie (M     *           sizeof(float *));  hmm->tsc    = MallocOrDie (7     *           sizeof(int *));  hmm->mat    = MallocOrDie ((M+1) *           sizeof(float *));  hmm->ins    = MallocOrDie (M     *           sizeof(float *));  hmm->msc    = MallocOrDie (MAXCODE   *       sizeof(int *));  hmm->isc    = MallocOrDie (MAXCODE   *       sizeof(int *));     hmm->t[0]   = MallocOrDie ((7*M)     *       sizeof(float));  /* Allocate extra memory so tsc[TMM,TIM,TDM,TMD,TDD] start on the    * 16-byte cache boundary, and tsc[TMI,TII] start   * 12 bytes offset from the boundary.    */  hmm->tsc_mem = MallocOrDie (((7*(M+16)))  *   sizeof(int));  hmm->mat[0] = MallocOrDie ((MAXABET*(M+1)) * sizeof(float));  hmm->ins[0] = MallocOrDie ((MAXABET*M) *     sizeof(float));  /* Allocate extra mem. to make sure all members of msc,isc start   * on 12-byte offsets from cache boundary.   */  hmm->msc_mem = MallocOrDie ((MAXCODE*(M+1+16)) * sizeof(int));  hmm->isc_mem = MallocOrDie ((MAXCODE*(M+16)) *   sizeof(int));  /* note allocation strategy for important 2D arrays -- trying   * to keep locality as much as possible, cache efficiency etc.   */  for (k = 1; k <= M; k++) {    hmm->mat[k] = hmm->mat[0] + k * MAXABET;    if (k < M) {      hmm->ins[k] = hmm->ins[0] + k * MAXABET;      hmm->t[k]   = hmm->t[0]   + k * 7;    }  }    /* align tsc pointers */  hmm->tsc[TMM] = (int *) (((((unsigned long int) hmm->tsc_mem) + 15) & (~0xf)));  hmm->tsc[TMI] = (int *) (((((unsigned long int) hmm->tsc_mem) + (M+12)*sizeof(int) + 15) & (~0xf)) + 12);  hmm->tsc[TMD] = (int *) (((((unsigned long int) hmm->tsc_mem) + 2*(M+12)*sizeof(int) + 15) & (~0xf)));  hmm->tsc[TIM] = (int *) (((((unsigned long int) hmm->tsc_mem) + 3*(M+12)*sizeof(int) + 15) & (~0xf)));  hmm->tsc[TII] = (int *) (((((unsigned long int) hmm->tsc_mem) + 4*(M+12)*sizeof(int) + 15) & (~0xf)) + 12);  hmm->tsc[TDM] = (int *) (((((unsigned long int) hmm->tsc_mem) + 5*(M+12)*sizeof(int) + 15) & (~0xf)));  hmm->tsc[TDD] = (int *) (((((unsigned long int) hmm->tsc_mem) + 6*(M+12)*sizeof(int) + 15) & (~0xf)));  for (x = 0; x < MAXCODE; x++) {    hmm->msc[x] = (int *) (((((unsigned long int)hmm->msc_mem) + x*(M+1+12)*sizeof(int) + 15) & (~0xf)) + 12);    hmm->isc[x] = (int *) (((((unsigned long int)hmm->isc_mem) + x*(M+12)*sizeof(int) + 15) & (~0xf)) + 12);  }  /* tsc[0] is used as a boundary condition sometimes [Viterbi()],   * so set to -inf always.   */  for (x = 0; x < 7; x++)    hmm->tsc[x][0] = -INFTY;  hmm->begin  = MallocOrDie  ((M+1) * sizeof(float));  hmm->bsc_mem= MallocOrDie  ((M+1+12) * sizeof(int));  hmm->end    = MallocOrDie  ((M+1) * sizeof(float));  hmm->esc_mem= MallocOrDie  ((M+1+12) * sizeof(int));  hmm->bsc = (int *) (((((unsigned long int) hmm->bsc_mem) + 15) & (~0xf)) + 12);  hmm->esc = (int *) (((((unsigned long int) hmm->esc_mem) + 15) & (~0xf)) + 12);    return;}  /* Function: CreatePlan7Matrix() *  * Purpose:  Create a dynamic programming matrix for standard Forward, *           Backward, or Viterbi, with scores kept as scaled log-odds *           integers. Keeps 2D arrays compact in RAM in an attempt  *           to maximize cache hits. *            *           The mx structure can be dynamically grown, if a new *           HMM or seq exceeds the currently allocated size. Dynamic *           growing is more efficient than an alloc/free of a whole *           matrix for every new target. The ResizePlan7Matrix() *           call does this reallocation, if needed. Here, in the *           creation step, we set up some pads - to inform the resizing *           call how much to overallocate when it realloc's. If a pad *           is zero, we will not resize in that dimension. *            * Args:     N     - N+1 rows are allocated, for sequence.   *           M     - size of model in nodes *           padN  - over-realloc in seq/row dimension, or 0 *           padM  - over-realloc in HMM/column dimension, or 0 *                  * Return:   mx *           mx is allocated here. Caller frees with FreePlan7Matrix(mx). */struct dpmatrix_s *CreatePlan7Matrix(int N, int M, int padN, int padM){  struct dpmatrix_s *mx;  int i,n;  mx         = (struct dpmatrix_s *) MallocOrDie (sizeof(struct dpmatrix_s));  mx->xmx    = (int **) MallocOrDie (sizeof(int *) * (N+1));  mx->mmx    = (int **) MallocOrDie (sizeof(int *) * (N+1));  mx->imx    = (int **) MallocOrDie (sizeof(int *) * (N+1));  mx->dmx    = (int **) MallocOrDie (sizeof(int *) * (N+1));  /* For the memory accessed by the altivec routines, we want to have   * accesses aligned to 16-byte boundaries as far as possible.   * To accomplish this, we align extra memory and then set the first   * pointer on each row to point to 4 bytes before a boundary.   * This means element 1, which is the first one we work on, will be   * on a 16-byte boundary. We still make sure we 'own' the three bytes   * before, though, so we can load the vector with element 0 cache-aligned too.   * The real pointers to memory are kept in xmx_mem,mmx_mem,imx_mem,dmx_mem.   */  mx->xmx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(5 + 16));  mx->mmx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(M+2+16));  mx->imx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(M+2+16));  mx->dmx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(M+2+16));    mx->xmx[0] = (int *) (((((unsigned long int) mx->xmx_mem) + 15) & (~0xf)) + 12);  mx->mmx[0] = (int *) (((((unsigned long int) mx->mmx_mem) + 15) & (~0xf)) + 12);  mx->imx[0] = (int *) (((((unsigned long int) mx->imx_mem) + 15) & (~0xf)) + 12);  mx->dmx[0] = (int *) (((((unsigned long int) mx->dmx_mem) + 15) & (~0xf)) + 12);    /* And make sure the beginning of each row is aligned the same way */  for (i = 1; i <= N; i++)    {      mx->xmx[i] = mx->xmx[0] + i*(5+11) ; /* add 11 bytes per row, making it divisible by 4 */      n = 12 - (M+2)%4;      mx->mmx[i] = mx->mmx[0] + i*(M+2+n);      mx->imx[i] = mx->imx[0] + i*(M+2+n);      mx->dmx[i] = mx->dmx[0] + i*(M+2+n);    }   mx->maxN = N;  mx->maxM = M;  mx->padN = padN;  mx->padM = padM;    return mx;}/* Function: ResizePlan7Matrix() *  * Purpose:  Reallocate a dynamic programming matrix, if necessary, *           for a problem of NxM: sequence length N, model size M. *           (N=1 for small memory score-only variants; we allocate *           N+1 rows in the DP matrix.) 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?