fast_algorithms.c
来自「这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer」· C语言 代码 · 共 1,087 行 · 第 1/3 页
C
1,087 行
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-2003 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. ************************************************************//* fast_algorithms.c * SRE, Sun Nov 10 08:54:48 2002 [AA 3080, Denver to StL] * CVS $Id: fast_algorithms.c,v 1.9 2003/10/02 16:39:41 eddy Exp $ * * Optimized routines to replace slower implementations in core_algorithms.c. * * The routines in core_algorithms.c are designed for clarity * and maintainability, not for speed. Implementations here * are designed for speed, not clarity. If you're trying to * understand the code, or optimize for a specific platform, * you are probably better off looking at core_algorithms.c. * * P7Viterbi() is the key function to target optimization to. * The implementation in core_algorithms.c is currently ifdef'ed * out of the code. The implementation that is used by default * is here, in fast_algorithms.c. A third implementation, from * Erik Lindahl at Stanford, is Mac/Altivec specific. * * Which implementation is used is controlled by ifdef's. The * default implementation uses a fast implementation of * P7Viterbi() from here. Other options (mutually exclusive): * * -DSLOW * enable original core_algorithms.c code: slower than default, * but might be easier to follow, for someone trying * to understand the DP code. * -DALTIVEC * enable Erik Lindahl's Altivec code for Macintosh OSX */#include "config.h"#include "squidconf.h"#include "structs.h"#include "funcs.h"#include "squid.h"/* the DEFAULT P7Viterbi() is portably optimized; code follows: */#if !defined ALTIVEC && !defined SLOW/* Function: P7Viterbi() - portably optimized version * Incept: SRE, Fri Nov 15 13:14:33 2002 [St. Louis] * * Purpose: The Viterbi dynamic programming algorithm. * Derived from core_algorithms.c:P7Viterbi(). * * Args: dsq - sequence in digitized form * L - length of dsq * hmm - the model * mx - re-used DP matrix * ret_tr - RETURN: traceback; pass NULL if it's not wanted * * Return: log P(S|M)/P(S|R), as a bit score */floatP7Viterbi(unsigned char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s *mx, struct p7trace_s **ret_tr){ struct p7trace_s *tr; int **xmx; int **mmx; int **imx; int **dmx; int i,k; int sc; int *mc, *dc, *ic; /* pointers to rows of mmx, dmx, imx */ int *ms, *is; /* pointers to msc[i], isc[i] */ int *mpp, *mpc, *ip; /* ptrs to mmx[i-1], mmx[i], imx[i-1] */ int *bp; /* ptr into bsc[] */ int *ep; /* ptr into esc[] */ int xmb; /* value of xmx[i-1][XMB] */ int xme; /* max for xmx[i][XME] */ int *dpp; /* ptr into dmx[i-1] (previous row) */ int *tpmm, *tpmi, *tpmd, *tpim, *tpii, *tpdm, *tpdd; /* ptrs into tsc */ int M; /* Make sure we have space for a DP matrix with 0..L rows, 0..M-1 columns. */ ResizePlan7Matrix(mx, L, hmm->M, &xmx, &mmx, &imx, &dmx); /* Initialization of the zero row. */ xmx[0][XMN] = 0; /* S->N, p=1 */ xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ for (k = 0; k <= hmm->M; k++) mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ /* Initializations that help icc vectorize. */ M = hmm->M; /* Recursion. Done as a pull. * Note some slightly wasteful boundary conditions: * tsc[0] = -INFTY for all eight transitions (no node 0) * D_M and I_M are wastefully calculated (they don't exist) */ tpmm = hmm->tsc[TMM]; tpim = hmm->tsc[TIM]; tpdm = hmm->tsc[TDM]; tpmd = hmm->tsc[TMD]; tpdd = hmm->tsc[TDD]; tpmi = hmm->tsc[TMI]; tpii = hmm->tsc[TII]; bp = hmm->bsc; for (i = 1; i <= L; i++) { mc = mmx[i]; dc = dmx[i]; ic = imx[i]; mpp = mmx[i-1]; dpp = dmx[i-1]; ip = imx[i-1]; xmb = xmx[i-1][XMB]; ms = hmm->msc[dsq[i]]; is = hmm->isc[dsq[i]]; mc[0] = -INFTY; dc[0] = -INFTY; ic[0] = -INFTY; for (k = 1; k <= M; k++) { mc[k] = mpp[k-1] + tpmm[k-1]; if ((sc = ip[k-1] + tpim[k-1]) > mc[k]) mc[k] = sc; if ((sc = dpp[k-1] + tpdm[k-1]) > mc[k]) mc[k] = sc; if ((sc = xmb + bp[k]) > mc[k]) mc[k] = sc; mc[k] += ms[k]; if (mc[k] < -INFTY) mc[k] = -INFTY; dc[k] = dc[k-1] + tpdd[k-1]; if ((sc = mc[k-1] + tpmd[k-1]) > dc[k]) dc[k] = sc; if (dc[k] < -INFTY) dc[k] = -INFTY; if (k < M) { ic[k] = mpp[k] + tpmi[k]; if ((sc = ip[k] + tpii[k]) > ic[k]) ic[k] = sc; ic[k] += is[k]; if (ic[k] < -INFTY) ic[k] = -INFTY; } } /* Now the special states. Order is important here. * remember, C and J emissions are zero score by definition, */ /* N state */ xmx[i][XMN] = -INFTY; if ((sc = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) xmx[i][XMN] = sc; /* E state */ xme = -INFTY; mpc = mmx[i]; ep = hmm->esc; for (k = 1; k <= hmm->M; k++) if ((sc = mpc[k] + ep[k]) > xme) xme = sc; xmx[i][XME] = xme; /* J state */ xmx[i][XMJ] = -INFTY; if ((sc = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]) > -INFTY) xmx[i][XMJ] = sc; if ((sc = xmx[i][XME] + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ]) xmx[i][XMJ] = sc; /* B state */ xmx[i][XMB] = -INFTY; if ((sc = xmx[i][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[i][XMB] = sc; if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB]) xmx[i][XMB] = sc; /* C state */ xmx[i][XMC] = -INFTY; if ((sc = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) xmx[i][XMC] = sc; if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC]) xmx[i][XMC] = sc; } /* T state (not stored) */ sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; if (ret_tr != NULL) { P7ViterbiTrace(hmm, dsq, L, mx, &tr); *ret_tr = tr; } return Scorify(sc); /* the total Viterbi score. */}#endif /*default P7Viterbi, used when ALTIVEC and SLOW are not defined*/#ifdef ALTIVEC/*################################################################ * The Altivec port, for Macintosh PowerPC. * Erik Lindahl, Stanford University, 2002. * * Replaces the following functions: * AllocPlan7Body() plan7.c (data alignment on 16-byte boundaries) * CreatePlan7Matrix() core_algorithms.c (data alignment on 16-byte boundaries) * ResizePlan7Matrix() core_algorithms.c (data alignment on 16-byte boundaries) * P7Viterbi() core_algorithms.c (total recode, w/ Altivec instructions) ################################################################*/ voidAllocPlan7Body(struct plan7_s *hmm, int M) { int k, x; hmm->M = M; hmm->rf = MallocOrDie ((M+2) * sizeof(char)); hmm->cs = MallocOrDie ((M+2) * sizeof(char)); hmm->ca = MallocOrDie ((M+2) * sizeof(char)); hmm->map = MallocOrDie ((M+1) * sizeof(int)); hmm->t = MallocOrDie (M * sizeof(float *)); hmm->tsc = MallocOrDie (7 * sizeof(int *)); hmm->mat = MallocOrDie ((M+1) * sizeof(float *)); hmm->ins = MallocOrDie (M * sizeof(float *)); hmm->msc = MallocOrDie (MAXCODE * sizeof(int *)); hmm->isc = MallocOrDie (MAXCODE * sizeof(int *)); hmm->t[0] = MallocOrDie ((7*M) * sizeof(float)); /* Allocate extra memory so tsc[TMM,TIM,TDM,TMD,TDD] start on the * 16-byte cache boundary, and tsc[TMI,TII] start * 12 bytes offset from the boundary. */ hmm->tsc_mem = MallocOrDie (((7*(M+16))) * sizeof(int)); hmm->mat[0] = MallocOrDie ((MAXABET*(M+1)) * sizeof(float)); hmm->ins[0] = MallocOrDie ((MAXABET*M) * sizeof(float)); /* Allocate extra mem. to make sure all members of msc,isc start * on 12-byte offsets from cache boundary. */ hmm->msc_mem = MallocOrDie ((MAXCODE*(M+1+16)) * sizeof(int)); hmm->isc_mem = MallocOrDie ((MAXCODE*(M+16)) * sizeof(int)); /* note allocation strategy for important 2D arrays -- trying * to keep locality as much as possible, cache efficiency etc. */ for (k = 1; k <= M; k++) { hmm->mat[k] = hmm->mat[0] + k * MAXABET; if (k < M) { hmm->ins[k] = hmm->ins[0] + k * MAXABET; hmm->t[k] = hmm->t[0] + k * 7; } } /* align tsc pointers */ hmm->tsc[TMM] = (int *) (((((unsigned long int) hmm->tsc_mem) + 15) & (~0xf))); hmm->tsc[TMI] = (int *) (((((unsigned long int) hmm->tsc_mem) + (M+12)*sizeof(int) + 15) & (~0xf)) + 12); hmm->tsc[TMD] = (int *) (((((unsigned long int) hmm->tsc_mem) + 2*(M+12)*sizeof(int) + 15) & (~0xf))); hmm->tsc[TIM] = (int *) (((((unsigned long int) hmm->tsc_mem) + 3*(M+12)*sizeof(int) + 15) & (~0xf))); hmm->tsc[TII] = (int *) (((((unsigned long int) hmm->tsc_mem) + 4*(M+12)*sizeof(int) + 15) & (~0xf)) + 12); hmm->tsc[TDM] = (int *) (((((unsigned long int) hmm->tsc_mem) + 5*(M+12)*sizeof(int) + 15) & (~0xf))); hmm->tsc[TDD] = (int *) (((((unsigned long int) hmm->tsc_mem) + 6*(M+12)*sizeof(int) + 15) & (~0xf))); for (x = 0; x < MAXCODE; x++) { hmm->msc[x] = (int *) (((((unsigned long int)hmm->msc_mem) + x*(M+1+12)*sizeof(int) + 15) & (~0xf)) + 12); hmm->isc[x] = (int *) (((((unsigned long int)hmm->isc_mem) + x*(M+12)*sizeof(int) + 15) & (~0xf)) + 12); } /* tsc[0] is used as a boundary condition sometimes [Viterbi()], * so set to -inf always. */ for (x = 0; x < 7; x++) hmm->tsc[x][0] = -INFTY; hmm->begin = MallocOrDie ((M+1) * sizeof(float)); hmm->bsc_mem= MallocOrDie ((M+1+12) * sizeof(int)); hmm->end = MallocOrDie ((M+1) * sizeof(float)); hmm->esc_mem= MallocOrDie ((M+1+12) * sizeof(int)); hmm->bsc = (int *) (((((unsigned long int) hmm->bsc_mem) + 15) & (~0xf)) + 12); hmm->esc = (int *) (((((unsigned long int) hmm->esc_mem) + 15) & (~0xf)) + 12); return;} /* Function: CreatePlan7Matrix() * * Purpose: Create a dynamic programming matrix for standard Forward, * Backward, or Viterbi, with scores kept as scaled log-odds * integers. Keeps 2D arrays compact in RAM in an attempt * to maximize cache hits. * * The mx structure can be dynamically grown, if a new * HMM or seq exceeds the currently allocated size. Dynamic * growing is more efficient than an alloc/free of a whole * matrix for every new target. The ResizePlan7Matrix() * call does this reallocation, if needed. Here, in the * creation step, we set up some pads - to inform the resizing * call how much to overallocate when it realloc's. If a pad * is zero, we will not resize in that dimension. * * Args: N - N+1 rows are allocated, for sequence. * M - size of model in nodes * padN - over-realloc in seq/row dimension, or 0 * padM - over-realloc in HMM/column dimension, or 0 * * Return: mx * mx is allocated here. Caller frees with FreePlan7Matrix(mx). */struct dpmatrix_s *CreatePlan7Matrix(int N, int M, int padN, int padM){ struct dpmatrix_s *mx; int i,n; mx = (struct dpmatrix_s *) MallocOrDie (sizeof(struct dpmatrix_s)); mx->xmx = (int **) MallocOrDie (sizeof(int *) * (N+1)); mx->mmx = (int **) MallocOrDie (sizeof(int *) * (N+1)); mx->imx = (int **) MallocOrDie (sizeof(int *) * (N+1)); mx->dmx = (int **) MallocOrDie (sizeof(int *) * (N+1)); /* For the memory accessed by the altivec routines, we want to have * accesses aligned to 16-byte boundaries as far as possible. * To accomplish this, we align extra memory and then set the first * pointer on each row to point to 4 bytes before a boundary. * This means element 1, which is the first one we work on, will be * on a 16-byte boundary. We still make sure we 'own' the three bytes * before, though, so we can load the vector with element 0 cache-aligned too. * The real pointers to memory are kept in xmx_mem,mmx_mem,imx_mem,dmx_mem. */ mx->xmx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(5 + 16)); mx->mmx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(M+2+16)); mx->imx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(M+2+16)); mx->dmx_mem = (void *) MallocOrDie (sizeof(int) * (N+1)*(M+2+16)); mx->xmx[0] = (int *) (((((unsigned long int) mx->xmx_mem) + 15) & (~0xf)) + 12); mx->mmx[0] = (int *) (((((unsigned long int) mx->mmx_mem) + 15) & (~0xf)) + 12); mx->imx[0] = (int *) (((((unsigned long int) mx->imx_mem) + 15) & (~0xf)) + 12); mx->dmx[0] = (int *) (((((unsigned long int) mx->dmx_mem) + 15) & (~0xf)) + 12); /* And make sure the beginning of each row is aligned the same way */ for (i = 1; i <= N; i++) { mx->xmx[i] = mx->xmx[0] + i*(5+11) ; /* add 11 bytes per row, making it divisible by 4 */ n = 12 - (M+2)%4; mx->mmx[i] = mx->mmx[0] + i*(M+2+n); mx->imx[i] = mx->imx[0] + i*(M+2+n); mx->dmx[i] = mx->dmx[0] + i*(M+2+n); } mx->maxN = N; mx->maxM = M; mx->padN = padN; mx->padM = padM; return mx;}/* Function: ResizePlan7Matrix() * * Purpose: Reallocate a dynamic programming matrix, if necessary, * for a problem of NxM: sequence length N, model size M. * (N=1 for small memory score-only variants; we allocate * N+1 rows in the DP matrix.)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?