core_algorithms.c
来自「这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer」· C语言 代码 · 共 1,965 行 · 第 1/5 页
C
1,965 行
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-2003 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. ************************************************************//* core_algorithms.c * SRE, Mon Nov 11 15:58:52 1996 * CVS $Id: core_algorithms.c,v 1.35 2003/10/03 18:26:37 eddy Exp $ * * Simple and robust "research" implementations of Forward, Backward, * and Viterbi for Plan7. For optimized replacements for some of these functions, * see fast_algorithms.c. */#include "config.h"#include "squidconf.h"#include "structs.h"#include "funcs.h"#include "squid.h"#include <string.h>#include <assert.h>static float get_wee_midpt(struct plan7_s *hmm, unsigned char *dsq, int L, int k1, char t1, int s1, int k3, char t3, int s3, int *ret_k2, char *ret_t2, int *ret_s2);#ifndef ALTIVEC/* Function: CreatePlan7Matrix() * * Purpose: Create a dynamic programming matrix for standard Forward, * Backward, or Viterbi, with scores kept as scaled log-odds * integers. Keeps 2D arrays compact in RAM in an attempt * to maximize cache hits. * * The mx structure can be dynamically grown, if a new * HMM or seq exceeds the currently allocated size. Dynamic * growing is more efficient than an alloc/free of a whole * matrix for every new target. The ResizePlan7Matrix() * call does this reallocation, if needed. Here, in the * creation step, we set up some pads - to inform the resizing * call how much to overallocate when it realloc's. * * Args: N - N+1 rows are allocated, for sequence. * M - size of model in nodes * padN - over-realloc in seq/row dimension, or 0 * padM - over-realloc in HMM/column dimension, or 0 * * Return: mx * mx is allocated here. Caller frees with FreePlan7Matrix(mx). */struct dpmatrix_s *CreatePlan7Matrix(int N, int M, int padN, int padM){ struct dpmatrix_s *mx; int i; mx = (struct dpmatrix_s *) MallocOrDie (sizeof(struct dpmatrix_s)); mx->xmx = (int **) MallocOrDie (sizeof(int *) * (N+1)); mx->mmx = (int **) MallocOrDie (sizeof(int *) * (N+1)); mx->imx = (int **) MallocOrDie (sizeof(int *) * (N+1)); mx->dmx = (int **) MallocOrDie (sizeof(int *) * (N+1)); mx->xmx_mem = (void *) MallocOrDie (sizeof(int) * ((N+1)*5)); mx->mmx_mem = (void *) MallocOrDie (sizeof(int) * ((N+1)*(M+2))); mx->imx_mem = (void *) MallocOrDie (sizeof(int) * ((N+1)*(M+2))); mx->dmx_mem = (void *) MallocOrDie (sizeof(int) * ((N+1)*(M+2))); /* The indirect assignment below looks wasteful; it's actually * used for aligning data on 16-byte boundaries as a cache * optimization in the fast altivec implementation */ mx->xmx[0] = (int *) mx->xmx_mem; mx->mmx[0] = (int *) mx->mmx_mem; mx->imx[0] = (int *) mx->imx_mem; mx->dmx[0] = (int *) mx->dmx_mem; for (i = 1; i <= N; i++) { mx->xmx[i] = mx->xmx[0] + (i*5); mx->mmx[i] = mx->mmx[0] + (i*(M+2)); mx->imx[i] = mx->imx[0] + (i*(M+2)); mx->dmx[i] = mx->dmx[0] + (i*(M+2)); } mx->maxN = N; mx->maxM = M; mx->padN = padN; mx->padM = padM; return mx;}#endif /*ALTIVEC*/#ifndef ALTIVEC/* Function: ResizePlan7Matrix() * * Purpose: Reallocate a dynamic programming matrix, if necessary, * for a problem of NxM: sequence length N, model size M. * (N=1 for small memory score-only variants; we allocate * N+1 rows in the DP matrix.) * * We know (because of the way hmmsearch and hmmpfam are coded) * that only one of the two dimensions is going to change * in size after the first call to ResizePlan7Matrix(); * that is, for hmmsearch, we have one HMM of fixed size M * and our target sequences may grow in N; for hmmpfam, * we have one sequence of fixed size N and our target models * may grow in M. What we have to watch out for is P7SmallViterbi() * working on a divide and conquer problem and passing us N < maxN, * M > maxM; we should definitely *not* reallocate a smaller N. * Since we know that only one dimension is going to grow, * we aren't scared of reallocating to maxN,maxM. (If both * M and N could grow, we would be more worried.) * * Returns individual ptrs to the four matrix components * as a convenience. * * Args: mx - an already allocated model to grow. * N - seq length to allocate for; N+1 rows * M - size of model * xmx, mmx, imx, dmx * - RETURN: ptrs to four mx components as a convenience * * Return: (void) * mx is (re)allocated here. */voidResizePlan7Matrix(struct dpmatrix_s *mx, int N, int M, int ***xmx, int ***mmx, int ***imx, int ***dmx){ int i; if (N <= mx->maxN && M <= mx->maxM) goto DONE; if (N > mx->maxN) { N += mx->padN; mx->maxN = N; mx->xmx = (int **) ReallocOrDie (mx->xmx, sizeof(int *) * (mx->maxN+1)); mx->mmx = (int **) ReallocOrDie (mx->mmx, sizeof(int *) * (mx->maxN+1)); mx->imx = (int **) ReallocOrDie (mx->imx, sizeof(int *) * (mx->maxN+1)); mx->dmx = (int **) ReallocOrDie (mx->dmx, sizeof(int *) * (mx->maxN+1)); } if (M > mx->maxM) { M += mx->padM; mx->maxM = M; } mx->xmx_mem = (void *) ReallocOrDie (mx->xmx_mem, sizeof(int) * ((mx->maxN+1)*5)); mx->mmx_mem = (void *) ReallocOrDie (mx->mmx_mem, sizeof(int) * ((mx->maxN+1)*(mx->maxM+2))); mx->imx_mem = (void *) ReallocOrDie (mx->imx_mem, sizeof(int) * ((mx->maxN+1)*(mx->maxM+2))); mx->dmx_mem = (void *) ReallocOrDie (mx->dmx_mem, sizeof(int) * ((mx->maxN+1)*(mx->maxM+2))); mx->xmx[0] = (int *) mx->xmx_mem; mx->mmx[0] = (int *) mx->mmx_mem; mx->imx[0] = (int *) mx->imx_mem; mx->dmx[0] = (int *) mx->dmx_mem; for (i = 1; i <= mx->maxN; i++) { mx->xmx[i] = mx->xmx[0] + (i*5); mx->mmx[i] = mx->mmx[0] + (i*(mx->maxM+2)); mx->imx[i] = mx->imx[0] + (i*(mx->maxM+2)); mx->dmx[i] = mx->dmx[0] + (i*(mx->maxM+2)); } DONE: if (xmx != NULL) *xmx = mx->xmx; if (mmx != NULL) *mmx = mx->mmx; if (imx != NULL) *imx = mx->imx; if (dmx != NULL) *dmx = mx->dmx;}#endif /*ALTIVEC*//* Function: AllocPlan7Matrix() * Date: SRE, Tue Nov 19 07:14:47 2002 [St. Louis] * * Purpose: Used to be the main allocator for dp matrices; we used to * allocate, calculate, free. But this spent a lot of time * in malloc(). Replaced with Create..() and Resize..() to * allow matrix reuse in P7Viterbi(), the main alignment * engine. But matrices are alloc'ed by other alignment engines * too, ones that are less frequently called and less * important to optimization of cpu performance. Instead of * tracking changes through them, for now, provide * an Alloc...() call with the same API that's just a wrapper. * * Args: rows - generally L+1, or 2; # of DP rows in seq dimension to alloc * M - size of model, in nodes * xmx, mmx, imx, dmx * - RETURN: ptrs to four mx components as a convenience * * Returns: mx * Caller free's w/ FreePlan7Matrix() */struct dpmatrix_s *AllocPlan7Matrix(int rows, int M, int ***xmx, int ***mmx, int ***imx, int ***dmx){ struct dpmatrix_s *mx; mx = CreatePlan7Matrix(rows-1, M, 0, 0); if (xmx != NULL) *xmx = mx->xmx; if (mmx != NULL) *mmx = mx->mmx; if (imx != NULL) *imx = mx->imx; if (dmx != NULL) *dmx = mx->dmx; return mx;}/* Function: FreePlan7Matrix() * * Purpose: Free a dynamic programming matrix allocated by CreatePlan7Matrix(). * * Return: (void) */voidFreePlan7Matrix(struct dpmatrix_s *mx){ free (mx->xmx_mem); free (mx->mmx_mem); free (mx->imx_mem); free (mx->dmx_mem); free (mx->xmx); free (mx->mmx); free (mx->imx); free (mx->dmx); free (mx);}/* Function: AllocShadowMatrix() * * Purpose: Allocate a dynamic programming traceback pointer matrix for * a Viterbi algorithm. * * Args: rows - number of rows to allocate; typically L+1 * M - size of model * xtb, mtb, itb, dtb * - RETURN: ptrs to four mx components as a convenience * * Return: mx * mx is allocated here. Caller frees with FreeDPMatrix(mx). */struct dpshadow_s *AllocShadowMatrix(int rows, int M, char ***xtb, char ***mtb, char ***itb, char ***dtb){ struct dpshadow_s *tb; int i; tb = (struct dpshadow_s *) MallocOrDie (sizeof(struct dpshadow_s)); tb->xtb = (char **) MallocOrDie (sizeof(char *) * rows); tb->mtb = (char **) MallocOrDie (sizeof(char *) * rows); tb->itb = (char **) MallocOrDie (sizeof(char *) * rows); tb->dtb = (char **) MallocOrDie (sizeof(char *) * rows); tb->esrc = (int *) MallocOrDie (sizeof(int) * rows); tb->xtb[0] = (char *) MallocOrDie (sizeof(char) * (rows*5)); tb->mtb[0] = (char *) MallocOrDie (sizeof(char) * (rows*(M+2))); tb->itb[0] = (char *) MallocOrDie (sizeof(char) * (rows*(M+2))); tb->dtb[0] = (char *) MallocOrDie (sizeof(char) * (rows*(M+2))); for (i = 1; i < rows; i++) { tb->xtb[i] = tb->xtb[0] + (i*5); tb->mtb[i] = tb->mtb[0] + (i*(M+2)); tb->itb[i] = tb->itb[0] + (i*(M+2)); tb->dtb[i] = tb->dtb[0] + (i*(M+2)); } if (xtb != NULL) *xtb = tb->xtb; if (mtb != NULL) *mtb = tb->mtb; if (itb != NULL) *itb = tb->itb; if (dtb != NULL) *dtb = tb->dtb; return tb;}/* Function: FreeShadowMatrix() * * Purpose: Free a dynamic programming matrix allocated by AllocShadowMatrix(). * * Return: (void) */voidFreeShadowMatrix(struct dpshadow_s *tb){ free (tb->xtb[0]); free (tb->mtb[0]); free (tb->itb[0]); free (tb->dtb[0]); free (tb->esrc); free (tb->xtb); free (tb->mtb); free (tb->itb); free (tb->dtb); free (tb);}/* Function: P7ViterbiSpaceOK() * Incept: SRE, Wed Oct 1 12:53:13 2003 [St. Louis] * * Purpose: Returns TRUE if the existing matrix allocation * is already sufficient to hold the requested MxN, or if * the matrix can be expanded in M and/or N without * exceeding RAMLIMIT megabytes. * * This gets called anytime we're about to do P7Viterbi(). * If it returns FALSE, we switch into the appropriate * small-memory alternative: P7SmallViterbi() or * P7WeeViterbi(). * * Checking the DP problem size against P7ViterbiSize() * is not enough, because the DP matrix may be already * allocated in MxN. For example, if we're already * allocated to maxM,maxN of 1447x979, and we try to * do a problem of MxN=12x125000, P7ViterbiSize() may * think that's fine - but when we resize, we can only * grow, so we'll expand to 1447x125000, which is * likely over the RAMLIMIT. [bug#h26; xref SLT7 p.122] * * Args: * * Returns: TRUE if we can run P7Viterbi(); FALSE if we need * to use a small memory variant. * * Xref: STL7 p.122. */intP7ViterbiSpaceOK(int L, int M, struct dpmatrix_s *mx){ int newM; int newN; if (M > mx->maxM) newM = M + mx->padM; else newM = mx->maxM; if (L > mx->maxN) newN = L + mx->padN; else newN = mx->maxN; if (P7ViterbiSize(newN, newM) <= RAMLIMIT) return TRUE; else return FALSE;}/* Function: P7ViterbiSize() * Date: SRE, Fri Mar 6 15:13:20 1998 [St. Louis] * * Purpose: Returns the ballpark predicted memory requirement for a * P7Viterbi() alignment, in MB. * * Currently L must fit in an int (< 2 GB), but we have * to deal with LM > 2 GB - e.g. watch out for overflow, do * the whole calculation in floating point. Bug here detected * in 2.1.1 by David Harper, Sanger Centre. * * Args: L - length of sequence * M - length of HMM * * Returns: # of MB */intP7ViterbiSize(int L, int M){ float Mbytes; /* We're excessively precise here, but it doesn't cost * us anything to be pedantic. The four terms are: * 1. the matrix structure itself; * 2. the O(NM) main matrix (this dominates!) * 3. ptrs into the rows of the matrix * 4. storage for 5 special states. (xmx) */ Mbytes = (float) sizeof(struct dpmatrix_s); Mbytes += 3. * (float) (L+1) * (float) (M+2) * (float) sizeof(int); Mbytes += 4. * (float) (L+1) * (float) sizeof(int *); Mbytes += 5. * (float) (L+1) * (float) sizeof(int); Mbytes /= 1048576.; return (int) Mbytes;}/* Function: P7SmallViterbiSize() * Date: SRE, Fri Mar 6 15:20:04 1998 [St. Louis] * * Purpose: Returns the ballpark predicted memory requirement for * a P7SmallViterbi() alignment, in MB. * * P7SmallViterbi() is a wrapper, calling both P7ParsingViterbi() * and P7WeeViterbi(). P7ParsingViterbi() typically dominates * the memory requirement, so the value returned * is the P7ParsingViterbi() number. *
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?