core_algorithms.c
来自「这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer」· C语言 代码 · 共 1,965 行 · 第 1/5 页
C
1,965 行
* We don't (yet) worry about overflow issues like we did with * P7ViterbiSize(). We'll have many other 32-bit int issues in the * code if we overflow here. * * Args: L - length of sequence * M - length of HMM * * Returns: # of MB */intP7SmallViterbiSize(int L, int M){ return ((2 * sizeof(struct dpmatrix_s) + 12 * (M+2) * sizeof(int) + /* 2 matrices w/ 2 rows */ 16 * sizeof(int *) + /* ptrs into rows of matrix */ 20 * sizeof(int) + /* 5 special states */ 2 * (L+1) * sizeof(int)) /* traceback indices */ / 1000000);}/* Function: P7WeeViterbiSize() * Date: SRE, Fri Mar 6 15:40:42 1998 [St. Louis] * * Purpose: Returns the ballpark predicted memory requirement for * a P7WeeViterbi() alignment, in MB. * * Args: L - length of sequence * M - length of HMM * * Returns: # of MB */intP7WeeViterbiSize(int L, int M){ return ((2 * sizeof(struct dpmatrix_s) + 12 * (M+2) * sizeof(int) + /* 2 matrices w/ 2 rows */ 16 * sizeof(int *) + /* ptrs into rows of matrix */ 20 * sizeof(int) + /* 5 special states */ 2 * (L+2) * sizeof(int) + /* stacks for starts/ends (overkill) */ (L+2) * sizeof(int) + /* k assignments to seq positions */ (L+2) * sizeof(char)) /* state assignments to seq pos */ / 1000000);}/* Function: P7Forward() * * Purpose: The Forward dynamic programming algorithm. * The scaling issue is dealt with by working in log space * and calling ILogsum(); this is a slow but robust approach. * * Args: dsq - sequence in digitized form * L - length of dsq * hmm - the model * ret_mx - RETURN: dp matrix; pass NULL if it's not wanted * * Return: log P(S|M)/P(S|R), as a bit score. */floatP7Forward(unsigned char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx){ struct dpmatrix_s *mx; int **xmx; int **mmx; int **imx; int **dmx; int i,k; int sc; /* Allocate a DP matrix with 0..L rows, 0..M-1 columns. */ mx = AllocPlan7Matrix(L+1, hmm->M, &xmx, &mmx, &imx, &dmx); /* Initialization of the zero row. * Note that xmx[i][stN] = 0 by definition for all i, * and xmx[i][stT] = xmx[i][stC], so neither stN nor stT need * to be calculated in DP matrices. */ xmx[0][XMN] = 0; /* S->N, p=1 */ xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ for (k = 0; k <= hmm->M; k++) mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ /* Recursion. Done as a pull. * Note some slightly wasteful boundary conditions: * tsc[0] = -INFTY for all eight transitions (no node 0) */ for (i = 1; i <= L; i++) { mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; for (k = 1; k < hmm->M; k++) { mmx[i][k] = ILogsum(ILogsum(mmx[i-1][k-1] + hmm->tsc[TMM][k-1], imx[i-1][k-1] + hmm->tsc[TIM][k-1]), ILogsum(xmx[i-1][XMB] + hmm->bsc[k], dmx[i-1][k-1] + hmm->tsc[TDM][k-1])); mmx[i][k] += hmm->msc[dsq[i]][k]; dmx[i][k] = ILogsum(mmx[i][k-1] + hmm->tsc[TMD][k-1], dmx[i][k-1] + hmm->tsc[TDD][k-1]); imx[i][k] = ILogsum(mmx[i-1][k] + hmm->tsc[TMI][k], imx[i-1][k] + hmm->tsc[TII][k]); imx[i][k] += hmm->isc[dsq[i]][k]; } mmx[i][hmm->M] = ILogsum(ILogsum(mmx[i-1][hmm->M-1] + hmm->tsc[TMM][hmm->M-1], imx[i-1][hmm->M-1] + hmm->tsc[TIM][hmm->M-1]), ILogsum(xmx[i-1][XMB] + hmm->bsc[hmm->M], dmx[i-1][hmm->M-1] + hmm->tsc[TDM][hmm->M-1])); mmx[i][hmm->M] += hmm->msc[dsq[i]][hmm->M]; /* Now the special states. * remember, C and J emissions are zero score by definition */ xmx[i][XMN] = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]; xmx[i][XME] = -INFTY; for (k = 1; k <= hmm->M; k++) xmx[i][XME] = ILogsum(xmx[i][XME], mmx[i][k] + hmm->esc[k]); xmx[i][XMJ] = ILogsum(xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP], xmx[i][XME] + hmm->xsc[XTE][LOOP]); xmx[i][XMB] = ILogsum(xmx[i][XMN] + hmm->xsc[XTN][MOVE], xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]); xmx[i][XMC] = ILogsum(xmx[i-1][XMC] + hmm->xsc[XTC][LOOP], xmx[i][XME] + hmm->xsc[XTE][MOVE]); } sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; if (ret_mx != NULL) *ret_mx = mx; else FreePlan7Matrix(mx); return Scorify(sc); /* the total Forward score. */} #ifdef SLOW/* Function: P7Viterbi() * * Purpose: The Viterbi dynamic programming algorithm. * Identical to Forward() except that max's * replace sum's. * * This is the slower, more understandable version * of P7Viterbi(). The default version in fast_algorithms.c * is portably optimized and more difficult to understand; * the ALTIVEC version in fast_algorithms.c is vectorized * with Altivec-specific code, and is pretty opaque. * * This function is not enabled by default; it is only * activated by -DSLOW at compile time. * * Args: dsq - sequence in digitized form * L - length of dsq * hmm - the model * mx - reused DP matrix * ret_tr - RETURN: traceback; pass NULL if it's not wanted * * Return: log P(S|M)/P(S|R), as a bit score */floatP7Viterbi(unsigned char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s *mx, struct p7trace_s **ret_tr){ struct p7trace_s *tr; int **xmx; int **mmx; int **imx; int **dmx; int i,k; int sc; /* Allocate a DP matrix with 0..L rows, 0..M-1 columns. */ ResizePlan7Matrix(mx, L, hmm->M, &xmx, &mmx, &imx, &dmx); /* Initialization of the zero row. */ xmx[0][XMN] = 0; /* S->N, p=1 */ xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ for (k = 0; k <= hmm->M; k++) mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ /* Recursion. Done as a pull. * Note some slightly wasteful boundary conditions: * tsc[0] = -INFTY for all eight transitions (no node 0) * D_M and I_M are wastefully calculated (they don't exist) */ for (i = 1; i <= L; i++) { mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; for (k = 1; k <= hmm->M; k++) { /* match state */ mmx[i][k] = -INFTY; if ((sc = mmx[i-1][k-1] + hmm->tsc[TMM][k-1]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = imx[i-1][k-1] + hmm->tsc[TIM][k-1]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = xmx[i-1][XMB] + hmm->bsc[k]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = dmx[i-1][k-1] + hmm->tsc[TDM][k-1]) > mmx[i][k]) mmx[i][k] = sc; if (hmm->msc[dsq[i]][k] != -INFTY) mmx[i][k] += hmm->msc[dsq[i]][k]; else mmx[i][k] = -INFTY; /* delete state */ dmx[i][k] = -INFTY; if ((sc = mmx[i][k-1] + hmm->tsc[TMD][k-1]) > dmx[i][k]) dmx[i][k] = sc; if ((sc = dmx[i][k-1] + hmm->tsc[TDD][k-1]) > dmx[i][k]) dmx[i][k] = sc; /* insert state */ if (k < hmm->M) { imx[i][k] = -INFTY; if ((sc = mmx[i-1][k] + hmm->tsc[TMI][k]) > imx[i][k]) imx[i][k] = sc; if ((sc = imx[i-1][k] + hmm->tsc[TII][k]) > imx[i][k]) imx[i][k] = sc; if (hmm->isc[dsq[i]][k] != -INFTY) imx[i][k] += hmm->isc[dsq[i]][k]; else imx[i][k] = -INFTY; } } /* Now the special states. Order is important here. * remember, C and J emissions are zero score by definition, */ /* N state */ xmx[i][XMN] = -INFTY; if ((sc = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) xmx[i][XMN] = sc; /* E state */ xmx[i][XME] = -INFTY; for (k = 1; k <= hmm->M; k++) if ((sc = mmx[i][k] + hmm->esc[k]) > xmx[i][XME]) xmx[i][XME] = sc; /* J state */ xmx[i][XMJ] = -INFTY; if ((sc = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]) > -INFTY) xmx[i][XMJ] = sc; if ((sc = xmx[i][XME] + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ]) xmx[i][XMJ] = sc; /* B state */ xmx[i][XMB] = -INFTY; if ((sc = xmx[i][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[i][XMB] = sc; if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB]) xmx[i][XMB] = sc; /* C state */ xmx[i][XMC] = -INFTY; if ((sc = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) xmx[i][XMC] = sc; if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC]) xmx[i][XMC] = sc; } /* T state (not stored) */ sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; if (ret_tr != NULL) { P7ViterbiTrace(hmm, dsq, L, mx, &tr); *ret_tr = tr; } return Scorify(sc); /* the total Viterbi score. */}#endif /*SLOW*//* Function: P7ViterbiTrace() * Date: SRE, Sat Aug 23 10:30:11 1997 (St. Louis Lambert Field) * * Purpose: Traceback of a Viterbi matrix: i.e. retrieval * of optimum alignment. * * Args: hmm - hmm, log odds form, used to make mx * dsq - sequence aligned to (digital form) 1..N * N - length of seq * mx - the matrix to trace back in, N x hmm->M * ret_tr - RETURN: traceback. * * Return: (void) * ret_tr is allocated here. Free using P7FreeTrace(). */voidP7ViterbiTrace(struct plan7_s *hmm, unsigned char *dsq, int N, struct dpmatrix_s *mx, struct p7trace_s **ret_tr){ struct p7trace_s *tr; int curralloc; /* current allocated length of trace */ int tpos; /* position in trace */ int i; /* position in seq (1..N) */ int k; /* position in model (1..M) */ int **xmx, **mmx, **imx, **dmx; int sc; /* temp var for pre-emission score */ /* Overallocate for the trace. * S-N-B- ... - E-C-T : 6 states + N is minimum trace; * add N more as buffer. */ curralloc = N * 2 + 6; P7AllocTrace(curralloc, &tr); xmx = mx->xmx; mmx = mx->mmx; imx = mx->imx; dmx = mx->dmx; /* Initialization of trace * We do it back to front; ReverseTrace() is called later. */ tr->statetype[0] = STT; tr->nodeidx[0] = 0; tr->pos[0] = 0; tr->statetype[1] = STC; tr->nodeidx[1] = 0; tr->pos[1] = 0; tpos = 2; i = N; /* current i (seq pos) we're trying to assign */ /* Traceback */ while (tr->statetype[tpos-1] != STS) { switch (tr->statetype[tpos-1]) { case STM: /* M connects from i-1,k-1, or B */ sc = mmx[i+1][k+1] - hmm->msc[dsq[i+1]][k+1]; if (sc <= -INFTY) { P7FreeTrace(tr); *ret_tr = NULL; return; } else if (sc == xmx[i][XMB] + hmm->bsc[k+1]) { /* Check for wing unfolding */ if (Prob2Score(hmm->begin[k+1], hmm->p1) + 1 * INTSCALE <= hmm->bsc[k+1]) while (k > 0) { tr->statetype[tpos] = STD; tr->nodeidx[tpos] = k--; tr->pos[tpos] = 0; tpos++; if (tpos == curralloc) { /* grow trace if necessary */ curralloc += N; P7ReallocTrace(tr, curralloc); } } tr->statetype[tpos] = STB; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; } else if (sc == mmx[i][k] + hmm->tsc[TMM][k]) { tr->statetype[tpos] = STM; tr->nodeidx[tpos] = k--; tr->pos[tpos] = i--; } else if (sc == imx[i][k] + hmm->tsc[TIM][k]) { tr->statetype[tpos] = STI; tr->nodeidx[tpos] = k; tr->pos[tpos] = i--; } else if (sc == dmx[i][k] + hmm->tsc[TDM][k]) { tr->statetype[tpos] = STD; tr->nodeidx[tpos] = k--; tr->pos[tpos] = 0; } else Die("traceback failed"); break; case STD: /* D connects from M,D */ if (dmx[i][k+1] <= -INFTY) { P7FreeTrace(tr); *ret_tr = NULL; return; } else if (dmx[i][k+1] == mmx[i][k] + hmm->tsc[TMD][k]) { tr->statetype[tpos] = STM; tr->nodeidx[tpos] = k--; tr->pos[tpos] = i--; } else if (dmx[i][k+1] == dmx[i][k] + hmm->tsc[TDD][k]) { tr->statetype[tpos] = STD; tr->nodeidx[tpos] = k--; tr->pos[tpos] = 0; } else Die("traceback failed"); break; case STI: /* I connects from M,I */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?