core_algorithms.c
来自「这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer」· C语言 代码 · 共 1,965 行 · 第 1/5 页
C
1,965 行
/* Initialization of the first row (DNA sequence of length 1); * only N state can make this nucleotide. */ xmx[1][XMN] = xmx[0][XMN] + hmm->xsc[XTN][LOOP]; xmx[1][XMB] = xmx[1][XMN] + hmm->xsc[XTN][MOVE]; xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need 2 nt to get here */ for (k = 0; k <= hmm->M; k++) mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need 2 nt to get into model */ /* Recursion. Done as a pull. * Note some slightly wasteful boundary conditions: * tsc[0] = -INFTY for all eight transitions (no node 0) * D_M and I_M are wastefully calculated (they don't exist) */ for (i = 2; i <= L; i++) { mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; /* crude calculation of lookup value for codon */ if (i > 2) { if (dsq[i-2] < 4 && dsq[i-1] < 4 && dsq[i] < 4) codon = dsq[i-2] * 16 + dsq[i-1] * 4 + dsq[i]; else codon = 64; /* ambiguous codon; punt */ } for (k = 1; k <= hmm->M; k++) { /* match state */ if (i > 2) { mmx[i][k] = mmx[i-3][k-1] + hmm->tsc[TMM][k-1]; if ((sc = imx[i-3][k-1] + hmm->tsc[TIM][k-1]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = xmx[i-3][XMB] + hmm->bsc[k]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = dmx[i-3][k-1] + hmm->tsc[TDM][k-1]) > mmx[i][k]) mmx[i][k] = sc; mmx[i][k] += hmm->dnam[codon][k]; } /* -1 frameshifts into match state */ if ((sc = mmx[i-2][k-1] + hmm->tsc[TMM][k-1] + hmm->dna2) > mmx[i][k]) mmx[i][k] = sc; if ((sc = imx[i-2][k-1] + hmm->tsc[TIM][k-1] + hmm->dna2) > mmx[i][k]) mmx[i][k] = sc; if ((sc = xmx[i-2][XMB] + hmm->bsc[k] + hmm->dna2) > mmx[i][k]) mmx[i][k] = sc; if ((sc = dmx[i-2][k-1] + hmm->tsc[TDM][k-1] + hmm->dna2) > mmx[i][k]) mmx[i][k] = sc; /* +1 frameshifts into match state */ if (i > 3) { if ((sc = mmx[i-4][k-1] + hmm->tsc[TMM][k-1] + hmm->dna4) > mmx[i][k]) mmx[i][k] = sc; if ((sc = imx[i-4][k-1] + hmm->tsc[TIM][k-1] + hmm->dna4) > mmx[i][k]) mmx[i][k] = sc; if ((sc = xmx[i-4][XMB] + hmm->bsc[k] + hmm->dna4) > mmx[i][k]) mmx[i][k] = sc; if ((sc = dmx[i-4][k-1] + hmm->tsc[TDM][k-1] + hmm->dna4) > mmx[i][k]) mmx[i][k] = sc; } /* delete state */ dmx[i][k] = mmx[i][k-1] + hmm->tsc[TMD][k-1]; if ((sc = dmx[i][k-1] + hmm->tsc[TDD][k-1]) > dmx[i][k]) dmx[i][k] = sc; /* insert state */ if (i > 2) { imx[i][k] = mmx[i-3][k] + hmm->tsc[TMI][k]; if ((sc = imx[i-3][k] + hmm->tsc[TII][k]) > imx[i][k]) imx[i][k] = sc; imx[i][k] += hmm->dnai[codon][k]; } /* -1 frameshifts into insert state */ if ((sc = mmx[i-2][k] + hmm->tsc[TMI][k] + hmm->dna2) > imx[i][k]) imx[i][k] = sc; if ((sc = imx[i-2][k] + hmm->tsc[TII][k] + hmm->dna2) > imx[i][k]) imx[i][k] = sc; /* +1 frameshifts into insert state */ if (i > 4) { if ((sc = mmx[i-4][k] + hmm->tsc[TMI][k] + hmm->dna4) > imx[i][k]) imx[i][k] = sc; if ((sc = imx[i-4][k] + hmm->tsc[TII][k] + hmm->dna4) > imx[i][k]) imx[i][k] = sc; } } /* Now the special states. Order is important here. * remember, C and J emissions are zero score by definition, */ /* N state: +1 nucleotide */ xmx[i][XMN] = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]; /* E state: collect from M's, and last D */ xmx[i][XME] = dmx[i][hmm->M]; /* transition prob from last D = 1.0 */ for (k = 1; k <= hmm->M; k++) if ((sc = mmx[i][k] + hmm->esc[k]) > xmx[i][XME]) xmx[i][XME] = sc; /* J state: +1 nucleotide */ xmx[i][XMJ] = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]; if ((sc = xmx[i][XME] + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ]) xmx[i][XMJ] = sc; /* B state: collect from N,J */ xmx[i][XMB] = xmx[i][XMN] + hmm->xsc[XTN][MOVE]; if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB]) xmx[i][XMB] = sc; /* C state: +1 nucleotide */ xmx[i][XMC] = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]; if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC]) xmx[i][XMC] = sc; } sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; if (ret_mx != NULL) *ret_mx = mx; else FreePlan7Matrix(mx); return Scorify(sc); /* the total Viterbi score. */}/* Function: get_wee_midpt() * Date: SRE, Wed Mar 4 08:27:11 1998 [St. Louis] * * Purpose: The heart of the divide and conquer algorithm * for P7WeeViterbi(). This function is called * recursively to find successive optimal midpoints * in the alignment matrix. See P7WeeViterbi() for * further comments on the assumptions of this algorithm. * * Args: hmm - the model, set up for integer scores * dsq - the sequence, digitized * L - length of the sequence * k1 - model node to start with, 1..M * t1 - state type to start with, STM | STI | STN | STC; STS to start * s1 - sequence position to start with, 1..L; 1 to start * k3 - model node to end with, 1..M * t3 - state type to end with, STM | STI | STN | STC; STT to start * s3 - sequence position to end with, 1..L; L to start * ret_k2 - RETURN: optimal midpoint, node position in model * ret_t2 - RETURN: optimal midpoint, state type * ret_s2 - RETURN: optimal midpoint, sequence position * * Returns: score of optimal alignment, in bits. */static floatget_wee_midpt(struct plan7_s *hmm, unsigned char *dsq, int L, int k1, char t1, int s1, int k3, char t3, int s3, int *ret_k2, char *ret_t2, int *ret_s2){ struct dpmatrix_s *fwd; struct dpmatrix_s *bck; int **xmx; /* convenience ptr into special states */ int **mmx; /* convenience ptr into match states */ int **imx; /* convenience ptr into insert states */ int **dmx; /* convenience ptr into delete states */ int k2; char t2; int s2; int cur, prv, nxt; /* current, previous, next row index (0 or 1)*/ int i,k; /* indices for seq, model */ int sc; /* integer score */ int max; /* maximum integer score */ int start; /* s1 to start at (need, for STS special case) */ /* Choose our midpoint. * Special cases: s1, s3 adjacent and t1 == STS: s2 = s1 * s1, s3 adjacent and t3 == STT: s2 = s3 * (where we must replace STS, STT eventually) */ s2 = s1 + (s3-s1) / 2; if (s3-s1 == 1 && t1 == STS) s2 = s1; if (s3-s1 == 1 && t3 == STT) s2 = s3; /* STS is a special case. STS aligns to row zero by convention, * but we'll be passed s1=1, t1=STS. We have to init on row * zero then start DP on row 1. */ start = (t1 == STS) ? 0 : s1; /* Allocate our forward two rows. * Initialize row zero. */ fwd = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); cur = start%2; xmx[cur][XMN] = xmx[cur][XMB] = -INFTY; xmx[cur][XME] = xmx[cur][XMC] = -INFTY; for (k = k1; k <= k3; k++) mmx[cur][k] = imx[cur][k] = dmx[cur][k] = -INFTY; /* Where to put our zero for our start point... * (only possible to start on an emitting state; J disallowed) */ switch (t1) { case STM: mmx[cur][k1] = 0; break; case STI: imx[cur][k1] = 0; break; case STN: xmx[cur][XMN] = 0; break; case STC: xmx[cur][XMC] = 0; break; case STS: xmx[cur][XMN] = 0; break; default: Die("you can't init get_wee_midpt with a %s\n", Statetype(t1)); } /* Still initializing. * Deal with pulling horizontal matrix moves in initial row. * These are any transitions to nonemitters: * STM-> E, D * STI-> none * STN-> B * STC-> (T, but we never observe this in the forward pass of a d&c) * STE-> C * STS-> (N, already implied by setting xmx[cur][XMN] = 0) * STB-> M */ if (t1 == STM) { for (k = k1+1; k <= k3; k++) { /* transits into STD */ dmx[cur][k] = -INFTY; if ((sc = mmx[cur][k-1] + hmm->tsc[TMD][k-1]) > -INFTY) dmx[cur][k] = sc; if ((sc = dmx[cur][k-1] + hmm->tsc[TDD][k-1]) > dmx[cur][k]) dmx[cur][k] = sc; } /* transit into STE */ xmx[cur][XME] = -INFTY; if ((sc = mmx[cur][k1] + hmm->esc[k1]) > -INFTY) xmx[cur][XME] = sc; } /* transit into STB from STN */ xmx[cur][XMB] = -INFTY; if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[cur][XMB] = sc; /* transit into STC from STE */ xmx[cur][XMC] = -INFTY; if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > -INFTY) xmx[cur][XMC] = sc; /* Done initializing. * Start recursive DP; sweep forward to chosen s2 midpoint. Done as a pull. */ for (i = start+1; i <= s2; i++) { cur = i % 2; prv = !cur; mmx[cur][k1] = imx[cur][k1] = dmx[cur][k1] = -INFTY; /* Insert state in column k1, and B->M transition in k1. */ if (k1 < hmm->M) { imx[cur][k1] = -INFTY; if ((sc = mmx[prv][k1] + hmm->tsc[TMI][k1]) > -INFTY) imx[cur][k1] = sc; if ((sc = imx[prv][k1] + hmm->tsc[TII][k1]) > imx[cur][k1]) imx[cur][k1] = sc; if (hmm->isc[dsq[i]][k1] != -INFTY) imx[cur][k1] += hmm->isc[dsq[i]][k1]; else imx[cur][k1] = -INFTY; } if ((sc = xmx[prv][XMB] + hmm->bsc[k1]) > -INFTY) mmx[cur][k1] = sc; if (hmm->msc[dsq[i]][k1] != -INFTY) mmx[cur][k1] += hmm->msc[dsq[i]][k1]; else mmx[cur][k1] = -INFTY; /* Main chunk of recursion across model positions */ for (k = k1+1; k <= k3; k++) { /* match state */ mmx[cur][k] = -INFTY; if ((sc = mmx[prv][k-1] + hmm->tsc[TMM][k-1]) > -INFTY) mmx[cur][k] = sc; if ((sc = imx[prv][k-1] + hmm->tsc[TIM][k-1]) > mmx[cur][k]) mmx[cur][k] = sc; if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k]) mmx[cur][k] = sc; if ((sc = dmx[prv][k-1] + hmm->tsc[TDM][k-1]) > mmx[cur][k]) mmx[cur][k] = sc; if (hmm->msc[dsq[i]][k] != -INFTY) mmx[cur][k] += hmm->msc[dsq[i]][k]; else mmx[cur][k] = -INFTY; /* delete state */ dmx[cur][k] = -INFTY; if (k < hmm->M) { if ((sc = mmx[cur][k-1] + hmm->tsc[TMD][k-1]) > -INFTY) dmx[cur][k] = sc; if ((sc = dmx[cur][k-1] + hmm->tsc[TDD][k-1]) > dmx[cur][k]) dmx[cur][k] = sc; } /* insert state */ imx[cur][k] = -INFTY; if (k < hmm->M) { if ((sc = mmx[prv][k] + hmm->tsc[TMI][k]) > -INFTY) imx[cur][k] = sc; if ((sc = imx[prv][k] + hmm->tsc[TII][k]) > imx[cur][k]) imx[cur][k] = sc; if (hmm->isc[dsq[i]][k] != -INFTY) imx[cur][k] += hmm->isc[dsq[i]][k]; else imx[cur][k] = -INFTY; } } /* N state */ xmx[cur][XMN] = -INFTY; if ((sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) xmx[cur][XMN] = sc; /* E state */ xmx[cur][XME] = -INFTY; for (k = k1; k <= k3 && k <= hmm->M; k++) if ((sc = mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME]) xmx[cur][XME] = sc; /* B state */ xmx[cur][XMB] = -INFTY; if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[cur][XMB] = sc; /* C state */ xmx[cur][XMC] = -INFTY; if ((sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) xmx[cur][XMC] = sc; if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC]) xmx[cur][XMC] = sc; } /* Row s2%2 in fwd matrix now contains valid scores from s1 (start) to s2, * with J transitions disallowed (no cycles through model). */ /***************************************************************** * Backwards pass. *****************************************************************/ /* Allocate our backwards two rows. Init last row. */ bck = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); nxt = s3%2; xmx[nxt][XMN] = xmx[nxt][XMB] = -INFTY; xmx[nxt][XME] = xmx[nxt][XMC] = -INFTY; for (k = k1; k <= k3 + 1; k++) mmx[nxt][k] = imx[nxt][k] = dmx[nxt][k] = -INFTY; cur = !nxt; mmx[cur][k3+1] = imx[cur][k3+1] = dmx[cur][k3+1] = -INFTY; /* Where to put the zero for our end point on last row. */ switch (t3) { case STM: mmx[nxt][k3] = 0; break; case STI: imx[nxt][k3] = 0; break; case STN: xmx[nxt][XMN] = 0; break; case STC: xmx[nxt][XMC] = 0; break; /* must be an emitting C */ case STT: xmx[nxt][XMC] = hmm->xsc[XTC][MOVE]; break; /* C->T implied */ default: Die("you can't init get_wee_midpt with a %s\n", Statetype(t3)); } /* Still initializing. * In the case t3==STT, there are a few horizontal moves possible * on row s3, because STT isn't an emitter. All other states are * emitters, so their connections have to be to the previous row s3-1. */ if (t3 == STT) { /* E->C */ xmx[nxt][XME] = xmx[nxt][XMC] + hmm->xsc[XTE][MOVE]; /* M->E */ for (k = k3; k >= k1; k--) { mmx[nxt][k] = xmx[nxt][XME] + hmm->esc[k]; if (s3 != s2) mmx[nxt][k] += hmm->msc[dsq[s3]][k]; } } /* Start recursive DP; sweep backwards to chosen s2 midpoint. * Done as a pull. M, I scores at current row do /not/ include * emission scores. Be careful of integer underflow. */ for (i = s3-1; i >= s2; i--) { /* note i < L, so i+1 is always a legal index */ cur = i%2; nxt = !cur; /* C pulls from C (T is special cased) */ xmx[cur][XMC] = -INFTY; if ((sc = xmx[nxt][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) xmx[cur][XMC] = sc; /* B pulls from M's */ xmx[cur][XMB] = -INFTY; for (k = k1; k <= k3; k++) if ((sc = mmx[nxt][k] + hmm->bsc[k]) > xmx[cur][XMB]) xmx[cur][XMB] = sc; /* E pulls from C (J disallowed) */ xmx[cur][XME] = -INFTY; if ((sc = xmx[cur][
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?