📄 core_algorithms.c
字号:
* STS-> (N, already implied by setting xmx[cur][XMN] = 0) * STB-> M */ if (t1 == STM) { for (k = k1+1; k <= k3; k++) { /* transits into STD */ dmx[cur][k] = -INFTY; if ((sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > -INFTY) dmx[cur][k] = sc; if ((sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k]) dmx[cur][k] = sc; } /* transit into STE */ xmx[cur][XME] = -INFTY; if ((sc = mmx[cur][k1] + hmm->esc[k1]) > -INFTY) xmx[cur][XME] = sc; } /* transit into STB from STN */ xmx[cur][XMB] = -INFTY; if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[cur][XMB] = sc; /* transit into STC from STE */ xmx[cur][XMC] = -INFTY; if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > -INFTY) xmx[cur][XMC] = sc; /* Done initializing. * Start recursive DP; sweep forward to chosen s2 midpoint. Done as a pull. */ for (i = start+1; i <= s2; i++) { cur = i % 2; prv = !cur; mmx[cur][k1] = imx[cur][k1] = dmx[cur][k1] = -INFTY; /* Insert state in column k1, and B->M transition in k1. */ if (k1 < hmm->M) { imx[cur][k1] = -INFTY; if ((sc = mmx[prv][k1] + hmm->tsc[k1][TMI]) > -INFTY) imx[cur][k1] = sc; if ((sc = imx[prv][k1] + hmm->tsc[k1][TII]) > imx[cur][k1]) imx[cur][k1] = sc; if (hmm->isc[(int) dsq[i]][k1] != -INFTY) imx[cur][k1] += hmm->isc[(int) dsq[i]][k1]; else imx[cur][k1] = -INFTY; } if ((sc = xmx[prv][XMB] + hmm->bsc[k1]) > -INFTY) mmx[cur][k1] = sc; if (hmm->msc[(int) dsq[i]][k1] != -INFTY) mmx[cur][k1] += hmm->msc[(int) dsq[i]][k1]; else mmx[cur][k1] = -INFTY; /* Main chunk of recursion across model positions */ for (k = k1+1; k <= k3; k++) { /* match state */ mmx[cur][k] = -INFTY; if ((sc = mmx[prv][k-1] + hmm->tsc[k-1][TMM]) > -INFTY) mmx[cur][k] = sc; if ((sc = imx[prv][k-1] + hmm->tsc[k-1][TIM]) > mmx[cur][k]) mmx[cur][k] = sc; if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k]) mmx[cur][k] = sc; if ((sc = dmx[prv][k-1] + hmm->tsc[k-1][TDM]) > mmx[cur][k]) mmx[cur][k] = sc; if (hmm->msc[(int) dsq[i]][k] != -INFTY) mmx[cur][k] += hmm->msc[(int) dsq[i]][k]; else mmx[cur][k] = -INFTY; /* delete state */ dmx[cur][k] = -INFTY; if (k < hmm->M) { if ((sc = mmx[cur][k-1] + hmm->tsc[k-1][TMD]) > -INFTY) dmx[cur][k] = sc; if ((sc = dmx[cur][k-1] + hmm->tsc[k-1][TDD]) > dmx[cur][k]) dmx[cur][k] = sc; } /* insert state */ imx[cur][k] = -INFTY; if (k < hmm->M) { if ((sc = mmx[prv][k] + hmm->tsc[k][TMI]) > -INFTY) imx[cur][k] = sc; if ((sc = imx[prv][k] + hmm->tsc[k][TII]) > imx[cur][k]) imx[cur][k] = sc; if (hmm->isc[(int) dsq[i]][k] != -INFTY) imx[cur][k] += hmm->isc[(int) dsq[i]][k]; else imx[cur][k] = -INFTY; } } /* N state */ xmx[cur][XMN] = -INFTY; if ((sc = xmx[prv][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) xmx[cur][XMN] = sc; /* E state */ xmx[cur][XME] = -INFTY; for (k = k1; k <= k3 && k <= hmm->M; k++) if ((sc = mmx[cur][k] + hmm->esc[k]) > xmx[cur][XME]) xmx[cur][XME] = sc; /* B state */ xmx[cur][XMB] = -INFTY; if ((sc = xmx[cur][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[cur][XMB] = sc; /* C state */ xmx[cur][XMC] = -INFTY; if ((sc = xmx[prv][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) xmx[cur][XMC] = sc; if ((sc = xmx[cur][XME] + hmm->xsc[XTE][MOVE]) > xmx[cur][XMC]) xmx[cur][XMC] = sc; } /* Row s2%2 in fwd matrix now contains valid scores from s1 (start) to s2, * with J transitions disallowed (no cycles through model). */ /***************************************************************** * Backwards pass. *****************************************************************/ /* Allocate our backwards two rows. Init last row. */ bck = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); nxt = s3%2; xmx[nxt][XMN] = xmx[nxt][XMB] = -INFTY; xmx[nxt][XME] = xmx[nxt][XMC] = -INFTY; for (k = k1; k <= k3 + 1; k++) mmx[nxt][k] = imx[nxt][k] = dmx[nxt][k] = -INFTY; cur = !nxt; mmx[cur][k3+1] = imx[cur][k3+1] = dmx[cur][k3+1] = -INFTY; /* Where to put the zero for our end point on last row. */ switch (t3) { case STM: mmx[nxt][k3] = 0; break; case STI: imx[nxt][k3] = 0; break; case STN: xmx[nxt][XMN] = 0; break; case STC: xmx[nxt][XMC] = 0; break; /* must be an emitting C */ case STT: xmx[nxt][XMC] = hmm->xsc[XTC][MOVE]; break; /* C->T implied */ default: Die("you can't init get_wee_midpt with a %s\n", Statetype(t3)); } /* Still initializing. * In the case t3==STT, there are a few horizontal moves possible * on row s3, because STT isn't an emitter. All other states are * emitters, so their connections have to be to the previous row s3-1. */ if (t3 == STT) { /* E->C */ xmx[nxt][XME] = xmx[nxt][XMC] + hmm->xsc[XTE][MOVE]; /* M->E */ for (k = k3; k >= k1; k--) { mmx[nxt][k] = xmx[nxt][XME] + hmm->esc[k]; if (s3 != s2) mmx[nxt][k] += hmm->msc[(int)dsq[s3]][k]; } } /* Start recursive DP; sweep backwards to chosen s2 midpoint. * Done as a pull. M, I scores at current row do /not/ include * emission scores. Be careful of integer underflow. */ for (i = s3-1; i >= s2; i--) { /* note i < L, so i+1 is always a legal index */ cur = i%2; nxt = !cur; /* C pulls from C (T is special cased) */ xmx[cur][XMC] = -INFTY; if ((sc = xmx[nxt][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) xmx[cur][XMC] = sc; /* B pulls from M's */ xmx[cur][XMB] = -INFTY; for (k = k1; k <= k3; k++) if ((sc = mmx[nxt][k] + hmm->bsc[k]) > xmx[cur][XMB]) xmx[cur][XMB] = sc; /* E pulls from C (J disallowed) */ xmx[cur][XME] = -INFTY; if ((sc = xmx[cur][XMC] + hmm->xsc[XTE][MOVE]) > -INFTY) xmx[cur][XME] = sc; /* N pulls from B, N */ xmx[cur][XMN] = -INFTY; if ((sc = xmx[cur][XMB] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[cur][XMN] = sc; if ((sc = xmx[nxt][XMN] + hmm->xsc[XTN][LOOP]) > xmx[cur][XMN]) xmx[cur][XMN] = sc; /* Main recursion across model */ for (k = k3; k >= k1; k--) { /* special case k == M */ if (k == hmm->M) { mmx[cur][k] = xmx[cur][XME]; /* p=1 transition to E by definition */ dmx[cur][k] = -INFTY; /* doesn't exist */ imx[cur][k] = -INFTY; /* doesn't exist */ if (i != s2) mmx[cur][k] += hmm->msc[(int)dsq[i]][k]; continue; } /* below this k < M, so k+1 is a legal index */ /* pull into match state */ mmx[cur][k] = -INFTY; if ((sc = xmx[cur][XME] + hmm->esc[k]) > -INFTY) mmx[cur][k] = sc; if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TMM]) > mmx[cur][k]) mmx[cur][k] = sc; if ((sc = imx[nxt][k] + hmm->tsc[k][TMI]) > mmx[cur][k]) mmx[cur][k] = sc; if ((sc = dmx[cur][k+1] + hmm->tsc[k][TMD]) > mmx[cur][k]) mmx[cur][k] = sc; if (i != s2) mmx[cur][k] += hmm->msc[(int)dsq[i]][k]; /* pull into delete state */ dmx[cur][k] = -INFTY; if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TDM]) > -INFTY) dmx[cur][k] = sc; if ((sc = dmx[cur][k+1] + hmm->tsc[k][TDD]) > dmx[cur][k]) dmx[cur][k] = sc; /* pull into insert state */ imx[cur][k] = -INFTY; if ((sc = mmx[nxt][k+1] + hmm->tsc[k][TIM]) > -INFTY) imx[cur][k] = sc; if ((sc = imx[nxt][k] + hmm->tsc[k][TII]) > imx[cur][k]) imx[cur][k] = sc; if (i != s2) imx[cur][k] += hmm->isc[(int)dsq[i]][k]; } } /***************************************************************** * DP complete; we have both forward and backward passes. Now we * look across the s2 row and find the optimal emitting state. *****************************************************************/ cur = s2%2; max = -INFTY; for (k = k1; k <= k3; k++) { if ((sc = fwd->mmx[cur][k] + bck->mmx[cur][k]) > max) { k2 = k; t2 = STM; max = sc; } if ((sc = fwd->imx[cur][k] + bck->imx[cur][k]) > max) { k2 = k; t2 = STI; max = sc; } } if ((sc = fwd->xmx[cur][XMN] + bck->xmx[cur][XMN]) > max) { k2 = 1; t2 = STN; max = sc; } if ((sc = fwd->xmx[cur][XMC] + bck->xmx[cur][XMC]) > max) { k2 = hmm->M; t2 = STC; max = sc; } /***************************************************************** * Garbage collection, return. *****************************************************************/ FreePlan7Matrix(fwd); FreePlan7Matrix(bck); *ret_k2 = k2; *ret_t2 = t2; *ret_s2 = s2; return Scorify(max);}/* Function: P7ViterbiAlignAlignment() * Date: SRE, Sat Jul 4 13:39:00 1998 [St. Louis] * * Purpose: Align a multiple alignment to an HMM without * changing the multiple alignment itself. * Adapted from P7Viterbi(). * * Heuristic; not a guaranteed optimal alignment. * Guaranteeing an optimal alignment appears difficult. * [cryptic note to myself:] In paths connecting to I* metastates, * recursion breaks down; if there is a gap in the * previous column for a given seq, we can't determine what state the * I* metastate corresponds to for this sequence, unless we * look back in the DP matrix. The lookback would either involve * recursing back to the previous M* metastate (giving a * O(MN^2) algorithm instead of O(MN)) or expanding the I* * metastate into 3^nseq separate I* metastates to keep track * of which of three states each seq is in. Since the second * option blows up exponentially w/ nseq, it is not attractive. * If the first option were used, the correct algorithm would be related to * modelmakers.c:Maxmodelmaker(), but somewhat more difficult. * * The heuristic approach here is to calculate a "consensus" * sequence from the alignment, and align the consensus to the HMM. * Some hackery is employed, weighting transitions and emissions * to make things work (re: con and mocc arrays). * * Args: aseq - aligned sequences * ainfo - info for aseqs (includes alen, nseq, wgt) * hmm - model to align to * * Returns: Traceback. Caller must free with P7FreeTrace(). * pos[] contains alignment columns, indexed 1..alen. * statetype[] contains metastates M*, etc. as STM, etc. */struct p7trace_s *P7ViterbiAlignAlignment(MSA *msa, struct plan7_s *hmm){ struct dpmatrix_s *mx; /* Viterbi calculation lattice (two rows) */ struct dpshadow_s *tb; /* shadow matrix of traceback pointers */ struct p7trace_s *tr; /* RETURN: traceback */ int **xmx, **mmx, **imx, **dmx; char **xtb, **mtb, **itb, **dtb; float **con; /* [1..alen][0..Alphabet_size-1], consensus counts */ float *mocc; /* fractional occupancy of a column; used to weight transitions */ int i; /* counter for columns */ int k; /* counter for model positions */ int idx; /* counter for seqs */ int sym; /* counter for alphabet symbols */ int sc; /* temp variable for holding score */ float denom; /* total weight of seqs; used to "normalize" counts */ int cur, prv; /* The "consensus" is a counts matrix, [1..alen][0..Alphabet_size-1]. * Gaps are not counted explicitly, but columns with lots of gaps get * less total weight because they have fewer counts. */ /* allocation */ con = MallocOrDie(sizeof(float *) * (msa->alen+1)); mocc = MallocOrDie(sizeof(float) * (msa->alen+1)); for (i = 1; i <= msa->alen; i++) { con[i] = MallocOrDie(sizeof(float) * Alphabet_size); FSet(con[i], Alphabet_size, 0.0); } mocc[0] = -9999.; /* initialization */ /* note: aseq is off by one, 0..alen-1 */ /* "normalized" to have a max total count of 1 per col */ denom = FSum(msa->wgt, msa->nseq); for (i = 1; i <= msa->alen; i++) { for (idx = 0; idx < msa->nseq; idx++) if (! isgap(msa->aseq[idx][i-1])) P7CountSymbol(con[i], SYMIDX(msa->aseq[idx][i-1]), msa->wgt[idx]); FScale(con[i], Alphabet_size, 1./denom); mocc[i] = FSum(con[i], Alphabet_size); } /* Allocate a DP matrix with 2 rows, 0..M columns, * and a shadow matrix with 0,1..alen rows, 0..M columns. */ mx = AllocPlan7Matrix(2, hmm->M, &xmx, &mmx, &imx, &dmx); tb = AllocShadowMatrix(msa->alen+1, hmm->M, &xtb, &mtb, &itb, &dtb); /* Initialization of the zero row. */ xmx[0][XMN] = 0; /* S->N, p=1 */ xtb[0][XMN] = STS; xmx[0][XMB] = hmm->xsc[XTN][MOVE]; /* S->N->B, no N-tail */ xtb[0][XMB] = STN; xmx[0][XME] = xmx[0][XMC] = xmx[0][XMJ] = -INFTY; /* need seq to get here */ tb->esrc[0] = 0; xtb[0][XMC] = xtb[0][XMJ] = STBOGUS; for (k = 0; k <= hmm->M; k++) { mmx[0][k] = imx[0][k] = dmx[0][k] = -INFTY; /* need seq to get here */ mtb[0][k] = itb[0][k] = dtb[0][k] = STBOGUS; } /* Recursion. Done as a pull. * Note some slightly wasteful boundary conditions: * tsc[0] = -INFTY for all eight transitions (no node 0) * D_M and I_M are wastefully calculated (they don't exist) */ for (i = 1; i <= msa->alen; i++) { cur = i % 2; prv = ! cur; mmx[cur][0] = imx[cur][0] = dmx[cur][0] = -INFTY; mtb[i][0] = itb[i][0] = dtb[i][0] = STBOGUS; for (k = 1; k <= hmm->M; k++) { /* match state */ mmx[cur][k] = -INFTY; mtb[i][k] = STBOGUS; if (mmx[prv][k-1] > -INFTY && hmm->tsc[k-1][TMM] > -INFTY && (sc = mmx[prv][k-1] + hmm->tsc[k-1][TMM]) > mmx[cur][k]) { mmx[cur][k] = sc; mtb[i][k] = STM; } if (imx[prv][k-1] > -INFTY && hmm->tsc[k-1][TIM] > -INFTY && (sc = imx[prv][k-1] + hmm->tsc[k-1][TIM] * mocc[i-1]) > mmx[cur][k]) { mmx[cur][k] = sc; mtb[i][k] = STI; } if ((sc = xmx[prv][XMB] + hmm->bsc[k]) > mmx[cur][k]) { mmx[cur][k] = sc; mtb[i][k] = STB; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -