📄 core_algorithms.c
字号:
* D_M and I_M are wastefully calculated (they don't exist) */ for (i = 1; i <= L; i++) { mmx[i][0] = imx[i][0] = dmx[i][0] = -INFTY; for (k = 1; k <= hmm->M; k++) { /* match state */ mmx[i][k] = -INFTY; if ((sc = mmx[i-1][k-1] + hmm->tsc[k-1][TMM]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = imx[i-1][k-1] + hmm->tsc[k-1][TIM]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = xmx[i-1][XMB] + hmm->bsc[k]) > mmx[i][k]) mmx[i][k] = sc; if ((sc = dmx[i-1][k-1] + hmm->tsc[k-1][TDM]) > mmx[i][k]) mmx[i][k] = sc; if (hmm->msc[(int) dsq[i]][k] != -INFTY) mmx[i][k] += hmm->msc[(int) dsq[i]][k]; else mmx[i][k] = -INFTY; /* delete state */ dmx[i][k] = -INFTY; if ((sc = mmx[i][k-1] + hmm->tsc[k-1][TMD]) > dmx[i][k]) dmx[i][k] = sc; if ((sc = dmx[i][k-1] + hmm->tsc[k-1][TDD]) > dmx[i][k]) dmx[i][k] = sc; /* insert state */ if (k < hmm->M) { imx[i][k] = -INFTY; if ((sc = mmx[i-1][k] + hmm->tsc[k][TMI]) > imx[i][k]) imx[i][k] = sc; if ((sc = imx[i-1][k] + hmm->tsc[k][TII]) > imx[i][k]) imx[i][k] = sc; if (hmm->isc[(int)dsq[i]][k] != -INFTY) imx[i][k] += hmm->isc[(int) dsq[i]][k]; else imx[i][k] = -INFTY; } } /* Now the special states. Order is important here. * remember, C and J emissions are zero score by definition, */ /* N state */ xmx[i][XMN] = -INFTY; if ((sc = xmx[i-1][XMN] + hmm->xsc[XTN][LOOP]) > -INFTY) xmx[i][XMN] = sc; /* E state */ xmx[i][XME] = -INFTY; for (k = 1; k <= hmm->M; k++) if ((sc = mmx[i][k] + hmm->esc[k]) > xmx[i][XME]) xmx[i][XME] = sc; /* J state */ xmx[i][XMJ] = -INFTY; if ((sc = xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]) > -INFTY) xmx[i][XMJ] = sc; if ((sc = xmx[i][XME] + hmm->xsc[XTE][LOOP]) > xmx[i][XMJ]) xmx[i][XMJ] = sc; /* B state */ xmx[i][XMB] = -INFTY; if ((sc = xmx[i][XMN] + hmm->xsc[XTN][MOVE]) > -INFTY) xmx[i][XMB] = sc; if ((sc = xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) > xmx[i][XMB]) xmx[i][XMB] = sc; /* C state */ xmx[i][XMC] = -INFTY; if ((sc = xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]) > -INFTY) xmx[i][XMC] = sc; if ((sc = xmx[i][XME] + hmm->xsc[XTE][MOVE]) > xmx[i][XMC]) xmx[i][XMC] = sc; } /* T state (not stored) */ sc = xmx[L][XMC] + hmm->xsc[XTC][MOVE]; if (ret_tr != NULL) { P7ViterbiTrace(hmm, dsq, L, mx, &tr); *ret_tr = tr; } FreePlan7Matrix(mx); return Scorify(sc); /* the total Viterbi score. */}/* Function: P7ViterbiTrace() * Date: SRE, Sat Aug 23 10:30:11 1997 (St. Louis Lambert Field) * * Purpose: Traceback of a Viterbi matrix: i.e. retrieval * of optimum alignment. * * Args: hmm - hmm, log odds form, used to make mx * dsq - sequence aligned to (digital form) 1..N * N - length of seq * mx - the matrix to trace back in, N x hmm->M * ret_tr - RETURN: traceback. * * Return: (void) * ret_tr is allocated here. Free using P7FreeTrace(). */voidP7ViterbiTrace(struct plan7_s *hmm, char *dsq, int N, struct dpmatrix_s *mx, struct p7trace_s **ret_tr){ struct p7trace_s *tr; int curralloc; /* current allocated length of trace */ int tpos; /* position in trace */ int i; /* position in seq (1..N) */ int k; /* position in model (1..M) */ int **xmx, **mmx, **imx, **dmx; int sc; /* temp var for pre-emission score */ /* Overallocate for the trace. * S-N-B- ... - E-C-T : 6 states + N is minimum trace; * add N more as buffer. */ curralloc = N * 2 + 6; P7AllocTrace(curralloc, &tr); xmx = mx->xmx; mmx = mx->mmx; imx = mx->imx; dmx = mx->dmx; /* Initialization of trace * We do it back to front; ReverseTrace() is called later. */ tr->statetype[0] = STT; tr->nodeidx[0] = 0; tr->pos[0] = 0; tr->statetype[1] = STC; tr->nodeidx[1] = 0; tr->pos[1] = 0; tpos = 2; i = N; /* current i (seq pos) we're trying to assign */ /* Traceback */ while (tr->statetype[tpos-1] != STS) { switch (tr->statetype[tpos-1]) { case STM: /* M connects from i-1,k-1, or B */ sc = mmx[i+1][k+1] - hmm->msc[(int) dsq[i+1]][k+1]; if (sc == xmx[i][XMB] + hmm->bsc[k+1]) { /* Check for wing unfolding */ if (Prob2Score(hmm->begin[k+1], hmm->p1) + 1 * INTSCALE <= hmm->bsc[k+1]) while (k > 0) { tr->statetype[tpos] = STD; tr->nodeidx[tpos] = k--; tr->pos[tpos] = 0; tpos++; if (tpos == curralloc) { /* grow trace if necessary */ curralloc += N; P7ReallocTrace(tr, curralloc); } } tr->statetype[tpos] = STB; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; } else if (sc == mmx[i][k] + hmm->tsc[k][TMM]) { tr->statetype[tpos] = STM; tr->nodeidx[tpos] = k--; tr->pos[tpos] = i--; } else if (sc == imx[i][k] + hmm->tsc[k][TIM]) { tr->statetype[tpos] = STI; tr->nodeidx[tpos] = k; tr->pos[tpos] = i--; } else if (sc == dmx[i][k] + hmm->tsc[k][TDM]) { tr->statetype[tpos] = STD; tr->nodeidx[tpos] = k--; tr->pos[tpos] = 0; } else Die("traceback failed"); break; case STD: /* D connects from M,D */ if (dmx[i][k+1] == mmx[i][k] + hmm->tsc[k][TMD]) { tr->statetype[tpos] = STM; tr->nodeidx[tpos] = k--; tr->pos[tpos] = i--; } else if (dmx[i][k+1] == dmx[i][k] + hmm->tsc[k][TDD]) { tr->statetype[tpos] = STD; tr->nodeidx[tpos] = k--; tr->pos[tpos] = 0; } else Die("traceback failed"); break; case STI: /* I connects from M,I */ sc = imx[i+1][k] - hmm->isc[(int) dsq[i+1]][k]; if (sc == mmx[i][k] + hmm->tsc[k][TMI]) { tr->statetype[tpos] = STM; tr->nodeidx[tpos] = k--; tr->pos[tpos] = i--; } else if (sc == imx[i][k] + hmm->tsc[k][TII]) { tr->statetype[tpos] = STI; tr->nodeidx[tpos] = k; tr->pos[tpos] = i--; } else Die("traceback failed"); break; case STN: /* N connects from S, N */ if (i == 0 && xmx[i][XMN] == 0) { tr->statetype[tpos] = STS; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; } else if (i > 0 && xmx[i+1][XMN] == xmx[i][XMN] + hmm->xsc[XTN][LOOP]) { tr->statetype[tpos] = STN; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; /* note convention adherence: */ tr->pos[tpos-1] = i--; /* first N doesn't emit */ } else Die("traceback failed"); break; case STB: /* B connects from N, J */ if (xmx[i][XMB] == xmx[i][XMN] + hmm->xsc[XTN][MOVE]) { tr->statetype[tpos] = STN; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; } else if (xmx[i][XMB] == xmx[i][XMJ] + hmm->xsc[XTJ][MOVE]) { tr->statetype[tpos] = STJ; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; } else Die("traceback failed"); break; case STE: /* E connects from any M state. k set here */ for (k = hmm->M; k >= 1; k--) if (xmx[i][XME] == mmx[i][k] + hmm->esc[k]) { /* check for wing unfolding */ if (Prob2Score(hmm->end[k], 1.) + 1*INTSCALE <= hmm->esc[k]) { int dk; /* need a tmp k while moving thru delete wing */ for (dk = hmm->M; dk > k; dk--) { tr->statetype[tpos] = STD; tr->nodeidx[tpos] = dk; tr->pos[tpos] = 0; tpos++; if (tpos == curralloc) { /* grow trace if necessary */ curralloc += N; P7ReallocTrace(tr, curralloc); } } } tr->statetype[tpos] = STM; tr->nodeidx[tpos] = k--; tr->pos[tpos] = i--; break; } if (k < 0) Die("traceback failed"); break; case STC: /* C comes from C, E */ if (xmx[i][XMC] == xmx[i-1][XMC] + hmm->xsc[XTC][LOOP]) { tr->statetype[tpos] = STC; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; /* note convention adherence: */ tr->pos[tpos-1] = i--; /* first C doesn't emit */ } else if (xmx[i][XMC] == xmx[i][XME] + hmm->xsc[XTE][MOVE]) { tr->statetype[tpos] = STE; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; /* E is a nonemitter */ } else Die("Traceback failed."); break; case STJ: /* J connects from E, J */ if (xmx[i][XMJ] == xmx[i-1][XMJ] + hmm->xsc[XTJ][LOOP]) { tr->statetype[tpos] = STJ; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; /* note convention adherence: */ tr->pos[tpos-1] = i--; /* first J doesn't emit */ } else if (xmx[i][XMJ] == xmx[i][XME] + hmm->xsc[XTE][LOOP]) { tr->statetype[tpos] = STE; tr->nodeidx[tpos] = 0; tr->pos[tpos] = 0; /* E is a nonemitter */ } else Die("Traceback failed."); break; default: Die("traceback failed"); } /* end switch over statetype[tpos-1] */ tpos++; if (tpos == curralloc) { /* grow trace if necessary */ curralloc += N; P7ReallocTrace(tr, curralloc); } } /* end traceback, at S state; tpos == tlen now */ tr->tlen = tpos; P7ReverseTrace(tr); *ret_tr = tr;}/* Function: P7SmallViterbi() * Date: SRE, Fri Mar 6 15:29:41 1998 [St. Louis] * * Purpose: Wrapper function, for linear memory alignment * with same arguments as P7Viterbi(). * * Calls P7ParsingViterbi to break the sequence * into fragments. Then, based on size of fragments, * calls either P7Viterbi() or P7WeeViterbi() to * get traces for them. Finally, assembles all these * traces together to produce an overall optimal * trace for the sequence. * * If the trace isn't needed for some reason, * all we do is call P7ParsingViterbi. * * Args: dsq - sequence in digitized form * L - length of dsq * hmm - the model * ret_tr - RETURN: traceback; pass NULL if it's not wanted * * Returns: Score of optimal alignment in bits. */floatP7SmallViterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr){ struct p7trace_s *ctr; /* collapsed trace of optimal parse */ struct p7trace_s *tr; /* full trace of optimal alignment */ struct p7trace_s **tarr; /* trace array */ int ndom; /* number of subsequences */ int i; /* counter over domains */ int pos; /* position in sequence */ int tpos; /* position in trace */ int tlen; /* length of full trace */ int sqlen; /* length of a subsequence */ int totlen; /* length of L matched by model (as opposed to N/C/J) */ float sc; /* score of optimal alignment */ int t2; /* position in a subtrace */ /* Step 1. Call P7ParsingViterbi to calculate an optimal parse * of the sequence into single-hit subsequences; this parse * is returned in a "collapsed" trace */ sc = P7ParsingViterbi(dsq, L, hmm, &ctr); /* If we don't want full trace, we're done */ if (ret_tr == NULL) { P7FreeTrace(ctr); return sc; } /* Step 2. Call either P7Viterbi or P7WeeViterbi on each subsequence * to recover a full traceback of each, collecting them in * an array. */ ndom = ctr->tlen/2 - 1; tarr = MallocOrDie(sizeof(struct p7trace_s *) * ndom); tlen = totlen = 0; for (i = 0; i < ndom; i++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -