📄 plan7.c
字号:
* S/W B->M and M->E transitions are folded together with * data-dependent B->D...D->M and M->D...D->E paths. * * This process is referred to in the code as "wing folding" * or "wing retraction"... the analogy is to a swept-wing * fighter in landing vs. high speed flight configuration. * * Note on Viterbi vs. forward flag: * Wing retraction must take forward vs. Viterbi * into account. If forward, sum two paths; if Viterbi, take * max. I tried to slide this by as a sum, without * the flag, but Alex detected it as a bug, because you can * then find cases where the Viterbi score doesn't match * the P7TraceScore(). * * Args: hmm - the hmm to calculate scores in. * viterbi_mode - TRUE to fold wings in Viterbi configuration. * * Return: (void) * hmm scores are filled in. */ voidP7Logoddsify(struct plan7_s *hmm, int viterbi_mode){ int k; /* counter for model position */ int x; /* counter for symbols */ float accum; float tbm, tme; if (hmm->flags & PLAN7_HASBITS) return; /* Symbol emission scores */ for (k = 1; k <= hmm->M; k++) { /* match/insert emissions in main model */ for (x = 0; x < Alphabet_size; x++) { hmm->msc[x][k] = Prob2Score(hmm->mat[k][x], hmm->null[x]); if (k < hmm->M) hmm->isc[x][k] = Prob2Score(hmm->ins[k][x], hmm->null[x]); } /* degenerate match/insert emissions */ for (x = Alphabet_size; x < Alphabet_iupac; x++) { hmm->msc[x][k] = DegenerateSymbolScore(hmm->mat[k], hmm->null, x); if (k < hmm->M) hmm->isc[x][k] = DegenerateSymbolScore(hmm->ins[k], hmm->null, x); } } /* State transitions. * * A note on "folding" of D_1 and D_M. * These two delete states are folded out of search form models * in order to prevent null cycles in the dynamic programming * algorithms (see code below). However, we use their log transitions * when we save the model! So the following log transition probs * are used *only* in save files, *never* in search algorithms: * log (tbd1), D1 -> M2, D1 -> D2 * Mm-1 -> Dm, Dm-1 -> Dm * * In a search algorithm, these have to be interpreted as -INFTY * because their contributions are folded into bsc[] and esc[] * entry/exit scores. They can't be set to -INFTY here because * we need them in save files. */ for (k = 1; k < hmm->M; k++) { hmm->tsc[k][TMM] = Prob2Score(hmm->t[k][TMM], hmm->p1); hmm->tsc[k][TMI] = Prob2Score(hmm->t[k][TMI], hmm->p1); hmm->tsc[k][TMD] = Prob2Score(hmm->t[k][TMD], 1.0); hmm->tsc[k][TIM] = Prob2Score(hmm->t[k][TIM], hmm->p1); hmm->tsc[k][TII] = Prob2Score(hmm->t[k][TII], hmm->p1); hmm->tsc[k][TDM] = Prob2Score(hmm->t[k][TDM], hmm->p1); hmm->tsc[k][TDD] = Prob2Score(hmm->t[k][TDD], 1.0); } /* B->M entry transitions. Note how D_1 is folded out. * M1 is just B->M1 * M2 is sum (or max) of B->M2 and B->D1->M2 * M_k is sum (or max) of B->M_k and B->D1...D_k-1->M_k * These have to be done in log space, else you'll get * underflow errors; and we also have to watch for log(0). * A little sloppier than it probably has to be; historically, * doing in this in log space was in response to a bug report. */ accum = hmm->tbd1 > 0.0 ? log(hmm->tbd1) : -9999.; for (k = 1; k <= hmm->M; k++) { tbm = hmm->begin[k] > 0. ? log(hmm->begin[k]) : -9999.; /* B->M_k part */ /* B->D1...D_k-1->M_k part we get from accum*/ if (k > 1 && accum > -9999.) { if (hmm->t[k-1][TDM] > 0.0) { if (viterbi_mode) tbm = MAX(tbm, accum + log(hmm->t[k-1][TDM])); else tbm = LogSum(tbm, accum + log(hmm->t[k-1][TDM])); } accum = (hmm->t[k-1][TDD] > 0.0) ? accum + log(hmm->t[k-1][TDD]) : -9999.; } /* Convert from log_e to scaled integer log_2 odds. */ if (tbm > -9999.) hmm->bsc[k] = (int) floor(0.5 + INTSCALE * 1.44269504 * (tbm - log(hmm->p1))); else hmm->bsc[k] = -INFTY; } /* M->E exit transitions. Note how D_M is folded out. * M_M is 1 by definition * M_M-1 is sum of M_M-1->E and M_M-1->D_M->E, where D_M->E is 1 by definition * M_k is sum of M_k->E and M_k->D_k+1...D_M->E * Must be done in log space to avoid underflow errors. * A little sloppier than it probably has to be; historically, * doing in this in log space was in response to a bug report. */ hmm->esc[hmm->M] = 0; accum = 0.; for (k = hmm->M-1; k >= 1; k--) { tme = hmm->end[k] > 0. ? log(hmm->end[k]) : -9999.; if (accum > -9999.) { if (hmm->t[k][TMD] > 0.0) { if (viterbi_mode) tme = MAX(tme, accum + log(hmm->t[k][TMD])); else tme = LogSum(tme, accum + log(hmm->t[k][TMD])); } accum = (hmm->t[k][TDD] > 0.0) ? accum + log(hmm->t[k][TDD]) : -9999.; } /* convert from log_e to scaled integer log odds. */ hmm->esc[k] = (tme > -9999.) ? (int) floor(0.5 + INTSCALE * 1.44269504 * tme) : -INFTY; } /* special transitions */ hmm->xsc[XTN][LOOP] = Prob2Score(hmm->xt[XTN][LOOP], hmm->p1); hmm->xsc[XTN][MOVE] = Prob2Score(hmm->xt[XTN][MOVE], 1.0); hmm->xsc[XTE][LOOP] = Prob2Score(hmm->xt[XTE][LOOP], 1.0); hmm->xsc[XTE][MOVE] = Prob2Score(hmm->xt[XTE][MOVE], 1.0); hmm->xsc[XTC][LOOP] = Prob2Score(hmm->xt[XTC][LOOP], hmm->p1); hmm->xsc[XTC][MOVE] = Prob2Score(hmm->xt[XTC][MOVE], 1.-hmm->p1); hmm->xsc[XTJ][LOOP] = Prob2Score(hmm->xt[XTJ][LOOP], hmm->p1); hmm->xsc[XTJ][MOVE] = Prob2Score(hmm->xt[XTJ][MOVE], 1.0); hmm->flags |= PLAN7_HASBITS; /* raise the log-odds ready flag */}/* Function: Plan7Renormalize() * * Purpose: Take an HMM in counts form, and renormalize * all of its probability vectors. Also enforces * Plan7 restrictions on nonexistent transitions. * * Args: hmm - the model to renormalize. * * Return: (void) * hmm is changed. */ voidPlan7Renormalize(struct plan7_s *hmm){ int k; /* counter for model position */ int st; /* counter for special states */ float d; /* denominator */ /* match emissions */ for (k = 1; k <= hmm->M; k++) FNorm(hmm->mat[k], Alphabet_size); /* insert emissions */ for (k = 1; k < hmm->M; k++) FNorm(hmm->ins[k], Alphabet_size); /* begin transitions */ d = FSum(hmm->begin+1, hmm->M) + hmm->tbd1; FScale(hmm->begin+1, hmm->M, 1./d); hmm->tbd1 /= d; /* main model transitions */ for (k = 1; k < hmm->M; k++) { d = FSum(hmm->t[k], 3) + hmm->end[k]; FScale(hmm->t[k], 3, 1./d); hmm->end[k] /= d; FNorm(hmm->t[k]+3, 2); /* insert */ FNorm(hmm->t[k]+5, 2); /* delete */ } /* null model emissions */ FNorm(hmm->null, Alphabet_size); /* special transitions */ for (st = 0; st < 4; st++) FNorm(hmm->xt[st], 2); /* enforce nonexistent transitions */ /* (is this necessary?) */ hmm->t[0][TDM] = hmm->t[0][TDD] = 0.0; hmm->flags &= ~PLAN7_HASBITS; /* clear the log-odds ready flag */ hmm->flags |= PLAN7_HASPROB; /* set the probabilities OK flag */} /* Function: Plan7RenormalizeExits() * Date: SRE, Fri Aug 14 11:22:19 1998 [St. Louis] * * Purpose: Renormalize just the match state transitions; * for instance, after a Config() function has * modified the exit distribution. * * Args: hmm - hmm to renormalize * * Returns: void */voidPlan7RenormalizeExits(struct plan7_s *hmm){ int k; float d; for (k = 1; k < hmm->M; k++) { d = FSum(hmm->t[k], 3); FScale(hmm->t[k], 3, 1./(d + d*hmm->end[k])); }}/***************************************************************** * Plan7 configuration functions * The following few functions are the Plan7 equivalent of choosing * different alignment styles (fully local, fully global, global/local, * multihit, etc.) * * There is (at least) one constraint worth noting. * If you want per-domain scores to sum up to per-sequence scores, * then one of the following two sets of conditions must be met: * * 1) t(E->J) = 0 * e.g. no multidomain hits * * 2) t(N->N) = t(C->C) = t(J->J) = hmm->p1 * e.g. unmatching sequence scores zero, and * N->B first-model score is equal to J->B another-model score. * * These constraints are obeyed in the default Config() functions below, * but in the future (when HMM editing may be allowed) we'll have * to remember this. Non-equality of the summed domain scores and * the total sequence score is a really easy "red flag" for people to * notice and report as a bug, even if it may make probabilistic * sense not to meet either constraint for certain modeling problems. ***************************************************************** *//* Function: Plan7NakedConfig() * * Purpose: Set the alignment-independent, algorithm-dependent parameters * of a Plan7 model so that no special states (N,C,J) emit anything: * one simple, full global pass through the model. * * Args: hmm - the plan7 model * * Return: (void) * The HMM is modified; algorithm dependent parameters are set. * Previous scores are invalidated if they existed. */voidPlan7NakedConfig(struct plan7_s *hmm) { hmm->xt[XTN][MOVE] = 1.; /* disallow N-terminal tail */ hmm->xt[XTN][LOOP] = 0.; hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ hmm->xt[XTE][LOOP] = 0.; hmm->xt[XTC][MOVE] = 1.; /* disallow C-terminal tail */ hmm->xt[XTC][LOOP] = 0.; hmm->xt[XTJ][MOVE] = 0.; /* J state unused */ hmm->xt[XTJ][LOOP] = 1.; FSet(hmm->begin+2, hmm->M-1, 0.); /* disallow internal entries. */ hmm->begin[1] = 1. - hmm->tbd1; FSet(hmm->end+1, hmm->M-1, 0.); /* disallow internal exits. */ hmm->end[hmm->M] = 1.; Plan7RenormalizeExits(hmm); hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */} /* Function: Plan7GlobalConfig() * * Purpose: Set the alignment-independent, algorithm-dependent parameters * of a Plan7 model to global (Needleman/Wunsch) configuration. * * Like a non-looping hmmls, since we actually allow flanking * N and C terminal sequence. * * Args: hmm - the plan7 model * * Return: (void) * The HMM is modified; algorithm dependent parameters are set. * Previous scores are invalidated if they existed. */voidPlan7GlobalConfig(struct plan7_s *hmm) { hmm->xt[XTN][MOVE] = 1. - hmm->p1; /* allow N-terminal tail */ hmm->xt[XTN][LOOP] = hmm->p1; hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ hmm->xt[XTE][LOOP] = 0.; hmm->xt[XTC][MOVE] = 1. - hmm->p1; /* allow C-terminal tail */ hmm->xt[XTC][LOOP] = hmm->p1; hmm->xt[XTJ][MOVE] = 0.; /* J state unused */ hmm->xt[XTJ][LOOP] = 1.; FSet(hmm->begin+2, hmm->M-1, 0.); /* disallow internal entries. */ hmm->begin[1] = 1. - hmm->tbd1; FSet(hmm->end+1, hmm->M-1, 0.); /* disallow internal exits. */ hmm->end[hmm->M] = 1.; Plan7RenormalizeExits(hmm); hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */} /* Function: Plan7LSConfig() * * Purpose: Set the alignment independent parameters of a Plan7 model * to hmmls (global in HMM, local in sequence) configuration. * * Args: hmm - the plan7 model * * Return: (void); * the HMM probabilities are modified. */voidPlan7LSConfig(struct plan7_s *hmm){ hmm->xt[XTN][MOVE] = 1.-hmm->p1; /* allow N-terminal tail */ hmm->xt[XTN][LOOP] = hmm->p1; hmm->xt[XTE][MOVE] = 0.5; /* expectation 2 domains/seq */ hmm->xt[XTE][LOOP] = 0.5; hmm->xt[XTC][MOVE] = 1.-hmm->p1; /* allow C-terminal tail */ hmm->xt[XTC][LOOP] = hmm->p1; hmm->xt[XTJ][MOVE] = 1.-hmm->p1; /* allow J junction state */ hmm->xt[XTJ][LOOP] = hmm->p1; FSet(hmm->begin+2, hmm->M-1, 0.); /* start at M1/D1 */ hmm->begin[1] = 1. - hmm->tbd1; FSet(hmm->end+1, hmm->M-1, 0.); /* end at M_m/D_m */ hmm->end[hmm->M] = 1.; Plan7RenormalizeExits(hmm); hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -