📄 plan7.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
 *         S/W B->M and M->E transitions are folded together with *         data-dependent B->D...D->M and M->D...D->E paths. *          *         This process is referred to in the code as "wing folding" *         or "wing retraction"... the analogy is to a swept-wing *         fighter in landing vs. high speed flight configuration. *          *    Note on Viterbi vs. forward flag:      *         Wing retraction must take forward vs. Viterbi *         into account. If forward, sum two paths; if Viterbi, take *         max. I tried to slide this by as a sum, without *         the flag, but Alex detected it as a bug, because you can *         then find cases where the Viterbi score doesn't match *         the P7TraceScore(). *              * Args:      hmm          - the hmm to calculate scores in. *            viterbi_mode - TRUE to fold wings in Viterbi configuration. *                   * Return:    (void) *            hmm scores are filled in. */  voidP7Logoddsify(struct plan7_s *hmm, int viterbi_mode){  int k;			/* counter for model position */  int x;			/* counter for symbols        */  float accum;  float tbm, tme;  if (hmm->flags & PLAN7_HASBITS) return;  /* Symbol emission scores   */  for (k = 1; k <= hmm->M; k++)     {				/* match/insert emissions in main model */      for (x = 0; x < Alphabet_size; x++) 	{	  hmm->msc[x][k] = Prob2Score(hmm->mat[k][x], hmm->null[x]);	  if (k < hmm->M) 	    hmm->isc[x][k] =  Prob2Score(hmm->ins[k][x], hmm->null[x]); 	}				/* degenerate match/insert emissions */      for (x = Alphabet_size; x < Alphabet_iupac; x++) 	{	  hmm->msc[x][k] = DegenerateSymbolScore(hmm->mat[k], hmm->null, x);	  if (k < hmm->M)	    hmm->isc[x][k] = DegenerateSymbolScore(hmm->ins[k], hmm->null, x);	}    }  /* State transitions.   *    * A note on "folding" of D_1 and D_M.   * These two delete states are folded out of search form models   * in order to prevent null cycles in the dynamic programming   * algorithms (see code below). However, we use their log transitions   * when we save the model! So the following log transition probs   * are used *only* in save files, *never* in search algorithms:   *    log (tbd1), D1 -> M2, D1 -> D2   *    Mm-1 -> Dm, Dm-1 -> Dm   *       * In a search algorithm, these have to be interpreted as -INFTY       * because their contributions are folded into bsc[] and esc[]   * entry/exit scores. They can't be set to -INFTY here because   * we need them in save files.   */  for (k = 1; k < hmm->M; k++)    {      hmm->tsc[k][TMM] = Prob2Score(hmm->t[k][TMM], hmm->p1);      hmm->tsc[k][TMI] = Prob2Score(hmm->t[k][TMI], hmm->p1);      hmm->tsc[k][TMD] = Prob2Score(hmm->t[k][TMD], 1.0);      hmm->tsc[k][TIM] = Prob2Score(hmm->t[k][TIM], hmm->p1);      hmm->tsc[k][TII] = Prob2Score(hmm->t[k][TII], hmm->p1);      hmm->tsc[k][TDM] = Prob2Score(hmm->t[k][TDM], hmm->p1);      hmm->tsc[k][TDD] = Prob2Score(hmm->t[k][TDD], 1.0);    }  /* B->M entry transitions. Note how D_1 is folded out.   * M1 is just B->M1   * M2 is sum (or max) of B->M2 and B->D1->M2   * M_k is sum (or max) of B->M_k and B->D1...D_k-1->M_k   * These have to be done in log space, else you'll get   * underflow errors; and we also have to watch for log(0).   * A little sloppier than it probably has to be; historically,   * doing in this in log space was in response to a bug report.   */  accum = hmm->tbd1 > 0.0 ? log(hmm->tbd1) : -9999.;  for (k = 1; k <= hmm->M; k++)    {      tbm = hmm->begin[k] > 0. ? log(hmm->begin[k]) : -9999.;	/* B->M_k part */      /* B->D1...D_k-1->M_k part we get from accum*/      if (k > 1 && accum > -9999.) 	{		  if (hmm->t[k-1][TDM] > 0.0)	    {	      if (viterbi_mode) tbm =  MAX(tbm, accum + log(hmm->t[k-1][TDM]));	      else              tbm =  LogSum(tbm, accum + log(hmm->t[k-1][TDM]));	    }	  accum = (hmm->t[k-1][TDD] > 0.0) ? accum + log(hmm->t[k-1][TDD]) : -9999.;	}				/* Convert from log_e to scaled integer log_2 odds. */      if (tbm > -9999.) 	hmm->bsc[k] = (int) floor(0.5 + INTSCALE * 1.44269504 * (tbm - log(hmm->p1)));      else	hmm->bsc[k] = -INFTY;    }  /* M->E exit transitions. Note how D_M is folded out.   * M_M is 1 by definition   * M_M-1 is sum of M_M-1->E and M_M-1->D_M->E, where D_M->E is 1 by definition   * M_k is sum of M_k->E and M_k->D_k+1...D_M->E   * Must be done in log space to avoid underflow errors.   * A little sloppier than it probably has to be; historically,   * doing in this in log space was in response to a bug report.   */  hmm->esc[hmm->M] = 0;  accum = 0.;  for (k = hmm->M-1; k >= 1; k--)    {      tme = hmm->end[k] > 0. ? log(hmm->end[k]) : -9999.;      if (accum > -9999.)	{	  if (hmm->t[k][TMD] > 0.0)	    {		      if (viterbi_mode) tme = MAX(tme, accum + log(hmm->t[k][TMD]));	      else              tme = LogSum(tme, accum + log(hmm->t[k][TMD]));	    }	  accum = (hmm->t[k][TDD] > 0.0) ? accum + log(hmm->t[k][TDD]) : -9999.;	}				/* convert from log_e to scaled integer log odds. */      hmm->esc[k] = (tme > -9999.) ? (int) floor(0.5 + INTSCALE * 1.44269504 * tme) : -INFTY;    }				/* special transitions */  hmm->xsc[XTN][LOOP] = Prob2Score(hmm->xt[XTN][LOOP], hmm->p1);  hmm->xsc[XTN][MOVE] = Prob2Score(hmm->xt[XTN][MOVE], 1.0);  hmm->xsc[XTE][LOOP] = Prob2Score(hmm->xt[XTE][LOOP], 1.0);  hmm->xsc[XTE][MOVE] = Prob2Score(hmm->xt[XTE][MOVE], 1.0);  hmm->xsc[XTC][LOOP] = Prob2Score(hmm->xt[XTC][LOOP], hmm->p1);  hmm->xsc[XTC][MOVE] = Prob2Score(hmm->xt[XTC][MOVE], 1.-hmm->p1);  hmm->xsc[XTJ][LOOP] = Prob2Score(hmm->xt[XTJ][LOOP], hmm->p1);  hmm->xsc[XTJ][MOVE] = Prob2Score(hmm->xt[XTJ][MOVE], 1.0);  hmm->flags |= PLAN7_HASBITS;	/* raise the log-odds ready flag */}/* Function: Plan7Renormalize() *  * Purpose:  Take an HMM in counts form, and renormalize *           all of its probability vectors. Also enforces *           Plan7 restrictions on nonexistent transitions. *            * Args:     hmm - the model to renormalize. *                  * Return:   (void) *           hmm is changed. */                          voidPlan7Renormalize(struct plan7_s *hmm){  int   k;			/* counter for model position */  int   st;			/* counter for special states */  float d;			/* denominator */				/* match emissions */  for (k = 1; k <= hmm->M; k++)     FNorm(hmm->mat[k], Alphabet_size);				/* insert emissions */  for (k = 1; k < hmm->M; k++)    FNorm(hmm->ins[k], Alphabet_size);				/* begin transitions */  d = FSum(hmm->begin+1, hmm->M) + hmm->tbd1;  FScale(hmm->begin+1, hmm->M, 1./d);  hmm->tbd1 /= d;				/* main model transitions */  for (k = 1; k < hmm->M; k++)    {      d = FSum(hmm->t[k], 3) + hmm->end[k];       FScale(hmm->t[k], 3, 1./d);      hmm->end[k] /= d;      FNorm(hmm->t[k]+3, 2);	/* insert */      FNorm(hmm->t[k]+5, 2);	/* delete */    }				/* null model emissions */  FNorm(hmm->null, Alphabet_size);				/* special transitions  */  for (st = 0; st < 4; st++)    FNorm(hmm->xt[st], 2);				/* enforce nonexistent transitions */				/* (is this necessary?) */  hmm->t[0][TDM] = hmm->t[0][TDD] = 0.0;  hmm->flags &= ~PLAN7_HASBITS;	/* clear the log-odds ready flag */  hmm->flags |= PLAN7_HASPROB;	/* set the probabilities OK flag */}  /* Function: Plan7RenormalizeExits() * Date:     SRE, Fri Aug 14 11:22:19 1998 [St. Louis] * * Purpose:  Renormalize just the match state transitions; *           for instance, after a Config() function has *           modified the exit distribution. * * Args:     hmm - hmm to renormalize * * Returns:  void */voidPlan7RenormalizeExits(struct plan7_s *hmm){  int   k;  float d;  for (k = 1; k < hmm->M; k++)    {      d = FSum(hmm->t[k], 3);      FScale(hmm->t[k], 3, 1./(d + d*hmm->end[k]));    }}/***************************************************************** * Plan7 configuration functions * The following few functions are the Plan7 equivalent of choosing * different alignment styles (fully local, fully global, global/local, * multihit, etc.) *  * There is (at least) one constraint worth noting. * If you want per-domain scores to sum up to per-sequence scores, * then one of the following two sets of conditions must be met: *    *   1) t(E->J) = 0     *      e.g. no multidomain hits *       *   2) t(N->N) = t(C->C) = t(J->J) = hmm->p1  *      e.g. unmatching sequence scores zero, and  *      N->B first-model score is equal to J->B another-model score. *       * These constraints are obeyed in the default Config() functions below, * but in the future (when HMM editing may be allowed) we'll have * to remember this. Non-equality of the summed domain scores and * the total sequence score is a really easy "red flag" for people to * notice and report as a bug, even if it may make probabilistic * sense not to meet either constraint for certain modeling problems. ***************************************************************** *//* Function: Plan7NakedConfig() *  * Purpose:  Set the alignment-independent, algorithm-dependent parameters *           of a Plan7 model so that no special states (N,C,J) emit anything: *           one simple, full global pass through the model. *  * Args:     hmm - the plan7 model *                  * Return:   (void) *           The HMM is modified; algorithm dependent parameters are set. *           Previous scores are invalidated if they existed. */voidPlan7NakedConfig(struct plan7_s *hmm)                           {  hmm->xt[XTN][MOVE] = 1.;	      /* disallow N-terminal tail */  hmm->xt[XTN][LOOP] = 0.;  hmm->xt[XTE][MOVE] = 1.;	      /* only 1 domain/sequence ("global" alignment) */  hmm->xt[XTE][LOOP] = 0.;  hmm->xt[XTC][MOVE] = 1.;	      /* disallow C-terminal tail */  hmm->xt[XTC][LOOP] = 0.;  hmm->xt[XTJ][MOVE] = 0.;	      /* J state unused */  hmm->xt[XTJ][LOOP] = 1.;  FSet(hmm->begin+2, hmm->M-1, 0.);   /* disallow internal entries. */  hmm->begin[1]    = 1. - hmm->tbd1;  FSet(hmm->end+1,   hmm->M-1, 0.);   /* disallow internal exits. */  hmm->end[hmm->M] = 1.;  Plan7RenormalizeExits(hmm);  hmm->flags       &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */}   /* Function: Plan7GlobalConfig() *  * Purpose:  Set the alignment-independent, algorithm-dependent parameters *           of a Plan7 model to global (Needleman/Wunsch) configuration. *  *           Like a non-looping hmmls, since we actually allow flanking *           N and C terminal sequence.  *            * Args:     hmm - the plan7 model *                  * Return:   (void) *           The HMM is modified; algorithm dependent parameters are set. *           Previous scores are invalidated if they existed. */voidPlan7GlobalConfig(struct plan7_s *hmm)                           {  hmm->xt[XTN][MOVE] = 1. - hmm->p1;  /* allow N-terminal tail */  hmm->xt[XTN][LOOP] = hmm->p1;  hmm->xt[XTE][MOVE] = 1.;	      /* only 1 domain/sequence ("global" alignment) */  hmm->xt[XTE][LOOP] = 0.;  hmm->xt[XTC][MOVE] = 1. - hmm->p1;  /* allow C-terminal tail */  hmm->xt[XTC][LOOP] = hmm->p1;  hmm->xt[XTJ][MOVE] = 0.;	      /* J state unused */  hmm->xt[XTJ][LOOP] = 1.;  FSet(hmm->begin+2, hmm->M-1, 0.);   /* disallow internal entries. */  hmm->begin[1]    = 1. - hmm->tbd1;  FSet(hmm->end+1,   hmm->M-1, 0.);   /* disallow internal exits. */  hmm->end[hmm->M] = 1.;  Plan7RenormalizeExits(hmm);  hmm->flags       &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */}   /* Function: Plan7LSConfig() *  * Purpose:  Set the alignment independent parameters of a Plan7 model *           to hmmls (global in HMM, local in sequence) configuration. *            * Args:     hmm  - the plan7 model *                  * Return:   (void); *           the HMM probabilities are modified. */voidPlan7LSConfig(struct plan7_s *hmm){  hmm->xt[XTN][MOVE] = 1.-hmm->p1;    /* allow N-terminal tail */  hmm->xt[XTN][LOOP] = hmm->p1;  hmm->xt[XTE][MOVE] = 0.5;	     /* expectation 2 domains/seq */  hmm->xt[XTE][LOOP] = 0.5;  hmm->xt[XTC][MOVE] = 1.-hmm->p1;    /* allow C-terminal tail */  hmm->xt[XTC][LOOP] = hmm->p1;  hmm->xt[XTJ][MOVE] = 1.-hmm->p1;   /* allow J junction state */  hmm->xt[XTJ][LOOP] = hmm->p1;  FSet(hmm->begin+2, hmm->M-1, 0.);  /* start at M1/D1 */  hmm->begin[1]    = 1. - hmm->tbd1;  FSet(hmm->end+1,   hmm->M-1, 0.);  /* end at M_m/D_m */  hmm->end[hmm->M] = 1.;  Plan7RenormalizeExits(hmm);  hmm->flags       &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -