📄 plan7.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
                             /* Function: Plan7SWConfig() *  * Purpose:  Set the alignment independent parameters of *           a Plan7 model to hmmsw (Smith/Waterman) configuration. *            * Notes:    entry/exit is asymmetric because of the left/right *           nature of the HMM/profile. Entry probability is distributed *           simply by assigning p_x = pentry / (M-1) to M-1  *           internal match states. However, the same approach doesn't *           lead to a flat distribution over exit points. Exit p's *           must be corrected for the probability of a previous exit *           from the model. Requiring a flat distribution over exit *           points leads to an easily solved piece of algebra, giving: *                      p_1 = pexit / (M-1) *                      p_x = p_1 / (1 - (x-1) p_1) *            * Args:     hmm    - the Plan7 model w/ data-dep prob's valid *           pentry - probability of an internal entry somewhere; *                    will be evenly distributed over M-1 match states *           pexit  - probability of an internal exit somewhere;  *                    will be distributed over M-1 match states. *                     * Return:   (void) *           HMM probabilities are modified. */voidPlan7SWConfig(struct plan7_s *hmm, float pentry, float pexit){  float basep;			/* p1 for exits: the base p */  int   k;			/* counter over states      */  /* Configure special states.   */  hmm->xt[XTN][MOVE] = 1-hmm->p1;    /* allow N-terminal tail */  hmm->xt[XTN][LOOP] = hmm->p1;  hmm->xt[XTE][MOVE] = 1.;	     /* disallow jump state   */  hmm->xt[XTE][LOOP] = 0.;  hmm->xt[XTC][MOVE] = 1-hmm->p1;    /* allow C-terminal tail */  hmm->xt[XTC][LOOP] = hmm->p1;  hmm->xt[XTJ][MOVE] = 1.;           /* J is unused */  hmm->xt[XTJ][LOOP] = 0.;  /* Configure entry.   */  hmm->begin[1] = (1. - pentry) * (1. - hmm->tbd1);  FSet(hmm->begin+2, hmm->M-1, (pentry * (1.- hmm->tbd1)) / (float)(hmm->M-1));    /* Configure exit.   */  hmm->end[hmm->M] = 1.0;  basep = pexit / (float) (hmm->M-1);  for (k = 1; k < hmm->M; k++)    hmm->end[k] = basep / (1. - basep * (float) (k-1));  Plan7RenormalizeExits(hmm);  hmm->flags       &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */}/* Function: Plan7FSConfig() * Date:     SRE, Fri Jan  2 15:34:40 1998 [StL] *  * Purpose:  Set the alignment independent parameters of *           a Plan7 model to hmmfs (multihit Smith/Waterman) configuration. *            *           See comments on Plan7SWConfig() for explanation of *           how pentry and pexit are used. *            * Args:     hmm    - the Plan7 model w/ data-dep prob's valid *           pentry - probability of an internal entry somewhere; *                    will be evenly distributed over M-1 match states *           pexit  - probability of an internal exit somewhere;  *                    will be distributed over M-1 match states. *                     * Return:   (void) *           HMM probabilities are modified. */voidPlan7FSConfig(struct plan7_s *hmm, float pentry, float pexit){  float basep;			/* p1 for exits: the base p */  int   k;			/* counter over states      */  /* Configure special states.   */  hmm->xt[XTN][MOVE] = 1-hmm->p1;    /* allow N-terminal tail     */  hmm->xt[XTN][LOOP] = hmm->p1;  hmm->xt[XTE][MOVE] = 0.5;	     /* allow loops / multihits   */  hmm->xt[XTE][LOOP] = 0.5;  hmm->xt[XTC][MOVE] = 1-hmm->p1;    /* allow C-terminal tail     */  hmm->xt[XTC][LOOP] = hmm->p1;  hmm->xt[XTJ][MOVE] = 1.-hmm->p1;   /* allow J junction between domains */  hmm->xt[XTJ][LOOP] = hmm->p1;  /* Configure entry.   */  hmm->begin[1] = (1. - pentry) * (1. - hmm->tbd1);  FSet(hmm->begin+2, hmm->M-1, (pentry * (1.-hmm->tbd1)) / (float)(hmm->M-1));    /* Configure exit.   */  hmm->end[hmm->M] = 1.0;  basep = pexit / (float) (hmm->M-1);  for (k = 1; k < hmm->M; k++)    hmm->end[k] = basep / (1. - basep * (float) (k-1));  Plan7RenormalizeExits(hmm);  hmm->flags       &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */}/* Function: Plan7ESTConfig() *  * Purpose:  Configure a Plan7 model for EST Smith/Waterman *           analysis. *            *           OUTDATED; DO NOT USE WITHOUT RECHECKING *            * Args:     hmm        - hmm to configure. *           aacode     - 0..63 vector mapping genetic code to amino acids *           estmodel   - 20x64 translation matrix, w/ codon bias and substitution error *           dna2       - probability of a -1 frameshift in a triplet *           dna4       - probability of a +1 frameshift in a triplet      */ voidPlan7ESTConfig(struct plan7_s *hmm, int *aacode, float **estmodel, 	       float dna2, float dna4){  int k;  int x;  float p;  float *tripnull;		/* UNFINISHED!!! */				/* configure specials */  hmm->xt[XTN][MOVE] = 1./351.;  hmm->xt[XTN][LOOP] = 350./351.;  hmm->xt[XTE][MOVE] = 1.;  hmm->xt[XTE][LOOP] = 0.;  hmm->xt[XTC][MOVE] = 1./351.;  hmm->xt[XTC][LOOP] = 350./351.;  hmm->xt[XTJ][MOVE] = 1.;  hmm->xt[XTJ][LOOP] = 0.;				/* configure entry/exit */  hmm->begin[1] = 0.5;  FSet(hmm->begin+2, hmm->M-1, 0.5 / ((float)hmm->M - 1.));  hmm->end[hmm->M] = 1.;  FSet(hmm->end, hmm->M-1, 0.5 / ((float)hmm->M - 1.));				/* configure dna triplet/frameshift emissions */  for (k = 1; k <= hmm->M; k++)    {				/* translate aa to triplet probabilities */      for (x = 0; x < 64; x++) {	p =  hmm->mat[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4);	hmm->dnam[x][k] = Prob2Score(p, tripnull[x]);	p = hmm->ins[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4);	hmm->dnai[x][k] = Prob2Score(p, tripnull[x]);      }      hmm->dnam[64][k] = 0;	/* ambiguous codons score 0 (danger?) */      hmm->dna2 = Prob2Score(dna2, 1.);      hmm->dna4 = Prob2Score(dna4, 1.);    }}	  /* Function: PrintPlan7Stats() *  * Purpose:  Given a newly constructed HMM and the tracebacks *           of the sequences it was trained on, print out all *           the interesting information at the end of hmmb  *           and hmmt runs that convinces the user we actually *           did something. *            * Args:     fp   - where to send the output (stdout, usually) *           hmm  - the new HMM, probability form *           dsq  - digitized training seqs *           nseq - number of dsq's *           tr   - array of tracebacks for dsq *                   * Return:   (void) */voidPrintPlan7Stats(FILE *fp, struct plan7_s *hmm, char **dsq, int nseq,		struct p7trace_s **tr){  int   idx;			/* counter for sequences                */  float score;			/* an individual trace score            */  float total, best, worst;	/* for the avg. and range of the scores */  float sqsum, stddev;		/* for the std. deviation of the scores */  P7Logoddsify(hmm, TRUE);	/* make sure model scores are ready */				/* find individual trace scores */  score = P7TraceScore(hmm, dsq[0], tr[0]);  total = best = worst = score;  sqsum = score * score;  for (idx = 1; idx < nseq; idx++) {    /* P7PrintTrace(stdout, tr[idx], hmm, dsq[idx]); */    score  = P7TraceScore(hmm, dsq[idx], tr[idx]);    total += score;    sqsum += score * score;    if (score > best)  best = score;    if (score < worst) worst = score;  }  if (nseq > 1) {    stddev = (sqsum - (total * total / (float) nseq)) / ((float) nseq - 1.);    stddev = (stddev > 0) ? sqrt(stddev) : 0.0;  } else stddev = 0.0;				/* print out stuff. */  fprintf(fp, "Average score:  %10.2f bits\n", total / (float) nseq);  fprintf(fp, "Minimum score:  %10.2f bits\n", worst);  fprintf(fp, "Maximum score:  %10.2f bits\n", best);  fprintf(fp, "Std. deviation: %10.2f bits\n", stddev);}/* Function: DegenerateSymbolScore() *  * Purpose:  Given a sequence character x and an hmm emission probability *           vector, calculate the log-odds (base 2) score of *           the symbol. *           *           Easy if x is in the emission alphabet, but not so easy *           is x is a degenerate symbol. The "correct" Bayesian *           philosophy is to calculate score(X) by summing over *           p(x) for all x in the degenerate symbol X to get P(X), *           doing the same sum over the prior to get F(X), and *           doing log_2 (P(X)/F(X)). This gives an X a zero score, *           for instance. *            *           Though this is correct in a formal Bayesian sense -- *           we have no information on the sequence, so we can't *           say if it's random or model, so it scores zero -- *           it sucks, big time, for scoring biological sequences. *           Sequences with lots of X's score near zero, while *           real sequences have average scores that are negative -- *           so the X-laden sequences appear to be lifted out *           of the noise of a full histogram of a database search. *           Correct or not, this is highly undesirable. *            *           So therefore we calculated the expected score of *           the degenerate symbol by summing over all x in X: *                 e_x log_2 (p(x)/f(x)) *           where the expectation of x, e_x, is calculated from *           the random model. * *           Empirically, this works; it also has a wooly hand-waving *           probabilistic justification that I'm happy enough about. *            * Args:     p      - probabilities of normal symbols *           null   - null emission model *           ambig  - index of the degenerate character in Alphabet[] *                     * Return:   the integer log odds score of x given the emission *           vector and the null model, scaled up by INTSCALE.               */int DegenerateSymbolScore(float *p, float *null, int ambig){  int x;  float numer = 0.;  float denom = 0.;  for (x = 0; x < Alphabet_size; x++) {    if (Degenerate[ambig][x]) {      numer += null[x] * sreLOG2(p[x] / null[x]);      denom += null[x];    }  }  return (int) (INTSCALE * numer / denom);}/***************************************************************** *  * Plan9/Plan7 interface *  * Very important code during the evolutionary takeover by Plan7 -- * convert between Krogh/Haussler and Plan7 models. *****************************************************************//* Function: Plan9toPlan7() *  * Purpose:  Convert an old HMM into Plan7. Configures it in *           ls mode. *            * Args:     hmm       - old ugly plan9 style HMM *           ret_plan7 - new wonderful Plan7 HMM *            * Return:   (void)     *           Plan7 HMM is allocated here. Free w/ FreePlan7(). */               voidPlan9toPlan7(struct plan9_s *hmm, struct plan7_s **ret_plan7){  struct plan7_s *plan7;  int k, x;  plan7 = AllocPlan7(hmm->M);    for (k = 1; k < hmm->M; k++)    {      plan7->t[k][TMM] = hmm->mat[k].t[MATCH];      plan7->t[k][TMD] = hmm->mat[k].t[DELETE];      plan7->t[k][TMI] = hmm->mat[k].t[INSERT];      plan7->t[k][TDM] = hmm->del[k].t[MATCH];      plan7->t[k][TDD] = hmm->del[k].t[DELETE];      plan7->t[k][TIM] = hmm->ins[k].t[MATCH];      plan7->t[k][TII] = hmm->ins[k].t[INSERT];    }  for (k = 1; k <= hmm->M; k++)    for (x = 0; x < Alphabet_size; x++)      plan7->mat[k][x] = hmm->mat[k].p[x];  for (k = 1; k < hmm->M; k++)    for (x = 0; x < Alphabet_size; x++)      plan7->ins[k][x] = hmm->ins[k].p[x];  plan7->tbd1 = hmm->mat[0].t[DELETE] / (hmm->mat[0].t[DELETE] + hmm->mat[0].t[MATCH]);  		/* We have to make up the null transition p1; use default */  P7DefaultNullModel(plan7->null, &(plan7->p1));  for (x = 0; x < Alphabet_size; x++)    plan7->null[x] = hmm->null[x];        if (hmm->name != NULL)     Plan7SetName(plan7, hmm->name);  if (hmm->flags & HMM_REF) {    strcpy(plan7->rf, hmm->ref);    plan7->flags |= PLAN7_RF;  }  if (hmm->flags & HMM_CS) {    strcpy(plan7->cs, hmm->cs);    plan7->flags |= PLAN7_CS;  }  Plan7LSConfig(plan7);		/* configure specials for ls-style alignment */  Plan7Renormalize(plan7);	/* mainly to correct for missing ID and DI */  plan7->flags |= PLAN7_HASPROB;	/* probabilities are valid */  plan7->flags &= ~PLAN7_HASBITS;	/* scores are not valid    */  *ret_plan7 = plan7;}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -