⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trace.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* trace.c * SRE, Sat Nov 16 12:34:57 1996 * RCS $Id: trace.c,v 1.15 1999/12/04 22:58:56 eddy Exp $ *  * Support for Plan 7 traceback data structure, p7trace_s. */#include <stdio.h>#include <string.h>#include <ctype.h>#include "structs.h"#include "config.h"#include "squid.h"#include "funcs.h"#include "version.h"#ifdef MEMDEBUG#include "dbmalloc.h"#endifstatic void rightjustify(char *s, int n);/* Function: P7AllocTrace(), P7ReallocTrace(), P7FreeTrace() *  * Purpose:  allocation and freeing of traceback structures */voidP7AllocTrace(int tlen, struct p7trace_s **ret_tr){  struct p7trace_s *tr;    tr =            MallocOrDie (sizeof(struct p7trace_s));  tr->statetype = MallocOrDie (sizeof(char) * tlen);  tr->nodeidx   = MallocOrDie (sizeof(int)  * tlen);  tr->pos       = MallocOrDie (sizeof(int)  * tlen);  *ret_tr = tr;}voidP7ReallocTrace(struct p7trace_s *tr, int tlen){  tr->statetype = ReallocOrDie (tr->statetype, tlen * sizeof(char));  tr->nodeidx   = ReallocOrDie (tr->nodeidx,   tlen * sizeof(int));  tr->pos       = ReallocOrDie (tr->pos,       tlen * sizeof(int));}void P7FreeTrace(struct p7trace_s *tr){  free(tr->pos);  free(tr->nodeidx);  free(tr->statetype);  free(tr);}/* Function: TraceSet() * Date:     SRE, Sun Mar  8 12:39:00 1998 [St. Louis] * * Purpose:  Convenience function; set values at position tpos *           in a trace.  *            * * Args:     tr   - trace object to write to *           tpos - ptr to position in trace to set *           type - statetype e.g. STS, etc. *           idx  - nodeidx 1..M or 0 *           pos  - seq position 1..L or 0 * * Returns:  void */voidTraceSet(struct p7trace_s *tr, int tpos, char type, int idx, int pos){  tr->statetype[tpos] = type;  tr->nodeidx[tpos]   = idx;  tr->pos[tpos]       = pos;}/* Function: MergeTraceArrays() * Date:     SRE, Sun Jul  5 15:09:10 1998 [St. Louis] * * Purpose:  Combine two arrays of traces into a single array. *           Used in hmmalign to merge traces from a fixed alignment *           with traces from individual unaligned seqs. *  *           t1 traces always precede t2 traces in the resulting array. * * Args:     t1 - first set of traces *           n1 - number of traces in t1 *           t2 - second set of traces *           n2 - number of traces in t2 * * Returns:  pointer to new array of traces. *           Both t1 and t2 are free'd here! Do not reuse. */struct p7trace_s **MergeTraceArrays(struct p7trace_s **t1, int n1, struct p7trace_s **t2, int n2){  struct p7trace_s **tr;  int i;			/* index in traces */  tr = MallocOrDie(sizeof(struct p7trace_s *) * (n1+n2));  for (i = 0; i < n1; i++) tr[i]    = t1[i];  for (i = 0; i < n2; i++) tr[n1+i] = t2[i];  free(t1);  free(t2);  return tr;}/* Function: P7ReverseTrace() * Date:     SRE, Mon Aug 25 12:57:29 1997; Denver CO.  *  * Purpose:  Reverse the arrays in a traceback structure. *           Tracebacks from Forward() and Viterbi() are *           collected backwards, and call this function *           when they're done. *            *           It's possible to reverse the arrays in place *           more efficiently; but the realloc/copy strategy *           has the advantage of reallocating the trace *           into the right size of memory. (Tracebacks *           overallocate.) *            * Args:     tr - the traceback to reverse. tr->tlen must be set. *                 * Return:   (void) *           tr is modified. */                voidP7ReverseTrace(struct p7trace_s *tr){  char  *statetype;  int   *nodeidx;  int   *pos;  int    opos, npos;  /* Allocate   */  statetype = MallocOrDie (sizeof(char)* tr->tlen);  nodeidx   = MallocOrDie (sizeof(int) * tr->tlen);  pos       = MallocOrDie (sizeof(int) * tr->tlen);    /* Reverse the trace.   */  for (opos = tr->tlen-1, npos = 0; npos < tr->tlen; npos++, opos--)    {      statetype[npos] = tr->statetype[opos];      nodeidx[npos]   = tr->nodeidx[opos];      pos[npos]       = tr->pos[opos];    }  /* Swap old, new arrays.   */  free(tr->statetype);  free(tr->nodeidx);  free(tr->pos);  tr->statetype = statetype;  tr->nodeidx   = nodeidx;  tr->pos       = pos;}/* Function: P7TraceCount() *  * Purpose:  Count a traceback into a count-based HMM structure. *           (Usually as part of a model parameter re-estimation.) *            * Args:     hmm   - counts-based HMM *           dsq   - digitized sequence that traceback aligns to the HMM (1..L) *           wt    - weight on the sequence *           tr    - alignment of seq to HMM *            * Return:   (void) */voidP7TraceCount(struct plan7_s *hmm, char *dsq, float wt, struct p7trace_s *tr){  int tpos;                     /* position in tr */  int i;			/* symbol position in seq */    for (tpos = 0; tpos < tr->tlen; tpos++)    {      i = tr->pos[tpos];      /* Emission counts.        * Don't bother counting null states N,J,C.       */      if (tr->statetype[tpos] == STM) 	P7CountSymbol(hmm->mat[tr->nodeidx[tpos]], dsq[i], wt);      else if (tr->statetype[tpos] == STI) 	P7CountSymbol(hmm->ins[tr->nodeidx[tpos]], dsq[i], wt);      /* State transition counts       */      switch (tr->statetype[tpos]) {      case STS:	break;			/* don't bother; P=1 */      case STN:	switch (tr->statetype[tpos+1]) {	case STB: hmm->xt[XTN][MOVE] += wt; break;	case STN: hmm->xt[XTN][LOOP] += wt; break;	default:	  Die("illegal state transition %s->%s in traceback", 	      Statetype(tr->statetype[tpos]),	      Statetype(tr->statetype[tpos+1]));	}	break;      case STB:	switch (tr->statetype[tpos+1]) {	case STM: hmm->begin[tr->nodeidx[tpos+1]] += wt; break;	case STD: hmm->tbd1 += wt;                       break;	default:      	  Die("illegal state transition %s->%s in traceback", 	      Statetype(tr->statetype[tpos]),	      Statetype(tr->statetype[tpos+1]));	}	break;      case STM:	switch (tr->statetype[tpos+1]) {	case STM: hmm->t[tr->nodeidx[tpos]][TMM] += wt; break;	case STI: hmm->t[tr->nodeidx[tpos]][TMI] += wt; break;	case STD: hmm->t[tr->nodeidx[tpos]][TMD] += wt; break;	case STE: hmm->end[tr->nodeidx[tpos]]    += wt; break;	default:    	  Die("illegal state transition %s->%s in traceback", 	      Statetype(tr->statetype[tpos]),	      Statetype(tr->statetype[tpos+1]));	}	break;      case STI:	switch (tr->statetype[tpos+1]) {	case STM: hmm->t[tr->nodeidx[tpos]][TIM] += wt; break;	case STI: hmm->t[tr->nodeidx[tpos]][TII] += wt; break;	default:    	  Die("illegal state transition %s->%s in traceback", 	      Statetype(tr->statetype[tpos]),	      Statetype(tr->statetype[tpos+1]));	}	break;      case STD:	switch (tr->statetype[tpos+1]) {	case STM: hmm->t[tr->nodeidx[tpos]][TDM] += wt; break;	case STD: hmm->t[tr->nodeidx[tpos]][TDD] += wt; break;	case STE: /* ignore; p(D->E) = 1.0 */           break;	default: 	  Die("illegal state transition %s->%s in traceback", 	      Statetype(tr->statetype[tpos]),	      Statetype(tr->statetype[tpos+1]));	}	break;      case STE:	switch (tr->statetype[tpos+1]) {	case STC: hmm->xt[XTE][MOVE] += wt; break;	case STJ: hmm->xt[XTE][LOOP] += wt; break;	default:     	  Die("illegal state transition %s->%s in traceback", 	      Statetype(tr->statetype[tpos]),	      Statetype(tr->statetype[tpos+1]));	}	break;     case STJ:	switch (tr->statetype[tpos+1]) {	case STB: hmm->xt[XTJ][MOVE] += wt; break;	case STJ: hmm->xt[XTJ][LOOP] += wt; break;	default:     	  Die("illegal state transition %s->%s in traceback", 	      Statetype(tr->statetype[tpos]),	      Statetype(tr->statetype[tpos+1]));	}	break;      case STC:	switch (tr->statetype[tpos+1]) {	case STT: hmm->xt[XTC][MOVE] += wt; break;	case STC: hmm->xt[XTC][LOOP] += wt; break;	default:     	 Die("illegal state transition %s->%s in traceback", 	     Statetype(tr->statetype[tpos]),	     Statetype(tr->statetype[tpos+1])); 	}	break;      case STT:	break;			/* T is the last. It makes no transitions. */      default:	Die("illegal state %s in traceback", 	    Statetype(tr->statetype[tpos]));      }    }}/* Function: P7TraceScore() *  * Purpose:  Score a traceback and return the score in scaled bits. *            * Args:     hmm   - HMM with valid log odds scores. *           dsq   - digitized sequence that traceback aligns to the HMM (1..L) *           tr    - alignment of seq to HMM *            * Return:   (void) */floatP7TraceScore(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr){  int score;			/* total score as a scaled integer */  int tpos;                     /* position in tr */  int sym;			/* digitized symbol in dsq */    /*  P7PrintTrace(stdout, tr, hmm, dsq); */  score = 0;  for (tpos = 0; tpos < tr->tlen-1; tpos++)    {      sym = (int) dsq[tr->pos[tpos]];      /* Emissions.       * Don't bother counting null states N,J,C.       */      if (tr->statetype[tpos] == STM) 	score += hmm->msc[sym][tr->nodeidx[tpos]];      else if (tr->statetype[tpos] == STI) 	score += hmm->isc[sym][tr->nodeidx[tpos]];      /* State transitions.       */      score += TransitionScoreLookup(hmm, 			     tr->statetype[tpos], tr->nodeidx[tpos],			     tr->statetype[tpos+1], tr->nodeidx[tpos+1]);    }  return Scorify(score);}/* Function: P7Traces2Alignment() *  * Purpose:  Convert an array of traceback structures for a set *           of sequences into a new multiple alignment.  *            *           Insertions are put into lower case and  *           are not aligned; instead, Nterm is right-justified, *           Cterm is left-justified, and internal insertions *           are split in half and the halves are justified in *           each direction (the objective being to increase *           the chances of getting insertions aligned well enough *           for them to become a match). SAM gap char conventions *           are used: - in match columns, . in insert columns *  * NOTE:     Does not recognize J state. * * Args:     dsq        - digitized unaligned sequences  *           sqinfo     - array of info about the sequences *           wgt        - weights on seqs *           nseq       - number of sequences *           mlen       - length of model (number of match states) *           tr         - array of tracebacks *           matchonly  - TRUE if we don't print insert-generated symbols at all * Return:   MSA structure; NULL on failure. *           Caller responsible for freeing msa with MSAFree(msa); */          MSA *P7Traces2Alignment(char **dsq, SQINFO *sqinfo, float *wgt, int nseq, int mlen, 		   struct p7trace_s **tr, int matchonly) {  MSA   *msa;                   /* RETURN: new alignment */  int    idx;                   /* counter for sequences */  int    alen;                  /* width of alignment */  int   *inserts;               /* array of max gaps between aligned columns */  int   *matmap;                /* matmap[k] = apos of match k [1..M] */  int    nins;                  /* counter for inserts */  int    apos;                  /* position in aligned sequence (0..alen-1)*/  int    rpos;                  /* position in raw digital sequence (1..L)*/  int    tpos;                  /* position counter in traceback */  int    statetype;		/* type of current state, e.g. STM */  int    k;                     /* counter over states in model */  /* Here's the problem. We want to align the match states in columns,   * but some sequences have inserted symbols in them; we need some   * sort of overall knowledge of where the inserts are and how long   * they are in order to create the alignment.   *    * Here's our trick. inserts[] is a 0..hmm->M array; inserts[i] stores   * the maximum number of times insert substate i was used. This   * is the maximum number of gaps to insert between canonical    * column i and i+1.  inserts[0] is the N-term tail; inserts[M] is   * the C-term tail.   *    * Remember that N and C emit on transition, hence the check for an   * N->N or C->C transition before bumping nins.    */  inserts = (int *) MallocOrDie (sizeof(int) * (mlen+1));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -