📄 trace.c
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. ************************************************************//* trace.c * SRE, Sat Nov 16 12:34:57 1996 * RCS $Id: trace.c,v 1.15 1999/12/04 22:58:56 eddy Exp $ * * Support for Plan 7 traceback data structure, p7trace_s. */#include <stdio.h>#include <string.h>#include <ctype.h>#include "structs.h"#include "config.h"#include "squid.h"#include "funcs.h"#include "version.h"#ifdef MEMDEBUG#include "dbmalloc.h"#endifstatic void rightjustify(char *s, int n);/* Function: P7AllocTrace(), P7ReallocTrace(), P7FreeTrace() * * Purpose: allocation and freeing of traceback structures */voidP7AllocTrace(int tlen, struct p7trace_s **ret_tr){ struct p7trace_s *tr; tr = MallocOrDie (sizeof(struct p7trace_s)); tr->statetype = MallocOrDie (sizeof(char) * tlen); tr->nodeidx = MallocOrDie (sizeof(int) * tlen); tr->pos = MallocOrDie (sizeof(int) * tlen); *ret_tr = tr;}voidP7ReallocTrace(struct p7trace_s *tr, int tlen){ tr->statetype = ReallocOrDie (tr->statetype, tlen * sizeof(char)); tr->nodeidx = ReallocOrDie (tr->nodeidx, tlen * sizeof(int)); tr->pos = ReallocOrDie (tr->pos, tlen * sizeof(int));}void P7FreeTrace(struct p7trace_s *tr){ free(tr->pos); free(tr->nodeidx); free(tr->statetype); free(tr);}/* Function: TraceSet() * Date: SRE, Sun Mar 8 12:39:00 1998 [St. Louis] * * Purpose: Convenience function; set values at position tpos * in a trace. * * * Args: tr - trace object to write to * tpos - ptr to position in trace to set * type - statetype e.g. STS, etc. * idx - nodeidx 1..M or 0 * pos - seq position 1..L or 0 * * Returns: void */voidTraceSet(struct p7trace_s *tr, int tpos, char type, int idx, int pos){ tr->statetype[tpos] = type; tr->nodeidx[tpos] = idx; tr->pos[tpos] = pos;}/* Function: MergeTraceArrays() * Date: SRE, Sun Jul 5 15:09:10 1998 [St. Louis] * * Purpose: Combine two arrays of traces into a single array. * Used in hmmalign to merge traces from a fixed alignment * with traces from individual unaligned seqs. * * t1 traces always precede t2 traces in the resulting array. * * Args: t1 - first set of traces * n1 - number of traces in t1 * t2 - second set of traces * n2 - number of traces in t2 * * Returns: pointer to new array of traces. * Both t1 and t2 are free'd here! Do not reuse. */struct p7trace_s **MergeTraceArrays(struct p7trace_s **t1, int n1, struct p7trace_s **t2, int n2){ struct p7trace_s **tr; int i; /* index in traces */ tr = MallocOrDie(sizeof(struct p7trace_s *) * (n1+n2)); for (i = 0; i < n1; i++) tr[i] = t1[i]; for (i = 0; i < n2; i++) tr[n1+i] = t2[i]; free(t1); free(t2); return tr;}/* Function: P7ReverseTrace() * Date: SRE, Mon Aug 25 12:57:29 1997; Denver CO. * * Purpose: Reverse the arrays in a traceback structure. * Tracebacks from Forward() and Viterbi() are * collected backwards, and call this function * when they're done. * * It's possible to reverse the arrays in place * more efficiently; but the realloc/copy strategy * has the advantage of reallocating the trace * into the right size of memory. (Tracebacks * overallocate.) * * Args: tr - the traceback to reverse. tr->tlen must be set. * * Return: (void) * tr is modified. */ voidP7ReverseTrace(struct p7trace_s *tr){ char *statetype; int *nodeidx; int *pos; int opos, npos; /* Allocate */ statetype = MallocOrDie (sizeof(char)* tr->tlen); nodeidx = MallocOrDie (sizeof(int) * tr->tlen); pos = MallocOrDie (sizeof(int) * tr->tlen); /* Reverse the trace. */ for (opos = tr->tlen-1, npos = 0; npos < tr->tlen; npos++, opos--) { statetype[npos] = tr->statetype[opos]; nodeidx[npos] = tr->nodeidx[opos]; pos[npos] = tr->pos[opos]; } /* Swap old, new arrays. */ free(tr->statetype); free(tr->nodeidx); free(tr->pos); tr->statetype = statetype; tr->nodeidx = nodeidx; tr->pos = pos;}/* Function: P7TraceCount() * * Purpose: Count a traceback into a count-based HMM structure. * (Usually as part of a model parameter re-estimation.) * * Args: hmm - counts-based HMM * dsq - digitized sequence that traceback aligns to the HMM (1..L) * wt - weight on the sequence * tr - alignment of seq to HMM * * Return: (void) */voidP7TraceCount(struct plan7_s *hmm, char *dsq, float wt, struct p7trace_s *tr){ int tpos; /* position in tr */ int i; /* symbol position in seq */ for (tpos = 0; tpos < tr->tlen; tpos++) { i = tr->pos[tpos]; /* Emission counts. * Don't bother counting null states N,J,C. */ if (tr->statetype[tpos] == STM) P7CountSymbol(hmm->mat[tr->nodeidx[tpos]], dsq[i], wt); else if (tr->statetype[tpos] == STI) P7CountSymbol(hmm->ins[tr->nodeidx[tpos]], dsq[i], wt); /* State transition counts */ switch (tr->statetype[tpos]) { case STS: break; /* don't bother; P=1 */ case STN: switch (tr->statetype[tpos+1]) { case STB: hmm->xt[XTN][MOVE] += wt; break; case STN: hmm->xt[XTN][LOOP] += wt; break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STB: switch (tr->statetype[tpos+1]) { case STM: hmm->begin[tr->nodeidx[tpos+1]] += wt; break; case STD: hmm->tbd1 += wt; break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STM: switch (tr->statetype[tpos+1]) { case STM: hmm->t[tr->nodeidx[tpos]][TMM] += wt; break; case STI: hmm->t[tr->nodeidx[tpos]][TMI] += wt; break; case STD: hmm->t[tr->nodeidx[tpos]][TMD] += wt; break; case STE: hmm->end[tr->nodeidx[tpos]] += wt; break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STI: switch (tr->statetype[tpos+1]) { case STM: hmm->t[tr->nodeidx[tpos]][TIM] += wt; break; case STI: hmm->t[tr->nodeidx[tpos]][TII] += wt; break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STD: switch (tr->statetype[tpos+1]) { case STM: hmm->t[tr->nodeidx[tpos]][TDM] += wt; break; case STD: hmm->t[tr->nodeidx[tpos]][TDD] += wt; break; case STE: /* ignore; p(D->E) = 1.0 */ break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STE: switch (tr->statetype[tpos+1]) { case STC: hmm->xt[XTE][MOVE] += wt; break; case STJ: hmm->xt[XTE][LOOP] += wt; break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STJ: switch (tr->statetype[tpos+1]) { case STB: hmm->xt[XTJ][MOVE] += wt; break; case STJ: hmm->xt[XTJ][LOOP] += wt; break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STC: switch (tr->statetype[tpos+1]) { case STT: hmm->xt[XTC][MOVE] += wt; break; case STC: hmm->xt[XTC][LOOP] += wt; break; default: Die("illegal state transition %s->%s in traceback", Statetype(tr->statetype[tpos]), Statetype(tr->statetype[tpos+1])); } break; case STT: break; /* T is the last. It makes no transitions. */ default: Die("illegal state %s in traceback", Statetype(tr->statetype[tpos])); } }}/* Function: P7TraceScore() * * Purpose: Score a traceback and return the score in scaled bits. * * Args: hmm - HMM with valid log odds scores. * dsq - digitized sequence that traceback aligns to the HMM (1..L) * tr - alignment of seq to HMM * * Return: (void) */floatP7TraceScore(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr){ int score; /* total score as a scaled integer */ int tpos; /* position in tr */ int sym; /* digitized symbol in dsq */ /* P7PrintTrace(stdout, tr, hmm, dsq); */ score = 0; for (tpos = 0; tpos < tr->tlen-1; tpos++) { sym = (int) dsq[tr->pos[tpos]]; /* Emissions. * Don't bother counting null states N,J,C. */ if (tr->statetype[tpos] == STM) score += hmm->msc[sym][tr->nodeidx[tpos]]; else if (tr->statetype[tpos] == STI) score += hmm->isc[sym][tr->nodeidx[tpos]]; /* State transitions. */ score += TransitionScoreLookup(hmm, tr->statetype[tpos], tr->nodeidx[tpos], tr->statetype[tpos+1], tr->nodeidx[tpos+1]); } return Scorify(score);}/* Function: P7Traces2Alignment() * * Purpose: Convert an array of traceback structures for a set * of sequences into a new multiple alignment. * * Insertions are put into lower case and * are not aligned; instead, Nterm is right-justified, * Cterm is left-justified, and internal insertions * are split in half and the halves are justified in * each direction (the objective being to increase * the chances of getting insertions aligned well enough * for them to become a match). SAM gap char conventions * are used: - in match columns, . in insert columns * * NOTE: Does not recognize J state. * * Args: dsq - digitized unaligned sequences * sqinfo - array of info about the sequences * wgt - weights on seqs * nseq - number of sequences * mlen - length of model (number of match states) * tr - array of tracebacks * matchonly - TRUE if we don't print insert-generated symbols at all * Return: MSA structure; NULL on failure. * Caller responsible for freeing msa with MSAFree(msa); */ MSA *P7Traces2Alignment(char **dsq, SQINFO *sqinfo, float *wgt, int nseq, int mlen, struct p7trace_s **tr, int matchonly) { MSA *msa; /* RETURN: new alignment */ int idx; /* counter for sequences */ int alen; /* width of alignment */ int *inserts; /* array of max gaps between aligned columns */ int *matmap; /* matmap[k] = apos of match k [1..M] */ int nins; /* counter for inserts */ int apos; /* position in aligned sequence (0..alen-1)*/ int rpos; /* position in raw digital sequence (1..L)*/ int tpos; /* position counter in traceback */ int statetype; /* type of current state, e.g. STM */ int k; /* counter over states in model */ /* Here's the problem. We want to align the match states in columns, * but some sequences have inserted symbols in them; we need some * sort of overall knowledge of where the inserts are and how long * they are in order to create the alignment. * * Here's our trick. inserts[] is a 0..hmm->M array; inserts[i] stores * the maximum number of times insert substate i was used. This * is the maximum number of gaps to insert between canonical * column i and i+1. inserts[0] is the N-term tail; inserts[M] is * the C-term tail. * * Remember that N and C emit on transition, hence the check for an * N->N or C->C transition before bumping nins. */ inserts = (int *) MallocOrDie (sizeof(int) * (mlen+1));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -