⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 structs.h

📁 hmmer源程序
💻 H
📖 第 1 页 / 共 2 页
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* structs.h *  * Data structures used in HMMER. * Also, a few miscellaneous macros and global variable declarations. *  * RCS $Id: structs.h,v 1.22 2001/06/07 17:38:48 eddy Exp $ */#ifndef STRUCTSH_INCLUDED#define STRUCTSH_INCLUDED#include "squid.h"#include "config.h"#include "ssi.h"/* Miscellaneous math macros used in the package */#define sreLOG2(x)  ((x) > 0 ? log(x) * 1.44269504 : -9999.)#define sreEXP2(x)  (exp((x) * 0.69314718 )) #define SQR(x)      ((x) * (x))/* an idiom for determining a symbol's position in the array * by pointer arithmetic. * does no error checking, so caller must already be damned sure x is * valid in the alphabet! */#define SYMIDX(x)   (strchr(Alphabet, (x)) - Alphabet)/* The symbol alphabet. * Must deal with IUPAC degeneracies. Nondegenerate symbols  * come first in Alphabet[], followed by degenerate symbols. * Nucleic alphabet also must deal with other common symbols * like U (in RNA) and X (often misused for N).      * Example:  *   Nucleic: "ACGTUNRYMKSWHBVDX"          size=4  iupac=17 *   Amino:   "ACDEFGHIKLMNPQRSTVWYBZX"    size=20 iupac=23 * * Parts of the code assume that the last symbol is a * symbol for an unknown residue, i.e. 'X'. *  * MAXCODE and MAXABET constants are defined in config.h */   extern char  Alphabet[MAXCODE]; /* "ACDEFGHIKLMNPQRSTVWYBZX" for example */extern int   Alphabet_type;     /* hmmNUCLEIC or hmmAMINO                */extern int   Alphabet_size;     /* uniq alphabet size: 4 or 20           */extern int   Alphabet_iupac;    /* total size of alphabet + IUPAC degen. */extern char  Degenerate[MAXCODE][MAXABET];extern int   DegenCount[MAXCODE];#define hmmNOTSETYET 0#define hmmNUCLEIC   2		/* compatibility with squid's kRNA   */#define hmmAMINO     3		/* compatibility with squid's kAmino *//********************************************************************** * * Plan7  * Implementation of the new Plan7 HMM architecture. * Fully probabilistic even for hmmsw, hmmls, and hmmfs; * No insert->delete or delete->insert transitions; * Improved structure layout. *  * The strategy is to infiltrate plan7 code into HMMER in * an evolutionary rather than revolutionary manner.  * **********************************************************************//* Plan 7 construction strategies. */enum p7_construction {  P7_MAP_CONSTRUCTION,		/* maximum a posteriori architecture */  P7_HAND_CONSTRUCTION,		/* hand specified architecture       */  P7_FAST_CONSTRUCTION		/* fast ad hoc architecture          */};/* Plan 7 parameter optimization strategies */enum p7_param {  P7_MAP_PARAM,			/* standard maximum a posteriori    */  P7_MD_PARAM,			/* maximum discrimination           */  P7_MRE_PARAM,			/* maximum relative entropy         */  P7_WMAP_PARAM			/* ad hoc weighted MAP              */};/* Structure: plan7_s *  * Declaration of a Plan 7 profile-HMM. */struct plan7_s {  /* Annotation on the model. A name is mandatory.   * Other fields are optional; whether they are present is   * flagged in the stateflags bit array.   *    * desc is only valid if PLAN7_DESC is set in flags.   *  acc is only valid if PLAN7_ACC is set in flags.   *   rf is only valid if PLAN7_RF is set in flags.   *   cs is only valid if PLAN7_CS is set in flags.   *   ca is only valid if PLAN7_CA is set in flags.   *  map is only valid if PLAN7_MAP is set in flags.   */  char  *name;                  /* name of the model                    +*/  char  *acc;			/* accession number of model (Pfam)     +*/  char  *desc;                  /* brief description of model           +*/   char  *rf;                    /* reference line from alignment 0..M   +*/  char  *cs;                    /* consensus structure line      0..M   +*/   char  *ca;			/* consensus accessibility line  0..M    */  char  *comlog;		/* command line(s) that built model     +*/  int    nseq;			/* number of training sequences         +*/  char  *ctime;			/* creation date                        +*/  int   *map;			/* map of alignment cols onto model 1..M+*/  int    checksum;              /* checksum of training sequences       +*/  /* The following are annotations added to support work by Michael Asman,    * CGR Stockholm. They are not stored in model files; they are only   * used in model construction.   *    * #=GC X-PRM (PRT,PRI) annotation is picked up by hmmbuild and interpreted   * as specifying which mixture Dirichlet component to use. If these flags   * are non-NULL, the normal mixture Dirichlet code is bypassed, and a   * single specific Dirichlet is used at each position.   */  int   *tpri;                  /* which transition mixture prior to use */   int   *mpri;                  /* which match mixture prior to use */  int   *ipri;                  /* which insert mixture prior to use */  /* Pfam-specific score cutoffs.   *    * ga1, ga2 are valid if PLAN7_GA is set in flags.   * tc1, tc2 are valid if PLAN7_TC is set in flags.   * nc1, nc2 are valid if PLAN7_NC is set in flags.   */  float  ga1, ga2;		/* per-seq/per-domain gathering thresholds (bits) +*/  float  tc1, tc2;		/* per-seq/per-domain trusted cutoff (bits)       +*/  float  nc1, nc2;		/* per-seq/per-domain noise cutoff (bits)         +*/  /* The main model in probability form: data-dependent probabilities.   * This is the core Krogh/Haussler model.   * Transition probabilities are usually accessed as a   *   two-D array: hmm->t[k][TMM], for instance. They are allocated   *   such that they can also be stepped through in 1D by pointer   *   manipulations, for efficiency in DP algorithms.   */  int     M;                    /* length of the model (# nodes)        +*/  float **t;                    /* transition prob's. t[1..M-1][0..6]   +*/  float **mat;                  /* match emissions.  mat[1..M][0..19]   +*/   float **ins;                  /* insert emissions. ins[1..M-1][0..19] +*/  float   tbd1;			/* B->D1 prob (data dependent)          +*/  /* The unique states of Plan 7 in probability form.   * These are the algorithm-dependent, data-independent probabilities.   * Some parts of the code may briefly use a trick of copying tbd1   * into begin[0]; this makes it easy to call FChoose() or FNorm()   * on the resulting vector. However, in general begin[0] is not   * a valid number.   */  float  xt[4][2];              /* N,E,C,J extra states: 2 transitions      +*/  float *begin;                 /* 1..M B->M state transitions              +*/  float *end;                   /* 1..M M->E state transitions (!= a dist!) +*/  /* The null model probabilities.   */  float  null[MAXABET];         /* "random sequence" emission prob's     +*/  float  p1;                    /* null model loop probability           +*/  /* The model in log-odds score form.   * These are created from the probabilities by LogoddsifyHMM().   * By definition, null[] emission scores are all zero.   * Note that emission distributions are over 26 upper-case letters,   * not just the unambiguous protein or DNA alphabet: we   * precalculate the scores for all IUPAC degenerate symbols we   * may see. Non-IUPAC symbols simply have a -INFTY score.   * Note the reversed indexing on msc and isc -- for efficiency reasons.   *    * Only valid if PLAN7_HASBITS is set.   */  int  **tsc;                   /* transition scores     [1.M-1][0.6]       -*/  int  **msc;                   /* match emission scores [0.MAXCODE-1][1.M] -*/  int  **isc;                   /* ins emission scores [0.MAXCODE-1][1.M-1] -*/  int    xsc[4][2];             /* N,E,C,J transitions                      -*/  int   *bsc;                   /* begin transitions     [1.M]              -*/  int   *esc;			/* end transitions       [1.M]              -*/  /* DNA translation scoring parameters   * For aligning protein Plan7 models to DNA sequence.   * Lookup value for a codon is calculated by pos1 * 16 + pos2 * 4 + pos3,   * where 'pos1' is the digitized value of the first nucleotide position;   * if any of the positions are ambiguous codes, lookup value 64 is used   * (which will generally have a score of zero)   *    * Only valid if PLAN7_HASDNA is set.   */  int  **dnam;                  /* triplet match scores  [0.64][1.M]       -*/  int  **dnai;                  /* triplet insert scores [0.64][1.M]       -*/  int    dna2;			/* -1 frameshift, doublet emission, M or I -*/  int    dna4;			/* +1 frameshift, doublet emission, M or I -*/  /* P-value and E-value statistical parameters   * Only valid if PLAN7_STATS is set.   */  float  mu;			/* EVD mu       +*/  float  lambda;		/* EVD lambda   +*/  int flags;                    /* bit flags indicating state of HMM, valid data +*/};/* Flags for plan7->flags. * Note: Some models have scores but no probabilities (for instance, *       after reading from an HMM save file). Other models have *       probabilities but no scores (for instance, during training *       or building). Since it costs time to convert either way, *       I use PLAN7_HASBITS and PLAN7_HASPROB flags to defer conversion *       until absolutely necessary. This means I have to be careful *       about keeping these flags set properly when I fiddle a model.  */#define PLAN7_HASBITS (1<<0)    /* raised if model has log-odds scores      */#define PLAN7_DESC    (1<<1)    /* raised if description exists             */#define PLAN7_RF      (1<<2)    /* raised if #RF annotation available       */#define PLAN7_CS      (1<<3)    /* raised if #CS annotation available       */#define PLAN7_XRAY    (1<<4)    /* raised if structural data available      */#define PLAN7_HASPROB (1<<5)    /* raised if model has probabilities        */#define PLAN7_HASDNA  (1<<6)	/* raised if protein HMM->DNA seq params set*/#define PLAN7_STATS   (1<<7)	/* raised if EVD parameters are available   */#define PLAN7_MAP     (1<<8)	/* raised if alignment map is available     */#define PLAN7_ACC     (1<<9)	/* raised if accession number is available  */#define PLAN7_GA      (1<<10)	/* raised if gathering thresholds available */#define PLAN7_TC      (1<<11)	/* raised if trusted cutoffs available      */#define PLAN7_NC      (1<<12)	/* raised if noise cutoffs available        */#define PLAN7_CA      (1<<13)   /* raised if surface accessibility avail.   *//* Indices for special state types, I: used for dynamic programming xmx[][] * mnemonic: eXtra Matrix for B state = XMB */#define XMB 0#define XME 1#define XMC 2#define XMJ 3#define XMN 4/* Indices for special state types, II: used for hmm->xt[] indexing * mnemonic: eXtra Transition for N state = XTN */#define XTN  0#define XTE  1#define XTC  2#define XTJ  3/* Indices for Plan7 main model state transitions. * Used for indexing hmm->t[k][] * mnemonic: Transition from Match to Match = TMM */#define TMM  0#define TMI  1#define TMD  2#define TIM  3#define TII  4#define TDM  5#define TDD  6 /* Indices for extra state transitions * Used for indexing hmm->xt[][]. */#define MOVE 0          /* trNB, trEC, trCT, trJB */#define LOOP 1          /* trNN, trEJ, trCC, trJJ *//* Declaration of Plan7 dynamic programming matrix structure. */struct dpmatrix_s {  int **xmx;			/* special scores [0.1..N][BECJN]     */  int **mmx;			/* match scores [0.1..N][0.1..M]      */  int **imx;			/* insert scores [0.1..N][0.1..M-1.M] */  int **dmx;			/* delete scores [0.1..N][0.1..M-1.M] */};/* Declaration of Plan7 shadow matrix structure. * In general, allowed values are STM, STI, etc.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -