⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmio.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 4 页
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* hmmio.c *  * Input/output of HMMs. * * As of HMMER 2.0, HMMs are saved by default in a tabular ASCII format * as log-odds or log probabilities scaled to an integer. A binary save * file format is also available which is faster to access (a * consideration which might be important for HMM library applications). * HMMs can be concatenated into HMM libraries. *  * A comment on loss of accuracy. Storing a number as a scaled log * probability guarantees us an error of about 0.035% or * less in the retrieved probability. We are relatively invulnerable * to the truncation errors which HMMER 1.8 was vulnerable to.   *  * Magic numbers (both for the ASCII and binary save formats) are used  * to label save files with a major version number. This simplifies the task of * backwards compatibility as new versions of the program are created.  * Reverse but not forward compatibility is guaranteed. I.e. HMMER 2.0 * can read `1.7' save files, but not vice versa. Note that the major * version number in the save files is NOT the version of the software * that generated it; rather, the number of the last major version in which * save format changed. *  ******************************************************************  *  * The HMM input API: *  *       HMMFILE        *hmmfp; *       char           *hmmfile; *       struct plan7_s *hmm; *       char            env[] = "HMMERDB";  (a la BLASTDB)  * *       hmmfp = HMMFileOpen(hmmfile, env)   NULL on failure *       while (HMMFileRead(hmmfp, &hmm))    0 if no more HMMs *          if (hmm == NULL) Die();          NULL on file parse failure *          whatever; *          FreeHMM(hmm); *       } *       HMMFileClose(hmmfp); *        ***************************************************************** * * The HMM output API: *  *       FILE           *ofp; *       struct plan7_s *hmm; *        *       WriteAscHMM(ofp, hmm);    to write/append an HMM to open file *   or  WriteBinHMM(ofp, hmm);    to write/append binary format HMM to open file *  *****************************************************************  *  * V1.0: original implementation * V1.1: regularizers removed from model structure * V1.7: ref and cs annotation lines added from alignment, one  *       char per match state 1..M * V1.9: null model and name added to HMM structure. ASCII format changed *       to compact tabular one. * V2.0: Plan7. Essentially complete rewrite. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <time.h>#include <unistd.h> /* to get SEEK_CUR definition on silly Suns */#include "squid.h"#include "config.h"#include "structs.h"#include "funcs.h"#include "version.h"#include "ssi.h"/* Magic numbers identifying binary formats. * Do not change the old magics! Necessary for backwards compatibility. */static unsigned int  v10magic = 0xe8ededb1; /* v1.0 binary: "hmm1" + 0x80808080 */static unsigned int  v10swap  = 0xb1edede8; /* byteswapped v1.0                 */static unsigned int  v11magic = 0xe8ededb2; /* v1.1 binary: "hmm2" + 0x80808080 */static unsigned int  v11swap  = 0xb2edede8; /* byteswapped v1.1                 */static unsigned int  v17magic = 0xe8ededb3; /* v1.7 binary: "hmm3" + 0x80808080 */static unsigned int  v17swap  = 0xb3edede8; /* byteswapped v1.7                 */static unsigned int  v19magic = 0xe8ededb4; /* V1.9 binary: "hmm4" + 0x80808080 */static unsigned int  v19swap  = 0xb4edede8; /* V1.9 binary, byteswapped         */ static unsigned int  v20magic = 0xe8ededb5; /* V2.0 binary: "hmm5" + 0x80808080 */static unsigned int  v20swap  = 0xb5edede8; /* V2.0 binary, byteswapped         *//* Old HMMER 1.x file formats. */#define HMMER1_0B  1            /* binary HMMER 1.0     */#define HMMER1_0F  2            /* flat ascii HMMER 1.0 */#define HMMER1_1B  3            /* binary HMMER 1.1     */#define HMMER1_1F  4            /* flat ascii HMMER 1.1 */#define HMMER1_7B  5            /* binary HMMER 1.7     */#define HMMER1_7F  6            /* flat ascii HMMER 1.7 */#define HMMER1_9B  7            /* HMMER 1.9 binary     */#define HMMER1_9F  8            /* HMMER 1.9 flat ascii */static int  read_asc20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_bin20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_asc19hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_bin19hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_asc17hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_bin17hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_asc11hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_bin11hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_asc10hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static int  read_bin10hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm);static void  byteswap(char *swap, int nbytes);static char *prob2ascii(float p, float null);static float ascii2prob(char *s, float null);static void  write_bin_string(FILE *fp, char *s);static int   read_bin_string(FILE *fp, int doswap, char **ret_s);static void  multiline(FILE *fp, char *pfx, char *s);static struct plan9_s *read_plan9_binhmm(FILE *fp, int version, int swapped);static struct plan9_s *read_plan9_aschmm(FILE *fp, int version);/***************************************************************** * HMM input API functions: *   HMMFileOpen() *   HMMFileRead() *   HMMFileClose() *   HMMFileRewind() *****************************************************************/   /* Function: HMMFileOpen() *  * Purpose:  Open an HMM file for reading. The file may be either *           an index for a library of HMMs, or an HMM.  *            * Args:     hmmfile - name of file *           env     - NULL, or environment variable for HMM database. *            * Return:   Valid HMMFILE *, or NULL on failure. */HMMFILE * HMMFileOpen(char *hmmfile, char *env){  HMMFILE     *hmmfp;  unsigned int magic;  char         buf[512];  char        *ssifile;  char        *dir;        /* dir name in which HMM file was found */  int          status;  hmmfp = (HMMFILE *) MallocOrDie (sizeof(HMMFILE));  hmmfp->f          = NULL;   hmmfp->parser     = NULL;  hmmfp->is_binary  = FALSE;  hmmfp->byteswap   = FALSE;  hmmfp->is_seekable= TRUE;	/* always; right now, an HMM must always be in a file. */    /* Open the file. Look in current directory.   * If that doesn't work, check environment var for   * a second possible directory (usually the location   * of a system-wide HMM library).   * Using dir name if necessary, construct correct SSI file name.   */  hmmfp->f   = NULL;  hmmfp->ssi = NULL;  if ((hmmfp->f = fopen(hmmfile, "r")) != NULL)    {      ssifile = MallocOrDie(sizeof(char) * (strlen(hmmfile) + 5));      sprintf(ssifile, "%s.ssi", hmmfile);      if ((hmmfp->mode = SSIRecommendMode(hmmfile)) == -1)	Die("SSIRecommendMode() failed");    }  else if ((hmmfp->f = EnvFileOpen(hmmfile, env, &dir)) != NULL)    {      char *full;      full    = FileConcat(dir, hmmfile);      ssifile = MallocOrDie(sizeof(char) * (strlen(full) + strlen(hmmfile) + 5));      sprintf(ssifile, "%s.ssi", full);      if ((hmmfp->mode = SSIRecommendMode(full)) == -1)	Die("SSIRecommendMode() failed");      free(full);      free(dir);    }  else return NULL;    /* Open the SSI index file. If it doesn't exist, or it's corrupt, or    * some error happens, hmmfp->ssi stays NULL.   */  SQD_DPRINTF1(("Opening ssifile %s...\n", ssifile));  SSIOpen(ssifile, &(hmmfp->ssi));  free(ssifile);  /* Initialize the disk offset stuff.   */  status = SSIGetFilePosition(hmmfp->f, hmmfp->mode, &(hmmfp->offset));  if (status != 0) Die("SSIGetFilePosition() failed");  /* Check for binary or byteswapped binary format   * by peeking at first 4 bytes.   */   if (! fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) {    HMMFileClose(hmmfp);    return NULL;  }  rewind(hmmfp->f);  if (magic == v20magic) {     hmmfp->parser    = read_bin20hmm;    hmmfp->is_binary = TRUE;    return hmmfp;  }   else if (magic == v20swap) {     SQD_DPRINTF1(("Opened a HMMER 2.0 binary file [byteswapped]\n"));    hmmfp->parser    = read_bin20hmm;    hmmfp->is_binary = TRUE;    hmmfp->byteswap  = TRUE;    return hmmfp;  }  else if (magic == v19magic) {    hmmfp->parser    = read_bin19hmm;    hmmfp->is_binary = TRUE;    return hmmfp;  }  else if (magic == v19swap) {    hmmfp->parser    = read_bin19hmm;    hmmfp->is_binary = TRUE;    hmmfp->byteswap  = TRUE;    return hmmfp;  }  else if (magic == v17magic) {    hmmfp->parser    = read_bin17hmm;    hmmfp->is_binary = TRUE;    return hmmfp;  }  else if (magic == v17swap) {    hmmfp->parser    = read_bin17hmm;    hmmfp->is_binary = TRUE;    hmmfp->byteswap  = TRUE;    return hmmfp;  }     else if (magic == v11magic) {    hmmfp->parser    = read_bin11hmm;    hmmfp->is_binary = TRUE;    return hmmfp;  }  else if (magic == v11swap) {    hmmfp->parser    = read_bin11hmm;    hmmfp->is_binary = TRUE;    hmmfp->byteswap  = TRUE;    return hmmfp;  }  else if (magic == v10magic) {    hmmfp->parser    = read_bin10hmm;    hmmfp->is_binary = TRUE;    return hmmfp;  }  else if (magic == v10swap) {    hmmfp->parser    = read_bin10hmm;    hmmfp->is_binary = TRUE;    hmmfp->byteswap  = TRUE;    return hmmfp;  }  /* else we fall thru; it may be an ASCII file. */  /* If magic looks binary but we don't recognize it, choke and die.   */  if (magic & 0x80000000) {    Warn("\%s appears to be a binary but format is not recognized\n\It may be from a HMMER version more recent than yours,\n\or may be a different kind of binary altogether.\n", hmmfile);    HMMFileClose(hmmfp);    return NULL;  }  /* Check for ASCII format by peeking at first word.   */  if (fgets(buf, 512, hmmfp->f) == NULL) {    HMMFileClose(hmmfp);    return NULL;  }  rewind(hmmfp->f);    if        (strncmp("HMMER2.0", buf, 8) == 0) {    hmmfp->parser = read_asc20hmm;    return hmmfp;  } else if (strncmp("HMMER v1.9", buf, 10) == 0) {    hmmfp->parser = read_asc19hmm;    return hmmfp;  } else if (strncmp("# HMM v1.7", buf, 10) == 0) {    hmmfp->parser = read_asc17hmm;    return hmmfp;  } else if (strncmp("# HMM v1.1", buf, 10) == 0) {    hmmfp->parser = read_asc11hmm;    return hmmfp;  } else if (strncmp("# HMM v1.0", buf, 10) == 0) {    hmmfp->parser = read_asc10hmm;    return hmmfp;  }     /* If we haven't recognized it yet, it's bogus.   */  HMMFileClose(hmmfp);  return NULL;}intHMMFileRead(HMMFILE *hmmfp, struct plan7_s **ret_hmm){  int status;				/* Set the disk position marker. */  if (hmmfp->is_seekable) {    status = SSIGetFilePosition(hmmfp->f, hmmfp->mode, &(hmmfp->offset));    if (status != 0) Die("SSIGetFilePosition() failed");  }				/* Parse the HMM and return it. */  return (*hmmfp->parser)(hmmfp, ret_hmm);}voidHMMFileClose(HMMFILE *hmmfp){  if (hmmfp->f   != NULL)  fclose(hmmfp->f);        if (hmmfp->ssi != NULL)  SSIClose(hmmfp->ssi);  free(hmmfp);}void HMMFileRewind(HMMFILE *hmmfp){  rewind(hmmfp->f);}intHMMFilePositionByName(HMMFILE *hmmfp, char *name){	  SSIOFFSET  offset;		/* offset in hmmfile, from SSI */  int        fh;		/* ignored.                    */  if (hmmfp->ssi == NULL) return 0;  if (SSIGetOffsetByName(hmmfp->ssi, name, &fh, &offset) != 0) return 0;  if (SSISetFilePosition(hmmfp->f, &offset) != 0) return 0;  return 1;}int HMMFilePositionByIndex(HMMFILE *hmmfp, int idx){				/* idx runs from 0..nhmm-1 */  int        fh;		/* file handle is ignored; only one HMM file */  SSIOFFSET  offset;		/* file position of HMM */  if (hmmfp->ssi == NULL) return 0;  if (SSIGetOffsetByNumber(hmmfp->ssi, idx, &fh, &offset) != 0) return 0;  if (SSISetFilePosition(hmmfp->f, &offset) != 0) return 0;  return 1;}/***************************************************************** * HMM output API: *    WriteAscHMM() *    WriteBinHMM() *  *****************************************************************/ /* Function: WriteAscHMM() *  * Purpose:  Save an HMM in flat text ASCII format. * * Args:     fp        - open file for writing *           hmm       - HMM to save */voidWriteAscHMM(FILE *fp, struct plan7_s *hmm){  int k;                        /* counter for nodes             */  int x;                        /* counter for symbols           */  int ts;			/* counter for state transitions */  fprintf(fp, "HMMER2.0  [%s]\n", RELEASE);  /* magic header */  /* write header information   */  fprintf(fp, "NAME  %s\n", hmm->name);  if (hmm->flags & PLAN7_ACC)    fprintf(fp, "ACC   %s\n", hmm->acc);  if (hmm->flags & PLAN7_DESC)     fprintf(fp, "DESC  %s\n", hmm->desc);  fprintf(fp, "LENG  %d\n", hmm->M);  fprintf(fp, "ALPH  %s\n",   	  (Alphabet_type == hmmAMINO) ? "Amino":"Nucleic");  fprintf(fp, "RF    %s\n", (hmm->flags & PLAN7_RF)  ? "yes" : "no");  fprintf(fp, "CS    %s\n", (hmm->flags & PLAN7_CS)  ? "yes" : "no");  fprintf(fp, "MAP   %s\n", (hmm->flags & PLAN7_MAP) ? "yes" : "no");  multiline(fp, "COM   ", hmm->comlog);  fprintf(fp, "NSEQ  %d\n", hmm->nseq);  fprintf(fp, "DATE  %s\n", hmm->ctime);   fprintf(fp, "CKSUM %d\n", hmm->checksum);  if (hmm->flags & PLAN7_GA)    fprintf(fp, "GA    %.1f %.1f\n", hmm->ga1, hmm->ga2);  if (hmm->flags & PLAN7_TC)    fprintf(fp, "TC    %.1f %.1f\n", hmm->tc1, hmm->tc2);  if (hmm->flags & PLAN7_NC)    fprintf(fp, "NC    %.1f %.1f\n", hmm->nc1, hmm->nc2);  /* Specials   */  fputs("XT     ", fp);  for (k = 0; k < 4; k++)    for (x = 0; x < 2; x++)      fprintf(fp, "%6s ", prob2ascii(hmm->xt[k][x], 1.0));  fputs("\n", fp);  /* Save the null model first, so HMM readers can decode   * log odds scores on the fly. Save as log odds probabilities   * relative to 1/Alphabet_size (flat distribution)   */  fprintf(fp, "NULT  ");  fprintf(fp, "%6s ", prob2ascii(hmm->p1, 1.0)); /* p1 */  fprintf(fp, "%6s\n", prob2ascii(1.0-hmm->p1, 1.0));   /* p2 */  fputs("NULE  ", fp);  for (x = 0; x < Alphabet_size; x++)    fprintf(fp, "%6s ", prob2ascii(hmm->null[x], 1/(float)(Alphabet_size)));  fputs("\n", fp);  /* EVD statistics   */  if (hmm->flags & PLAN7_STATS)     fprintf(fp, "EVD   %10f %10f\n", hmm->mu, hmm->lambda);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -