⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ssi.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 4 页
字号:
/***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. *****************************************************************/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <sys/stat.h>#include <sys/types.h>#include <unistd.h>#include "squid.h"#include "ssi.h"static sqd_uint32 v20magic = 0xf3f3e9b1; /* SSI 1.0: "ssi1" + 0x80808080 */static sqd_uint32 v20swap  = 0xb1e9f3f3; /* byteswapped */static int read_i16(FILE *fp, sqd_uint16 *ret_result);static int read_i32(FILE *fp, sqd_uint32 *ret_result);static int read_i64(FILE *fp, sqd_uint64 *ret_result);static int read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset);static int write_i16(FILE *fp, sqd_uint16 n);static int write_i32(FILE *fp, sqd_uint32 n);static int write_i64(FILE *fp, sqd_uint64 n);static int write_offset(FILE *fp, SSIOFFSET *offset);static int binary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base, 			 sqd_uint32 recsize, sqd_uint32 maxidx);static int indexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len,			      sqd_uint32 n);static void clear_ssifile(SSIFILE *sfp);static int  write_index(FILE *fp, SSIINDEX *g);static int  write_index_chunk(SSIINDEX *g);static sqd_uint64 current_chunk_size(SSIINDEX *g);static int load_indexfile(SSIFILE *sfp);/* Function: SSIOpen() * Date:     SRE, Sun Dec 31 12:40:03 2000 [St. Louis] * * Purpose:  Opens the SSI index file {filename} and returns *           a SSIFILE * stream thru {ret_sfp}. *           The caller must eventually close this stream using *           SSIClose(). More than one index file can be open *           at once. 
* * Args:     filename - full path to a SSI index file * * Returns:  Returns 0 on success, nonzero on failure. */intSSIOpen(char *filename, SSIFILE **ret_sfp){  SSIFILE  *sfp = NULL;  int       status;  if ((sfp = malloc(sizeof(SSIFILE))) == NULL)   return SSI_ERR_MALLOC;  if ((sfp->fp = fopen(filename, "rb")) == NULL) return SSI_ERR_NOFILE;      status = load_indexfile(sfp);  *ret_sfp = sfp;  return status;}/* load_indexfile(): given a SSIFILE structure with an open and positioned  *    stream (fp) -- but no other data loaded -- read the next SSIFILE *    in from disk. We use this routine without its SSIOpen() wrapper *    as part of the external mergesort when creating large indices. */static intload_indexfile(SSIFILE *sfp){  sqd_uint32   magic;  sqd_uint16   i;		/* counter over files */  int          status;		/* overall return status if an error is thrown */  status = SSI_ERR_BADFORMAT; /* default: almost every kind of error is a bad format error */  sfp->filename   = NULL;  sfp->fileformat = NULL;  sfp->fileflags  = NULL;  sfp->bpl        = NULL;  sfp->rpl        = NULL;  sfp->nfiles     = 0;            if (! read_i32(sfp->fp, &magic))               {status = SSI_ERR_BADMAGIC;  goto FAILURE; }  if (magic != v20magic && magic != v20swap)     {status = SSI_ERR_BADMAGIC;  goto FAILURE; }  if (! read_i32(sfp->fp, &(sfp->flags))) goto FAILURE;   /* If we have 64-bit offsets, make sure we can deal with them.   */#ifndef HAS_64BIT_FILE_OFFSETS    if ((sfp->flags & SSI_USE64_INDEX) ||      (sfp->flags & SSI_USE64))    { status = SSI_ERR_NO64BIT; goto FAILURE; }#endif  sfp->imode = (sfp->flags & SSI_USE64_INDEX) ? SSI_OFFSET_I64 : SSI_OFFSET_I32;  sfp->smode = (sfp->flags & SSI_USE64) ?       SSI_OFFSET_I64 : SSI_OFFSET_I32;  if (! read_i16(sfp->fp, &(sfp->nfiles)))     goto FAILURE;  if (! read_i32(sfp->fp, &(sfp->nprimary)))   goto FAILURE;  if (! read_i32(sfp->fp, &(sfp->nsecondary))) goto FAILURE;  if (! read_i32(sfp->fp, &(sfp->flen)))       goto FAILURE;  if (! 
read_i32(sfp->fp, &(sfp->plen)))       goto FAILURE;  if (! read_i32(sfp->fp, &(sfp->slen)))       goto FAILURE;  if (! read_i32(sfp->fp, &(sfp->frecsize)))   goto FAILURE;  if (! read_i32(sfp->fp, &(sfp->precsize)))   goto FAILURE;  if (! read_i32(sfp->fp, &(sfp->srecsize)))   goto FAILURE;    if (! read_offset(sfp->fp, sfp->imode, &(sfp->foffset))) goto FAILURE;  if (! read_offset(sfp->fp, sfp->imode, &(sfp->poffset))) goto FAILURE;  if (! read_offset(sfp->fp, sfp->imode, &(sfp->soffset))) goto FAILURE;  /* Read the file information and keep it.   * We expect the number of files to be small, so reading it   * once should be advantageous overall. If SSI ever had to   * deal with large numbers of files, you'd probably want to   * read file information on demand.   */  if (sfp->nfiles == 0)                                                   goto FAILURE;  if ((sfp->filename=malloc(sizeof(char *)    *sfp->nfiles)) == NULL)   {status = SSI_ERR_MALLOC; goto FAILURE; }  for (i = 0; i < sfp->nfiles; i++) sfp->filename[i] = NULL;   if ((sfp->fileformat=malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }  if ((sfp->fileflags =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }  if ((sfp->bpl     =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL)   {status = SSI_ERR_MALLOC; goto FAILURE; }  if ((sfp->rpl     =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL)   {status = SSI_ERR_MALLOC; goto FAILURE; }  for (i = 0; i < sfp->nfiles; i++)     {      /* We have to explicitly position, because header and file        * records may expand in the future; frecsize and foffset        * give us forwards compatibility.        
*/       if (indexfile_position(sfp, &(sfp->foffset), sfp->frecsize, i) !=0)  goto FAILURE;      if ((sfp->filename[i] =malloc(sizeof(char)*sfp->flen)) == NULL)        {status = SSI_ERR_MALLOC; goto FAILURE; }      if (fread(sfp->filename[i],sizeof(char),sfp->flen, sfp->fp)!=sfp->flen) goto FAILURE;      if (! read_i32(sfp->fp, &(sfp->fileformat[i])))                             goto FAILURE;      if (! read_i32(sfp->fp, &(sfp->fileflags[i])))                              goto FAILURE;      if (! read_i32(sfp->fp, &(sfp->bpl[i])))                                    goto FAILURE;      if (! read_i32(sfp->fp, &(sfp->rpl[i])))                                    goto FAILURE;    }    /* Success. Return 0.   */  return 0;			 FAILURE:  /* Failure: free the damaged structure, return status code.   */  SSIClose(sfp);  return status;}/* Function: SSIGetOffsetByName() * Date:     SRE, Sun Dec 31 13:55:31 2000 [St. Louis] * * Purpose:  Looks up the string {key} in the open index {sfp}. *           {key} can be either a primary or secondary key. If {key} *           is found, {*ret_fh} contains a unique handle on *           the file that contains {key} (suitable for an SSIFileInfo() *           call, or for comparison to the handle of the last file *           that was opened for retrieval), and {offset} is filled  *           in with the offset in that file. *            * Args:     sfp         - open index file *           key         - string to search for *           ret_fh      - RETURN: handle on file that key is in *           ret_offset  - RETURN: offset of the start of that key's record * * Returns:  0 on success. *           non-zero on error. */intSSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh,		   SSIOFFSET *ret_offset){  int         status;  sqd_uint16  fnum;  /* Look in the primary keys.   
*/  status = binary_search(sfp, key, sfp->plen, &(sfp->poffset), sfp->precsize,			 sfp->nprimary);  if (status == 0) {		    /* We found it as a primary key; get our data & return.     */    if (! read_i16(sfp->fp, &fnum)) return SSI_ERR_NODATA;    *ret_fh = (int) fnum;    if (! read_offset(sfp->fp, sfp->smode, ret_offset))  return SSI_ERR_NODATA;    return 0;	/* success! (we don't need the other key data) */  } else if (status == SSI_ERR_NO_SUCH_KEY) {    /* Not in the primary keys? OK, try the secondary keys.     */    if (sfp->nsecondary > 0) {      char *pkey;      status = binary_search(sfp, key, sfp->slen, &(sfp->soffset), sfp->srecsize,			     sfp->nsecondary);      if (status != 0) return status;      if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC;      if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) return SSI_ERR_NODATA;      status = SSIGetOffsetByName(sfp, pkey, ret_fh, ret_offset);      free(pkey);    }    return status;  } else return status;		  /*NOTREACHED*/}/* Function: SSIGetOffsetByNumber() * Date:     SRE, Mon Jan  1 19:42:42 2001 [St. Louis] * * Purpose:  Looks up primary key #{n} in the open index {sfp}. *           {n} ranges from 0..nprimary-1. When key #{n}  *           is found, {*ret_fh} contains a unique  *           handle on the file that contains {key} (suitable *           for an SSIFileInfo() call, or for comparison to  *           the handle of the last file that was opened for retrieval), *           and {offset} is filled in with the offset in that file. *            * Args:     sfp        - open index file *           n          - primary key number to retrieve. *           ret_fh     - RETURN: handle on file that key is in *           ret_offset - RETURN: offset of the start of that key's record * * Returns:  0 on success. *           non-zero on error. 
*/
int
SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh, SSIOFFSET *ret_offset)
{
  sqd_uint16 fnum;
  char      *pkey;
  int        status;

  /* Reject out-of-range key numbers; the negative case is checked
   * explicitly rather than relying on signed->unsigned conversion.
   */
  if (n < 0 || (sqd_uint32) n >= sfp->nprimary) return SSI_ERR_NO_SUCH_KEY;
  if (indexfile_position(sfp, &(sfp->poffset), sfp->precsize, n) != 0)
    return SSI_ERR_SEEK_FAILED;

  /* The key text itself isn't returned; it is read only to advance
   * the stream to the file number and offset that follow it.
   */
  if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC;

  status = SSI_ERR_NODATA;      /* every failure below is a short read */
  if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) goto DONE;
  if (! read_i16(sfp->fp, &fnum))                                 goto DONE;
  if (! read_offset(sfp->fp, sfp->smode, ret_offset))             goto DONE;

  *ret_fh = fnum;
  status  = 0;

 DONE:
  /* Single exit so the key buffer is released on every path. */
  free(pkey);
  return status;
}

/* Function: SSIGetSubseqOffset()
 * Date:     SRE, Mon Jan  1 19:49:31 2001 [St. Louis]
 *
 * Purpose:  Implements SSI_FAST_SUBSEQ.
 *
 *           Looks up a primary or secondary {key} in the open
 *           index {sfp}. Asks for the nearest offset to a
 *           subsequence starting at position {requested_start}
 *           in the sequence (numbering the sequence 1..L).
 *           If {key} is found, on return, {ret_fh}
 *           contains a unique handle on the file that contains
 *           {key} (suitable for an SSIFileInfo() call, or for
 *           comparison to the handle of the last file that was
 *           opened for retrieval); {record_offset} contains the
 *           disk offset to the start of the record; {data_offset}
 *           contains the disk offset either exactly at the requested
 *           residue, or at the start of the line containing the
 *           requested residue; {ret_actual_start} contains the
 *           coordinate (1..L) of the first valid residue at or
 *           after {data_offset}. {ret_actual_start} is <=
 *           {requested_start}.
* * Args:     sfp             - open index file *           key             - primary or secondary key to find *           requested_start - residue we'd like to start at (1..L) *           ret_fh          - RETURN: handle for file the key is in *           record_offset   - RETURN: offset of entire record *           data_offset     - RETURN: offset of subseq (see above) *           ret_actual_start- RETURN: coord (1..L) of residue at data_offset * * Returns:  0 on success, non-zero on failure. */intSSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start,		    int *ret_fh, SSIOFFSET *record_offset,		    SSIOFFSET *data_offset, int *ret_actual_start){  int        status;  sqd_uint32 len;  int        r, b, i, l;	/* tmp variables for "clarity", to match docs */    /* Look up the key. Rely on the fact that SSIGetOffsetByName()   * leaves the index file positioned at the rest of the data for this key.   */  status = SSIGetOffsetByName(sfp, key, ret_fh, record_offset);  if (status != 0) return status;  /* Check that we're allowed to do subseq lookup on that file.   */  if (! (sfp->fileflags[*ret_fh] & SSI_FAST_SUBSEQ))    return SSI_ERR_NO_SUBSEQS;  /* Read the data we need for subseq lookup   */  if (! read_offset(sfp->fp, sfp->smode, data_offset)) return SSI_ERR_NODATA;  if (! read_i32(sfp->fp, &len))                         return SSI_ERR_NODATA;  /* Set up tmp variables for clarity of equations below,   * and to make them match documentation (ssi-format.tex).   */  r = sfp->rpl[*ret_fh];    /* residues per line */  b = sfp->bpl[*ret_fh];    /* bytes per line    */  i = requested_start;	    /* start position 1..L */  l = (i-1)/r;		    /* data line # (0..) that the residue is on */  if (r == 0 || b == 0) return SSI_ERR_NO_SUBSEQS;  if (i < 0 || i > len) return SSI_ERR_RANGE;    /* When b = r+1, there's nothing but sequence on each data line (and the \0),   * and we can find each residue precisely.   
*/  if (b == r+1) {    if (sfp->smode == SSI_OFFSET_I32) {      data_offset->mode    = SSI_OFFSET_I32;      data_offset->off.i32 = data_offset->off.i32 + l*b + (i-1)%r;    } else if (sfp->smode == SSI_OFFSET_I64) {      data_offset->mode    = SSI_OFFSET_I64;      data_offset->off.i64 = data_offset->off.i64 + l*b + (i-1)%r;    }     *ret_actual_start = requested_start;  } else {     /* else, there's other stuff on seq lines, so the best     * we can do easily is to position at start of relevant line.     */    if (sfp->smode == SSI_OFFSET_I32) {      data_offset->mode    = SSI_OFFSET_I32;      data_offset->off.i32 = data_offset->off.i32 + l*b;    } else if (sfp->smode == SSI_OFFSET_I64) {      data_offset->mode    = SSI_OFFSET_I64;      data_offset->off.i64 = data_offset->off.i64 + l*b;    }     /* yes, the eq below is = 1 + (i-1)/r*r but it's not = i. that's an integer /. */    *ret_actual_start = 1 + l*r;  }  return 0;}/* Function: SSISetFilePosition() * Date:     SRE, Tue Jan  2 09:13:46 2001 [St. Louis] * * Purpose:  Uses {offset} to sets the file position for {fp}, usually an *           open sequence file, relative to the start of the file. *           Hides the details of system-dependent shenanigans necessary for *           file positioning in large (>2 GB) files.  *            *           Behaves just like fseek(fp, offset, SEEK_SET) for 32 bit *           offsets and <2 GB files. *            *           Warning: if all else fails, in desperation, it will try to *           use fsetpos(). This requires making assumptions about fpos_t *           that may be unwarranted... assumptions that ANSI C prohibits *           me from making... though I believe the ./configure *           script robustly tests whether I can play with fpos_t like this. * * Args:     fp      - file to position. *           offset  - SSI offset relative to file start. *                  * Returns:  0 on success, nonzero on error. 
*/intSSISetFilePosition(FILE *fp, SSIOFFSET *offset){  if (offset->mode == SSI_OFFSET_I32) {    if (fseek(fp, offset->off.i32, SEEK_SET) != 0)       return SSI_ERR_SEEK_FAILED;  }#ifndef HAS_64BIT_FILE_OFFSETS  else return SSI_ERR_NO64BIT;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -