📄 squid.h.in
字号:
/* @configure_input@ *//***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. *****************************************************************/#ifndef SQUIDH_INCLUDED#define SQUIDH_INCLUDED/* squid.h * Header file for my library of sequence functions. * * CVS $Id: squid.h.in,v 1.3 2001/02/21 21:09:10 eddy Exp $ */#include <stdio.h>#include <math.h>#include <stdlib.h>#include <unistd.h> /* for sysconf() #define's */#if DEBUGLEVEL > 0#include <assert.h> /* for SQD_DASSERT1(), etc. */#endif#include "squidconf.h" /* #define's generated by ./configure script *//***************************************************************** * Integers of guaranteed size. (used for instance in gsi.c, gsi2.c) * These are set by the ./configure script; if they show up as FIXME, * they must be manually edited to appropriate type definitions. You * do need 64-bit integers in the current code; email me if this * prevents you from compiling SQUID and tell me your system (I don't * know of any systems that don't have 64-bit integers these days). *****************************************************************/typedef @SQD_UINT16@ sqd_uint16;typedef @SQD_UINT32@ sqd_uint32;typedef @SQD_UINT64@ sqd_uint64;#ifdef USE_HOST_BYTESWAP_FUNCTIONS#include <sys/types.h> /* only for ntohl() and friends. */#include <netinet/in.h> /* only for ntohl() and friends. */#define sre_ntoh16(x) ntohs(x);#define sre_ntoh32(x) ntohl(x);#define sre_hton16(x) htons(x);#define sre_hton32(x) htonl(x);#endif /* USE_HOST_BYTESWAP_FUNCTIONS *//* Library version info is made available as a global to * any interested program. These are defined in iupac.c * with the other globals. */extern char squid_version[]; /* version number */extern char squid_date[]; /* date of release */extern int squid_errno; /* error codes *//**************************************************** * Error codes returned by squid library functions (squid_errno) ****************************************************/#define SQERR_OK 0 /* no error */#define SQERR_UNKNOWN 1 /* generic error, unidentified */#define SQERR_NODATA 2 /* unexpectedly NULL stream */#define SQERR_MEM 3 /* malloc or realloc failed */#define SQERR_NOFILE 4 /* file not found */#define SQERR_FORMAT 5 /* file format not recognized */#define SQERR_PARAMETER 6 /* bad parameter passed to func */#define SQERR_DIVZERO 7 /* error in sre_math.c */#define SQERR_INCOMPAT 8 /* incompatible parameters */#define SQERR_EOD 9 /* end-of-data (often normal) *//**************************************************** * Single sequence information ****************************************************/ #define SQINFO_NAMELEN 64#define SQINFO_DESCLEN 128struct seqinfo_s { int flags; /* what extra data are available */ char name[SQINFO_NAMELEN];/* up to 63 characters of name */ char id[SQINFO_NAMELEN]; /* up to 63 char of database identifier */ char acc[SQINFO_NAMELEN]; /* up to 63 char of database accession # */ char desc[SQINFO_DESCLEN];/* up to 127 char of description */ int len; /* length of this seq */ int start; /* (1..len) start position on source seq */ int stop; /* (1..len) end position on source seq */ int olen; /* original length of source seq */ int type; /* kRNA, kDNA, kAmino, or kOther */ char *ss; /* 0..len-1 secondary structure string */ char *sa; /* 0..len-1 % side chain surface access. */};typedef struct seqinfo_s SQINFO;#define SQINFO_NAME (1 << 0)#define SQINFO_ID (1 << 1)#define SQINFO_ACC (1 << 2)#define SQINFO_DESC (1 << 3)#define SQINFO_START (1 << 4)#define SQINFO_STOP (1 << 5)#define SQINFO_LEN (1 << 6)#define SQINFO_TYPE (1 << 7)#define SQINFO_OLEN (1 << 8)#define SQINFO_SS (1 << 9)#define SQINFO_SA (1 << 10)/**************************************************** * Sequence alphabet: see also iupac.c ****************************************************/ /* IUPAC symbols defined globally in iupac.c */struct iupactype { char sym; /* character representation */ char symcomp; /* complement (regular char */ char code; /* my binary rep */ char comp; /* binary encoded complement */};extern struct iupactype iupac[];#define IUPACSYMNUM 17extern char *stdcode1[]; /* 1-letter amino acid translation code */extern char *stdcode3[]; /* 3-letter amino acid translation code */extern float dnafq[]; /* nucleotide occurrence frequencies */extern float aafq[]; /* amino acid occurrence frequencies */extern char aa_alphabet[]; /* amino acid alphabet */extern int aa_index[]; /* convert 0..19 indices to 0..26 */ /* valid symbols in IUPAC code */#define NUCLEOTIDES "ACGTUNRYMKSWHBVDacgtunrymkswhbvd"#define AMINO_ALPHABET "ACDEFGHIKLMNPQRSTVWY"#define DNA_ALPHABET "ACGT"#define RNA_ALPHABET "ACGU"#define WHITESPACE " \t\n"#define isgap(c) ((c) == ' ' || (c) == '.' || (c) == '_' || (c) == '-' || (c) == '~')/**************************************************** * Sequence i/o: originally from Don Gilbert's readseq ****************************************************/#include "msa.h" /* for multiple sequence alignment support */ /* buffer size for reading in lines from sequence files*/#define LINEBUFLEN 4096/* sequence types parsed by Seqtype() *//* note that these must match hmmAMINO and hmmNUCLEIC in HMMER */#define kOtherSeq 0 /* hmmNOTSETYET */#define kDNA 1#define kRNA 2 /* hmmNUCLEIC */#define kAmino 3 /* hmmAMINO *//* Unaligned sequence file formats recognized * Coexists with definitions of multiple alignment formats in msa.h: * >100 reserved for alignment formats * <100 reserved for unaligned formats * 0 reserved for unknown * * Some "legacy" formats are supported only when explicitly * requested; not autodetected by SeqfileFormat(). * * DON'T REASSIGN THESE CODES. They're written into * GSI index files. You can use new ones, but reassigning * the sense of old ones will break GSI indices. * Alignment format codes were reassigned with the creation * of msa.c, but before Stockholm format, there were no * indexed alignment databases. */#define SQFILE_UNKNOWN 0 /* unknown format */#define SQFILE_IG 1 /* Intelligenetics (!) */#define SQFILE_GENBANK 2 /* GenBank flatfile */ /* 3 was A2M. Now an alignment format */#define SQFILE_EMBL 4 /* EMBL or Swissprot flatfile */#define SQFILE_GCG 5 /* GCG single sequence files */#define SQFILE_STRIDER 6 /* MacStrider (!!) */#define SQFILE_FASTA 7 /* FASTA format: default */#define SQFILE_ZUKER 8 /* Zuker MFOLD format (legacy) */#define SQFILE_IDRAW 9 /* Idraw-style PostScript (legacy) */ /* 10 was SELEX. Now alignment format */ /* 11 was MSF. Now alignment format */#define SQFILE_PIR 12 /* PIR format */#define SQFILE_RAW 13 /* raw sequence */#define SQFILE_SQUID 14 /* my obsolete squid format */ /* 15 was kXPearson, extended FASTA; withdrawn */#define SQFILE_GCGDATA 16 /* GCG data library file */ /* 17 was Clustal. Now alignment format*/#define IsUnalignedFormat(fmt) ((fmt) && (fmt) < 100)#include "ssi.h"struct ReadSeqVars { FILE *f; /* open file pointer */ char *fname; /* name of file; used for diagnostics */ int linenumber; /* what line are we on in the file */ char *buf; /* dynamically allocated sre_fgets() buffer */ int buflen; /* allocation length for buf */ int ssimode; /* SSI_OFFSET_I32 or SSI_OFFSET_I64 */ SSIOFFSET ssioffset; /* disk offset to last line read into buf */ SSIOFFSET r_off; /* offset to start of record */ SSIOFFSET d_off; /* offset to start of sequence data */ int rpl; /* residues per data line for this file; -1 if unset, 0 if invalid */ int lastrpl; /* rpl on last line seen */ int maxrpl; /* max rpl on any line of the file */ int bpl; /* bytes per data line; -1 if unset, 0 if invalid */ int lastbpl; /* bpl on last line seen */ int maxbpl; /* max bpl on any line of the file */ char *seq; /* growing sequence during parse */ SQINFO *sqinfo; /* name, id, etc, gathered during parse */ char *sp; int seqlen; /* current sequence length */ int maxseq; /* current allocation length for seq */ int format; /* format of seqfile we're reading. */ int do_gzip; /* TRUE if f is a pipe from gzip -dc */ int do_stdin; /* TRUE if f is stdin */ /* An (important) hack for sequential access of multiple alignment files: * we read the whole alignment in, * and then copy it one sequence at a time into seq and sqinfo. * It is active if msa is non NULL. * msa->lastidx is reused/overloaded: used to keep track of what * seq we'll return next. * afp->format is the real format, while SQFILE->format is kMSA. * Because we keep it in the SQFILE structure, * ReadSeq() and friends are always reentrant for multiple seqfiles. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -