⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sqio.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 4 页
字号:
voidToIUPAC(char *seq) {  for (; *seq != '\0'; seq++)    if (strchr(NUCLEOTIDES, *seq) == NULL && ! isgap(*seq)) *seq = 'N';}/* Function: addseq() *  * Purpose:  Add a line of sequence to the growing string in V. *           Skip all nonalphabetic characters in the input string: *           in particular, spaces and digits (coordinates). This *           allows us to generically read sequence data from most *           any format. */static void addseq(char *s, struct ReadSeqVars *V){  char *s0;  char *sq;  int   rpl;			/* valid residues per line */  int   bpl;			/* characters per line     */  if (V->ssimode == -1)    {				/* Normal mode: keeping the seq */      /* Make sure we have enough room. We know that s is <= buflen,       * so just make sure we've got room for a whole new buflen worth       * of sequence.       */      if (V->seqlen + V->buflen > V->maxseq) {	V->maxseq += MAX(V->buflen, kStartLength);	V->seq = ReallocOrDie (V->seq, V->maxseq+1);      }      s0 = s;      sq = V->seq + V->seqlen;      while (*s != 0) {	if (isalpha((int) *s)) {	  *sq = *s;	  sq++;	}	s++;      }      V->seqlen = sq - V->seq;    }  else				/* else: indexing mode, discard the seq */    {      s0 = s;      rpl = 0;      while (*s != 0) {	if (isalpha((int) *s)) rpl++; 	s++;      }      V->seqlen += rpl;      bpl = s - s0;      /* Keep track of the global rpl, bpl for the file.       * This is overly complicated because we have to        * allow the last line of each record (e.g. the last addseq() call       * on each sequence) to have a different length - and sometimes       * we'll have one-line sequence records, too.  Thus we only       * do something with the global V->rpl when we have *passed over*       * a line - we keep the last line's rpl in last_rpl. And because       * a file might consist entirely of single-line records, we keep       * a third guy, maxrpl, that tells us the maximum rpl of any line       * in the file. If we reach the end of file and rpl is still unset,       * we'll set it to maxrpl. If we reach eof and rpl is set, but is       * less than maxrpl, that's a weird case where a last line in some       * record is longer than every other line.       */      if (V->rpl != 0) {		/* 0 means we already know rpl is invalid       */	if (V->lastrpl > 0) {	/* we're on something that's not the first line */	  if (V->rpl > 0 && V->lastrpl != V->rpl)  V->rpl = 0; 	  else if (V->rpl == -1)                   V->rpl = V->lastrpl;	}      	V->lastrpl = rpl;	if (rpl > V->maxrpl) V->maxrpl = rpl; /* make sure we check max length of final lines */      }      if (V->bpl != 0) {		/* 0 means we already know bpl is invalid       */	if (V->lastbpl > 0) {	/* we're on something that's not the first line */	  if (V->bpl > 0 && V->lastbpl != V->bpl)  V->bpl = 0; 	  else if (V->bpl == -1)                   V->bpl = V->lastbpl;	}      	V->lastbpl = bpl;	if (bpl > V->maxbpl) V->maxbpl = bpl; /* make sure we check max length of final lines */      }    } /* end of indexing mode of addseq(). */}static void readLoop(int addfirst, int (*endTest)(char *,int *), struct ReadSeqVars *V){  int addend = 0;  int done   = 0;  V->seqlen = 0;  V->lastrpl = V->lastbpl = 0;  if (addfirst) {    if (V->ssimode >= 0) V->d_off = V->ssioffset;    addseq(V->buf, V);  } else if (V->ssimode >= 0)    if (0 != SSIGetFilePosition(V->f, V->ssimode, &(V->d_off)))      Die("SSIGetFilePosition() failed");  do {    SeqfileGetLine(V);	/* feof() alone is a bug; files not necessarily \n terminated */    if (*(V->buf) == '\0' && feof(V->f))      done = TRUE;    done |= (*endTest)(V->buf, &addend);    if (addend || !done)      addseq(V->buf, V);  } while (!done);}static intendPIR(char *s, int  *addend){  *addend = 0;  if ((strncmp(s, "///", 3) == 0) ||       (strncmp(s, "ENTRY", 5) == 0))    return 1;  else    return 0;}static voidreadPIR(struct ReadSeqVars *V){  char *sptr;				/* load first line of entry  */  while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) {    SeqfileGetLine(V);  }  if (feof(V->f)) return;  if (V->ssimode >= 0) V->r_off = V->ssioffset;  if ((sptr = strtok(V->buf + 15, "\n\t ")) != NULL)    {      SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);      SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);    }  do {    SeqfileGetLine(V);    if (!feof(V->f) && strncmp(V->buf, "TITLE", 5) == 0)      SetSeqinfoString(V->sqinfo, V->buf+15, SQINFO_DESC);    else if (!feof(V->f) && strncmp(V->buf, "ACCESSION", 9) == 0)      {	if ((sptr = strtok(V->buf+15, " \t\n")) != NULL)	  SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);      }  } while (! feof(V->f) && (strncmp(V->buf,"SEQUENCE", 8) != 0));  SeqfileGetLine(V);			/* skip next line, coords */  readLoop(0, endPIR, V);  /* reading a real PIR-CODATA database file, we keep the source coords   */  V->sqinfo->start = 1;  V->sqinfo->stop  = V->seqlen;  V->sqinfo->olen  = V->seqlen;  V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;  /* get next line   */  while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) {    SeqfileGetLine(V);  }}static int endIG(char *s, int  *addend){  *addend = 1; /* 1 or 2 occur in line w/ bases */  return((strchr(s,'1')!=NULL) || (strchr(s,'2')!=NULL));}static void readIG(struct ReadSeqVars *V){  char *nm;				/* position past ';' comments */  do {    SeqfileGetLine(V);  } while (! (feof(V->f) || ((*V->buf != 0) && (*V->buf != ';')) ));  if (!feof(V->f))    {      if ((nm = strtok(V->buf, "\n\t ")) != NULL)	SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME);      readLoop(0, endIG, V);    }    while (!(feof(V->f) || ((*V->buf != '\0') && (*V->buf == ';'))))    SeqfileGetLine(V);}static int endStrider(char *s, int *addend){  *addend = 0;  return (strstr( s, "//") != NULL);}static void readStrider(struct ReadSeqVars *V){   char *nm;    while ((!feof(V->f)) && (*V->buf == ';'))     {      if (strncmp(V->buf,"; DNA sequence", 14) == 0)	{	  if ((nm = strtok(V->buf+16, ",\n\t ")) != NULL)	    SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME);	}      SeqfileGetLine(V);    }  if (! feof(V->f))    readLoop(1, endStrider, V);  /* load next line   */  while ((!feof(V->f)) && (*V->buf != ';'))     SeqfileGetLine(V);}static int endGB(char *s, int *addend){  *addend = 0;  return ((strstr(s,"//") != NULL) || (strstr(s,"LOCUS") == s));}static void readGenBank(struct ReadSeqVars *V){  char *sptr;  int   in_definition;  while (strncmp(V->buf, "LOCUS", 5) != 0) {    SeqfileGetLine(V);  }  if (V->ssimode >= 0) V->r_off = V->ssioffset;  if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)    {      SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);      SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);    }  in_definition = FALSE;  while (! feof(V->f))    {      SeqfileGetLine(V);      if (! feof(V->f) && strstr(V->buf, "DEFINITION") == V->buf)	{	  if ((sptr = strtok(V->buf+12, "\n")) != NULL)	    SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);	  in_definition = TRUE;	}      else if (! feof(V->f) && strstr(V->buf, "ACCESSION") == V->buf)	{	  if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)	    SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);	  in_definition = FALSE;	}      else if (strncmp(V->buf,"ORIGIN", 6) != 0)	{	  if (in_definition)	    SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC);	}      else	break;    }  readLoop(0, endGB, V);  /* reading a real GenBank database file, we keep the source coords   */  V->sqinfo->start = 1;  V->sqinfo->stop  = V->seqlen;  V->sqinfo->olen  = V->seqlen;  V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;  while (!(feof(V->f) || ((*V->buf!=0) && (strstr(V->buf,"LOCUS") == V->buf))))    SeqfileGetLine(V);				/* SRE: V->s now holds "//", so sequential				   reads are wedged: fixed Tue Jul 13 1993 */  while (!feof(V->f) && strstr(V->buf, "LOCUS  ") != V->buf)    SeqfileGetLine(V);}static intendGCGdata(char *s, int *addend) {  *addend = 0;  return (*s == '>');}static voidreadGCGdata(struct ReadSeqVars *V){  int   binary = FALSE;		/* whether data are binary or not */  int   blen = 0;		/* length of binary sequence */  				/* first line contains ">>>>" followed by name */  if (Strparse(">>>>([^ ]+) .+2BIT +Len: ([0-9]+)", V->buf, 2))    {      binary = TRUE;      SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME);      blen = atoi(sqd_parse[2]);    }   else if (Strparse(">>>>([^ ]+) .+ASCII +Len: [0-9]+", V->buf, 1))    SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME);  else     Die("bogus GCGdata format? %s", V->buf);				/* second line contains free text description */  SeqfileGetLine(V);  SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC);  if (binary) {    /* allocate for blen characters +3... (allow for 3 bytes of slop) */    if (blen >= V->maxseq) {      V->maxseq = blen;      if ((V->seq = (char *) realloc (V->seq, sizeof(char)*(V->maxseq+4)))==NULL)	Die("malloc failed");    }				/* read (blen+3)/4 bytes from file */    if (fread(V->seq, sizeof(char), (blen+3)/4, V->f) < (size_t) ((blen+3)/4))      Die("fread failed");    V->seqlen = blen;				/* convert binary code to seq */    GCGBinaryToSequence(V->seq, blen);  }  else readLoop(0, endGCGdata, V);    while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>'))))    SeqfileGetLine(V);}static intendPearson(char *s, int *addend){  *addend = 0;  return(*s == '>');}static void readPearson(struct ReadSeqVars *V){  char *sptr;  if (V->ssimode >= 0) V->r_off = V->ssioffset;  if (*V->buf != '>')     Die("\File %s does not appear to be in FASTA format at line %d.\n\You may want to invoke the Babelfish to autodetect your file's format.\n\Usually this is done with a -B option.\n", 	V->fname, V->linenumber);  if ((sptr = strtok(V->buf+1, "\n\t ")) != NULL)    SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);  if ((sptr = strtok(NULL, "\n")) != NULL)    SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);  readLoop(0, endPearson, V);  while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>')))) {    SeqfileGetLine(V);  }}static intendEMBL(char *s, int *addend){  *addend = 0;  /* Some people (Berlin 5S rRNA database, f'r instance) use   * an extended EMBL format that attaches extra data after   * the sequence -- watch out for that. We use the fact that   * real EMBL sequence lines begin with five spaces.   *    * We can use this as the sole end test because readEMBL() will   * advance to the next ID line before starting to read again.   */  return (strncmp(s,"     ",5) != 0);/*  return ((strstr(s,"//") != NULL) || (strstr(s,"ID   ") == s)); */}static void readEMBL(struct ReadSeqVars *V){  char *sptr;				/* make sure we have first line */  while (!feof(V->f) && strncmp(V->buf, "ID  ", 4) != 0) {    SeqfileGetLine(V);  }  if (V->ssimode >= 0) V->r_off = V->ssioffset;  if ((sptr = strtok(V->buf+5, "\n\t ")) != NULL)    {      SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);      SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);    }  do {    SeqfileGetLine(V);    if (!feof(V->f) && strstr(V->buf, "AC  ") == V->buf)      {	if ((sptr = strtok(V->buf+5, ";  \t\n")) != NULL)	  SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);      }    else if (!feof(V->f) && strstr(V->buf, "DE  ") == V->buf)      {	if ((sptr = strtok(V->buf+5, "\n")) != NULL)	  SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);      }  } while (! feof(V->f) && strncmp(V->buf,"SQ",2) != 0);    readLoop(0, endEMBL, V);  /* Hack for Staden experiment files: convert - to N   */  if (V->ssimode == -1)		/* if we're in ssi mode, we're not keeping the seq */    for (sptr = V->seq; *sptr != '\0'; sptr++)      if (*sptr == '-') *sptr = 'N';  /* reading a real EMBL database file, we keep the source coords   */  V->sqinfo->start = 1;  V->sqinfo->stop  = V->seqlen;  V->sqinfo->olen  = V->seqlen;  V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;				/* load next record's ID line */  while (!feof(V->f) && strncmp(V->buf, "ID  ", 4) != 0) {    SeqfileGetLine(V);  }    }static intendZuker(char *s, int *addend){  *addend = 0;  return( *s == '(' );}static voidreadZuker(struct ReadSeqVars *V){  char *sptr;  SeqfileGetLine(V);  /*s == "seqLen seqid string..."*/  if ((sptr = strtok(V->buf+6, " \t\n")) != NULL)    SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);  if ((sptr = strtok(NULL, "\n")) != NULL)    SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);  readLoop(0, endZuker, V);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -