📄 selex.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
	    }				/* Side chain % surface accessibility code */	  else if (strcmp(nptr, "#=SA") == 0)	    {	      if (! copy_alignment_line(ainfo->sqinfo[seqidx-1].sa, currlen, strlen(nptr)-1,					buffer, blocks[currblock].lcol, 					blocks[currblock].rcol, (char) '.'))		Die("Parse error in #=SA line in ReadSELEX()");	    }				/* Aligned sequence; avoid unparsed machine comments */	  else if (strncmp(nptr, "#=", 2) != 0)	    {	      if (! copy_alignment_line(aseqs[seqidx], currlen, strlen(nptr)-1, 					buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))		Die("Parse error in alignment line in ReadSELEX()");	      seqidx++;	    }				/* get next line */	  for (;;)	    {	      nptr = NULL;	      if (fgets(buffer, LINEBUFLEN, fp) == NULL) break;	/* EOF */	      strcpy(bufcpy, buffer);	      if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) break; /* blank */	      if (strncmp(buffer, "#=", 2) == 0) break;      /* machine comment */	      if (strchr(commentsyms, *nptr) == NULL) break; /* data */	    }	} /* end of a block */      currlen += blocks[currblock].rcol - blocks[currblock].lcol + 1;				/* get line 1 of next block */      for (;;)	{	  if (fgets(buffer, LINEBUFLEN, fp) == NULL) break; /* no data */	  strcpy(bufcpy, buffer);	  if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) continue; /* blank */	  if (strncmp(buffer, "#=", 2) == 0)       break; /* machine comment */	  if (strchr(commentsyms, *nptr) == NULL) break; /* non-comment */	}    } /* end of the file */  /* Lengths in sqinfo are for raw sequence (ungapped),   * and SS, SA are 0..rlen-1 not 0..alen-1.   * Only the seqs with structures come out of here with lengths set.   */  for (seqidx = 0; seqidx < num; seqidx++)    {      int apos, rpos;				/* secondary structures */      if (ainfo->sqinfo[seqidx].flags & SQINFO_SS)	{	  for (apos = rpos = 0; apos < alen; apos++)	    if (! isgap(aseqs[seqidx][apos]))	      {		ainfo->sqinfo[seqidx].ss[rpos] = ainfo->sqinfo[seqidx].ss[apos];		rpos++;	      }	  ainfo->sqinfo[seqidx].ss[rpos] = '\0';	}				/* Surface accessibility */      if (ainfo->sqinfo[seqidx].flags & SQINFO_SA)	{	  for (apos = rpos = 0; apos < alen; apos++)	    if (! isgap(aseqs[seqidx][apos]))	      {		ainfo->sqinfo[seqidx].sa[rpos] = ainfo->sqinfo[seqidx].sa[apos];		rpos++;	      }	  ainfo->sqinfo[seqidx].sa[rpos] = '\0';	}    }				/* NULL-terminate all the strings */  if (ainfo->rf != NULL) ainfo->rf[alen] = '\0';  if (ainfo->cs != NULL) ainfo->cs[alen] = '\0';  for (seqidx = 0; seqidx < num; seqidx++)    aseqs[seqidx][alen]            = '\0';  				/* find raw sequence lengths for sqinfo */  for (seqidx = 0; seqidx < num; seqidx++)    {      count = 0;      for (sptr = aseqs[seqidx]; *sptr != '\0'; sptr++)	if (!isgap(*sptr)) count++;      ainfo->sqinfo[seqidx].len    = count;      ainfo->sqinfo[seqidx].flags |= SQINFO_LEN;    }  /***************************************************   * Garbage collection and return   ***************************************************/  free(blocks);  if (warn_names)     Warn("sequences may be in different orders in blocks of %s?", afp->fname);  /* Convert back to MSA structure. (Wasteful kludge.)   */  msa = MSAFromAINFO(aseqs, ainfo);  MSAVerifyParse(msa);  FreeAlignment(aseqs, ainfo);  return msa;}/* Function: WriteSELEX() * Date:     SRE, Mon Jun 14 13:13:14 1999 [St. Louis] * * Purpose:  Write a SELEX file in multiblock format. * * Args:     fp  - file that's open for writing *           msa - multiple sequence alignment object   * * Returns:  (void) */voidWriteSELEX(FILE *fp, MSA *msa){  actually_write_selex(fp, msa, 50); /* 50 char per block */}/* Function: WriteSELEXOneBlock() * Date:     SRE, Mon Jun 14 13:14:56 1999 [St. Louis] * * Purpose:  Write a SELEX alignment file in Pfam's single-block *           format style. A wrapper for actually_write_selex(). * * Args:     fp - file that's open for writing *           msa- alignment to write * * Returns:  (void) */voidWriteSELEXOneBlock(FILE *fp, MSA *msa){  actually_write_selex(fp, msa, msa->alen); /* one big block */}/* Function: actually_write_selex() * Date:     SRE, Mon Jun 14 12:54:46 1999 [St. Louis] * * Purpose:  Write an alignment in SELEX format to an open *           file. This is the function that actually does *           the work. The API's WriteSELEX() and  *           WriteSELEXOneBlock() are wrappers. * * Args:     fp  - file that's open for writing *           msa - alignment to write *           cpl - characters to write per line in alignment block * * Returns:  (void) */static voidactually_write_selex(FILE *fp, MSA *msa, int cpl){  int  i;  int  len = 0;  int  namewidth;  char *buf;  int  currpos;    buf = malloc(sizeof(char) * (cpl+101)); /* 100 chars allowed for name, etc. */  /* Figure out how much space we need for name + markup   * to keep the alignment in register, for easier human viewing --   * even though Stockholm format doesn't care about visual   * alignment.   */  namewidth = 0;  for (i = 0; i < msa->nseq; i++)    if ((len = strlen(msa->sqname[i])) > namewidth)       namewidth = len;  if (namewidth < 6) namewidth = 6; /* minimum space for markup tags */  /* Free text comments   */  for (i = 0;  i < msa->ncomment; i++)    fprintf(fp, "# %s\n", msa->comment[i]);  if (msa->ncomment > 0) fprintf(fp, "\n");  /* Per-file annotation   */  if (msa->name  != NULL)       fprintf(fp, "#=ID %s\n", msa->name);  if (msa->acc   != NULL)       fprintf(fp, "#=AC %s\n", msa->acc);  if (msa->desc  != NULL)       fprintf(fp, "#=DE %s\n", msa->desc);  if (msa->au    != NULL)       fprintf(fp, "#=AU %s\n", msa->au);  if (msa->flags & MSA_SET_GA)  fprintf(fp, "#=GA %.1f %.1f\n", msa->ga1, msa->ga2);  if (msa->flags & MSA_SET_NC)  fprintf(fp, "#=NC %.1f %.1f\n", msa->nc1, msa->nc2);  if (msa->flags & MSA_SET_TC)  fprintf(fp, "#=TC %.1f %.1f\n", msa->tc1, msa->tc2);  /* Per-sequence annotation   */  for (i = 0; i < msa->nseq; i++)    fprintf(fp, "#=SQ %-*.*s %6.4f %s %s %d..%d::%d %s\n", 	    namewidth, namewidth, msa->sqname[i],	    msa->wgt[i],	    "-",		/* MSA has no ID field */	    (msa->sqacc != NULL && msa->sqacc[i] != NULL) ? msa->sqacc[i] : "-",	    0, 0, 0,		/* MSA has no start, stop, olen field */	    (msa->sqdesc != NULL && msa->sqdesc[i] != NULL) ? msa->sqdesc[i] : "-");  fprintf(fp, "\n");  /* Alignment section:   */  for (currpos = 0; currpos < msa->alen; currpos += cpl)    {      if (currpos > 0) fprintf(fp, "\n");      if (msa->ss_cons != NULL) {	strncpy(buf, msa->ss_cons + currpos, cpl);	buf[cpl] = '\0';	fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=CS", buf);      }      if (msa->rf != NULL) {	strncpy(buf, msa->rf + currpos, cpl);	buf[cpl] = '\0';	fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=RF", buf);      }      for (i = 0; i < msa->nseq; i++)	{	  strncpy(buf, msa->aseq[i] + currpos, cpl);	  buf[cpl] = '\0';	      	  fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, msa->sqname[i], buf);	  if (msa->ss != NULL && msa->ss[i] != NULL) {	    strncpy(buf, msa->ss[i] + currpos, cpl);	    buf[cpl] = '\0';	 	    fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=SS", buf);	  }	  if (msa->sa != NULL && msa->sa[i] != NULL) {	    strncpy(buf, msa->sa[i] + currpos, cpl);	    buf[cpl] = '\0';	    fprintf(fp, "%-*.*s %s\n", namewidth, namewidth, "#=SA", buf);	  }	}    }  free(buf);}/* Function: copy_alignment_line() *  * Purpose:  Given a line from an alignment file, and bounds lcol,rcol *           on what part of it may be sequence, save the alignment into *           aseq starting at position apos. *            *           name_rcol is set to the rightmost column this aseqs's name *           occupies; if name_rcol >= lcol, we have a special case in *           which the name intrudes into the sequence zone. */static intcopy_alignment_line(char *aseq, int apos, int name_rcol, 		    char *buffer, int lcol, int rcol, char gapsym){  char *s1, *s2;  int   i;    s1 = aseq + apos;  s2 = buffer;			/* be careful that buffer doesn't end before lcol! */  for (i = 0; i < lcol; i++)    if (*s2) s2++;  for (i = lcol; i <= rcol; i++)    {      if (*s2 == '\t') {	Warn("TAB characters will corrupt a SELEX alignment! Please remove them first.");	return 0;      }      if (name_rcol >= i)	/* name intrusion special case: pad left w/ gaps */	*s1 = gapsym;				/* short buffer special case: pad right w/ gaps  */      else if (*s2 == '\0' || *s2 == '\n')	*s1 = gapsym;      else if (*s2 == ' ')	/* new: disallow spaces as gap symbols */	*s1 = gapsym;      else			/* normal case: copy buffer into aseq */	*s1 = *s2;      s1++;      if (*s2) s2++;    }  return 1;}        /* Function: DealignAseqs() *  * Given an array of (num) aligned sequences aseqs, * strip the gaps. Store the raw sequences in a new allocated array. *  * Caller is responsible for free'ing the memory allocated to * rseqs. *  * Returns 1 on success. Returns 0 and sets squid_errno on * failure. */intDealignAseqs(char **aseqs, int num, char ***ret_rseqs){  char **rseqs;                 /* de-aligned sequence array   */  int    idx;			/* counter for sequences       */  int    depos; 		/* position counter for dealigned seq*/  int    apos;			/* position counter for aligned seq */  int    seqlen;		/* length of aligned seq */				/* alloc space */  rseqs = (char **) MallocOrDie (num * sizeof(char *));				/* main loop */  for (idx = 0; idx < num; idx++)    {      seqlen = strlen(aseqs[idx]);				/* alloc space */      rseqs[idx] = (char *) MallocOrDie ((seqlen + 1) * sizeof(char));				/* strip gaps */      depos = 0;      for (apos = 0; aseqs[idx][apos] != '\0'; apos++)	if (!isgap(aseqs[idx][apos]))	  {	    rseqs[idx][depos] = aseqs[idx][apos];	    depos++;	  }      rseqs[idx][depos] = '\0';    }  *ret_rseqs = rseqs;  return 1;}/* Function: IsSELEXFormat() *  * Return TRUE if filename may be in SELEX format. *  * Accuracy is sacrificed for speed; a TRUE return does * *not* guarantee that the file will pass the stricter * error-checking of ReadSELEX(). All it checks is that * the first 500 non-comment lines of a file are  * blank, or if there's a second "word" on the line * it looks like sequence (i.e., it's not kOtherSeq). *  * Returns TRUE or FALSE. */intIsSELEXFormat(char *filename){  FILE *fp;                     /* ptr to open sequence file */  char  buffer[LINEBUFLEN];  char *sptr;                   /* ptr to first word          */  int   linenum;  if ((fp = fopen(filename, "r")) == NULL)    { squid_errno = SQERR_NOFILE; return 0; }  linenum = 0;  while (linenum < 500 && 	 fgets(buffer, LINEBUFLEN, fp) != NULL)    {      linenum++;				/* dead giveaways for extended SELEX */      if      (strncmp(buffer, "#=AU", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=ID", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=AC", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=DE", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=GA", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=TC", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=NC", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=SQ", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=SS", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=CS", 4) == 0) goto DONE;      else if (strncmp(buffer, "#=RF", 4) == 0) goto DONE;				/* a comment? */      if (strchr(commentsyms, *buffer) != NULL) continue;				/* a blank line? */      if ((sptr = strtok(buffer, WHITESPACE)) == NULL) continue;				/* a one-word line (name only)				   is possible, though rare */      if ((sptr = strtok(NULL, "\n")) == NULL) continue;            if (Seqtype(sptr) == kOtherSeq) {fclose(fp); return 0;}    } DONE:  fclose(fp);  return 1;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -