📄 stockholm.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. *****************************************************************//* stockholm.c * SRE, Fri May 28 15:46:41 1999 *  * Reading/writing of Stockholm format multiple sequence alignments. *  * example of API: *  * MSA     *msa; * FILE    *fp;        -- opened for write with fopen() * MSAFILE *afp;       -- opened for read with MSAFileOpen() *       * while ((msa = ReadStockholm(afp)) != NULL) *   { *      WriteStockholm(fp, msa); *      MSAFree(msa); *   } *  * RCS $Id: stockholm.c,v 1.5 2001/08/04 20:15:42 eddy Exp $ */#include <stdio.h>#include <string.h>#include "squid.h"#include "msa.h"static int  parse_gf(MSA *msa, char *buf);static int  parse_gs(MSA *msa, char *buf);static int  parse_gc(MSA *msa, char *buf);static int  parse_gr(MSA *msa, char *buf);static int  parse_comment(MSA *msa, char *buf);static int  parse_sequence(MSA *msa, char *buf);static void actually_write_stockholm(FILE *fp, MSA *msa, int cpl);#ifdef TESTDRIVE_STOCKHOLM/***************************************************************** * stockholm.c test driver:  * cc -DTESTDRIVE_STOCKHOLM -g -O2 -Wall -o test stockholm.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm  *  */intmain(int argc, char **argv){  MSAFILE *afp;  MSA     *msa;  char    *file;    file = argv[1];  if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL)    Die("Couldn't open %s\n", file);  while ((msa = ReadStockholm(afp)) != NULL)    {      WriteStockholm(stdout, msa);      MSAFree(msa);     }    MSAFileClose(afp);  exit(0);}/******************************************************************/#endif /* testdriver *//* Function: ReadStockholm() * Date:     SRE, Fri May 21 17:33:10 1999 [St. Louis] * * Purpose:  Parse the next alignment from an open Stockholm *           format alignment file. Return the alignment, or *           NULL if there are no more alignments in the file. * * Args:     afp  - open alignment file * * Returns:  MSA *   - an alignment object.  *                     caller responsible for an MSAFree()  *           NULL if no more alignments * * Diagnostics: *           Will Die() here with a (potentially) useful message *           if a parsing error occurs  */MSA *ReadStockholm(MSAFILE *afp){  MSA   *msa;  char  *s;  int    status;  if (feof(afp->f)) return NULL;  /* Initialize allocation of the MSA.   */  msa = MSAAlloc(10, 0);  /* Check the magic Stockholm header line.   * We have to skip blank lines here, else we perceive   * trailing blank lines in a file as a format error when   * reading in multi-record mode.   */  do {    if ((s = MSAFileGetLine(afp)) == NULL) {      MSAFree(msa);      return NULL;    }  } while (IsBlankline(s));  if (strncmp(s, "# STOCKHOLM 1.", 14) != 0)    Die("\File %s doesn't appear to be in Stockholm format.\n\Assuming there isn't some other problem with your file (it is an\n\alignment file, right?), please either:\n\  a) use the Babelfish format autotranslator option (-B, usually);\n\  b) specify the file's format with the --informat option; or\n\  a) reformat the alignment to Stockholm format.\n", 	afp->fname);  /* Read the alignment file one line at a time.   */  while ((s = MSAFileGetLine(afp)) != NULL)     {      while (*s == ' ' || *s == '\t') s++;  /* skip leading whitespace */      if (*s == '#') {	if      (strncmp(s, "#=GF", 4) == 0)   status = parse_gf(msa, s);	else if (strncmp(s, "#=GS", 4) == 0)   status = parse_gs(msa, s);	else if (strncmp(s, "#=GC", 4) == 0)   status = parse_gc(msa, s);	else if (strncmp(s, "#=GR", 4) == 0)   status = parse_gr(msa, s);	else                                   status = parse_comment(msa, s);      }       else if (strncmp(s, "//",   2) == 0)   break;      else if (*s == '\n')                   continue;      else                                   status = parse_sequence(msa, s);      if (status == 0)  	Die("Stockholm format parse error: line %d of file %s while reading alignment %s",	    afp->linenumber, afp->fname, msa->name == NULL? "" : msa->name);    }  if (s == NULL && msa->nseq != 0)    Die ("Didn't find // at end of alignment %s", msa->name == NULL ? "" : msa->name);  if (s == NULL && msa->nseq == 0) {    				/* probably just some junk at end of file */      MSAFree(msa);       return NULL;     }    MSAVerifyParse(msa);  return msa;}/* Function: WriteStockholm() * Date:     SRE, Mon May 31 19:15:22 1999 [St. Louis] * * Purpose:  Write an alignment in standard multi-block  *           Stockholm format to an open file. A wrapper *           for actually_write_stockholm(). * * Args:     fp  - file that's open for writing *           msa - alignment to write     * * Returns:  (void) */voidWriteStockholm(FILE *fp, MSA *msa){  actually_write_stockholm(fp, msa, 50); /* 50 char per block */}/* Function: WriteStockholmOneBlock() * Date:     SRE, Mon May 31 19:15:22 1999 [St. Louis] * * Purpose:  Write an alignment in Pfam's single-block *           Stockholm format to an open file. A wrapper *           for actually_write_stockholm(). * * Args:     fp  - file that's open for writing *           msa - alignment to write     * * Returns:  (void) */voidWriteStockholmOneBlock(FILE *fp, MSA *msa){  actually_write_stockholm(fp, msa, msa->alen); /* one big block */}/* Function: actually_write_stockholm() * Date:     SRE, Fri May 21 17:39:22 1999 [St. Louis] * * Purpose:  Write an alignment in Stockholm format to  *           an open file. This is the function that actually *           does the work. The API's WriteStockholm() *           and WriteStockholmOneBlock() are wrappers. * * Args:     fp    - file that's open for writing *           msa   - alignment to write         *           cpl   - characters to write per line in alignment block * * Returns:  (void) */static voidactually_write_stockholm(FILE *fp, MSA *msa, int cpl){  int  i, j;  int  len = 0;  int  namewidth;  int  typewidth = 0;		/* markup tags are up to 5 chars long */  int  markupwidth = 0;		/* #=GR, #=GC are four char wide + 1 space */  char buf[256];  int  currpos;  char *s, *tok;    /* Figure out how much space we need for name + markup   * to keep the alignment in register. Required by Stockholm   * spec, even though our Stockholm parser doesn't care (Erik's does).   */  namewidth = 0;  for (i = 0; i < msa->nseq; i++)    if ((len = strlen(msa->sqname[i])) > namewidth)       namewidth = len;  /* Figure out how much space we need for markup tags   *   markupwidth = always 4 if we're doing markup:  strlen("#=GR")   *   typewidth   = longest markup tag   */  if (msa->ss      != NULL) { markupwidth = 4; typewidth = 2; }  if (msa->sa      != NULL) { markupwidth = 4; typewidth = 2; }  for (i = 0; i < msa->ngr; i++)    if ((len = strlen(msa->gr_tag[i])) > typewidth) typewidth = len;  if (msa->rf      != NULL) { markupwidth = 4; if (typewidth < 2) typewidth = 2; }  if (msa->ss_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }  if (msa->sa_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }  for (i = 0; i < msa->ngc; i++)    if ((len = strlen(msa->gc_tag[i])) > typewidth) typewidth = len;    /* Magic Stockholm header   */  fprintf(fp, "# STOCKHOLM 1.0\n");  /* Free text comments   */  for (i = 0;  i < msa->ncomment; i++)    fprintf(fp, "# %s\n", msa->comment[i]);  if (msa->ncomment > 0) fprintf(fp, "\n");  /* GF section: per-file annotation   */  if (msa->name  != NULL)       fprintf(fp, "#=GF ID    %s\n", msa->name);  if (msa->acc   != NULL)       fprintf(fp, "#=GF AC    %s\n", msa->acc);  if (msa->desc  != NULL)       fprintf(fp, "#=GF DE    %s\n", msa->desc);  if (msa->au    != NULL)       fprintf(fp, "#=GF AU    %s\n", msa->au);  if (msa->flags & MSA_SET_GA)  fprintf(fp, "#=GF GA    %.1f %.1f\n", msa->ga1, msa->ga2);  if (msa->flags & MSA_SET_NC)  fprintf(fp, "#=GF TC    %.1f %.1f\n", msa->nc1, msa->nc2);  if (msa->flags & MSA_SET_TC)  fprintf(fp, "#=GF TC    %.1f %.1f\n", msa->tc1, msa->tc2);  for (i = 0; i < msa->ngf; i++)    fprintf(fp, "#=GF %-5s %s\n", msa->gf_tag[i], msa->gf[i]);   fprintf(fp, "\n");  /* GS section: per-sequence annotation   */  if (msa->flags & MSA_SET_WGT)     {      for (i = 0; i < msa->nseq; i++) 	fprintf(fp, "#=GS %-*.*s WT    %.2f\n", namewidth, namewidth, msa->sqname[i], msa->wgt[i]);      fprintf(fp, "\n");    }  if (msa->sqacc != NULL)     {      for (i = 0; i < msa->nseq; i++) 	if (msa->sqacc[i] != NULL)	  fprintf(fp, "#=GS %-*.*s AC    %s\n", namewidth, namewidth, msa->sqname[i], msa->sqacc[i]);      fprintf(fp, "\n");    }  if (msa->sqdesc != NULL)     {      for (i = 0; i < msa->nseq; i++) 	if (msa->sqdesc[i] != NULL)	  fprintf(fp, "#=GS %*.*s DE    %s\n", namewidth, namewidth, msa->sqname[i], msa->sqdesc[i]);      fprintf(fp, "\n");    }  for (i = 0; i < msa->ngs; i++)    {      /* Multiannotated GS tags are possible; for example,        *     #=GS foo DR PDB; 1xxx;       *     #=GS foo DR PDB; 2yyy;       * These are stored, for example, as:       *     msa->gs[0][0] = "PDB; 1xxx;\nPDB; 2yyy;"       * and must be decomposed.       */
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -