⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 msa.c

📁 这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer
💻 C
📖 第 1 页 / 共 3 页
字号:
      Die("failed to sre_strcat()");    if (sre_strcat(&(msa->gs[tagidx][sqidx]), len, value, -1) < 0)      Die("failed to sre_strcat()");  }  return;} /* Function: MSAAppendGC() * Date:     SRE, Thu Jun  3 06:25:14 1999 [Madison] * * Purpose:  Add an unparsed #=GC markup line to the MSA *           structure, allocating as necessary.  *            *           When called multiple times for the same tag, *           appends value strings together -- used when *           parsing multiblock alignment files, for *           example. * * Args:     msa   - multiple alignment structure *           tag   - markup tag (e.g. "CS") *           value - markup, one char per aligned column       * * Returns:  (void) */voidMSAAppendGC(MSA *msa, char *tag, char *value){  int tagidx;  /* Is this an unparsed tag name that we recognize?   * If not, handle adding it to index, and reallocating   * as needed.   */  if (msa->gc_tag == NULL)	/* first tag? init w/ malloc  */    {      msa->gc_tag = MallocOrDie(sizeof(char *));      msa->gc     = MallocOrDie(sizeof(char *));      msa->gc_idx = GKIInit();      tagidx      = GKIStoreKey(msa->gc_idx, tag);      SQD_DASSERT1((tagidx == 0));      msa->gc[0]  = NULL;    }  else    {			/* new tag? */      tagidx  = GKIKeyIndex(msa->gc_idx, tag);       if (tagidx < 0) {		/* it's a new tag name; realloc */	tagidx = GKIStoreKey(msa->gc_idx, tag);				/* since we alloc in blocks of 1,				   we always realloc upon seeing 				   a new tag. */	SQD_DASSERT1((tagidx == msa->ngc));	msa->gc_tag = ReallocOrDie(msa->gc_tag, (msa->ngc+1) * sizeof(char **));	msa->gc     = ReallocOrDie(msa->gc, (msa->ngc+1) * sizeof(char **));	msa->gc[tagidx] = NULL;      }    }  if (tagidx == msa->ngc) {    msa->gc_tag[tagidx] = sre_strdup(tag, -1);    msa->ngc++;  }  sre_strcat(&(msa->gc[tagidx]), -1, value, -1);  return;}/* Function: MSAGetGC() * Date:     SRE, Fri Aug 13 13:25:57 1999 [St. Louis] * * Purpose:  Given a tagname for a miscellaneous #=GC column *           annotation, return a pointer to the annotation *           string.  * * Args:     msa  - alignment and its annotation *           tag  - name of the annotation        * * Returns:  ptr to the annotation string. Caller does *not* *           free; is managed by msa object still. */char *MSAGetGC(MSA *msa, char *tag){  int tagidx;  if (msa->gc_idx == NULL) return NULL;  if ((tagidx = GKIKeyIndex(msa->gc_idx, tag)) < 0) return NULL;  return msa->gc[tagidx];}/* Function: MSAAppendGR() * Date:     SRE, Thu Jun  3 06:34:38 1999 [Madison] * * Purpose:  Add an unparsed #=GR markup line to the *           MSA structure, allocating as necessary. *            *           When called multiple times for the same tag, *           appends value strings together -- used when *           parsing multiblock alignment files, for *           example. * * Args:     msa    - multiple alignment structure *           tag    - markup tag (e.g. "SS") *           sqidx  - index of seq to assoc markup with (0..nseq-1) *           value  - markup, one char per aligned column       * * Returns:  (void) */voidMSAAppendGR(MSA *msa, char *tag, int sqidx, char *value){  int tagidx;  int i;  /* Is this an unparsed tag name that we recognize?   * If not, handle adding it to index, and reallocating   * as needed.   */  if (msa->gr_tag == NULL)	/* first tag? init w/ malloc  */    {      msa->gr_tag = MallocOrDie(sizeof(char *));      msa->gr     = MallocOrDie(sizeof(char **));      msa->gr[0]  = MallocOrDie(sizeof(char *) * msa->nseqalloc);      for (i = 0; i < msa->nseqalloc; i++) 	msa->gr[0][i] = NULL;      msa->gr_idx = GKIInit();      tagidx      = GKIStoreKey(msa->gr_idx, tag);      SQD_DASSERT1((tagidx == 0));    }  else     {				/* new tag? */      tagidx  = GKIKeyIndex(msa->gr_idx, tag);       if (tagidx < 0) {		/* it's a new tag name; realloc */	tagidx = GKIStoreKey(msa->gr_idx, tag);				/* since we alloc in blocks of 1,				   we always realloc upon seeing 				   a new tag. */	SQD_DASSERT1((tagidx == msa->ngr));	msa->gr_tag       = ReallocOrDie(msa->gr_tag, (msa->ngr+1) * sizeof(char *));	msa->gr           = ReallocOrDie(msa->gr, (msa->ngr+1) * sizeof(char **));	msa->gr[msa->ngr] = MallocOrDie(sizeof(char *) * msa->nseqalloc);	for (i = 0; i < msa->nseqalloc; i++) 	  msa->gr[msa->ngr][i] = NULL;      }    }    if (tagidx == msa->ngr) {    msa->gr_tag[tagidx] = sre_strdup(tag, -1);    msa->ngr++;  }  sre_strcat(&(msa->gr[tagidx][sqidx]), -1, value, -1);  return;}/* Function: MSAVerifyParse() * Date:     SRE, Sat Jun  5 14:24:24 1999 [Madison, 1999 worm mtg] * * Purpose:  Last function called after a multiple alignment is *           parsed. Checks that parse was successful; makes sure *           required information is present; makes sure required *           information is consistent. Some fields that are *           only use during parsing may be freed (sqlen, for *           example). *            *           Some fields in msa may be modified (msa->alen is set, *           for example). * * Args:     msa - the multiple alignment *                 sqname, aseq must be set *                 nseq must be correct *                 alen need not be set; will be set here. *                 wgt will be set here if not already set * * Returns:  (void) *           Will Die() here with diagnostics on error. * * Example:   */voidMSAVerifyParse(MSA *msa){  int idx;  if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s",			  msa->name != NULL ? msa->name : "");  msa->alen = msa->sqlen[0];  /* We can rely on msa->sqname[] being valid for any index,   * because of the way the line parsers always store any name   * they add to the index.   */  for (idx = 0; idx < msa->nseq; idx++)    {				/* aseq is required. */      if (msa->aseq[idx] == NULL) 	Die("Parse error: No sequence for %s in alignment %s", msa->sqname[idx],	    msa->name != NULL ? msa->name : "");				/* either all weights must be set, or none of them */      if ((msa->flags & MSA_SET_WGT) && msa->wgt[idx] == -1.0)	Die("Parse error: some weights are set, but %s doesn't have one in alignment %s", 	    msa->sqname[idx],	    msa->name != NULL ? msa->name : "");				/* all aseq must be same length. */      if (msa->sqlen[idx] != msa->alen)	Die("Parse error: sequence %s: length %d, expected %d in alignment %s",	    msa->sqname[idx], msa->sqlen[idx], msa->alen,	    msa->name != NULL ? msa->name : "");				/* if SS is present, must have length right */      if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->alen) 	Die("Parse error: #=GR SS annotation for %s: length %d, expected %d in alignment %s",	    msa->sqname[idx], msa->sslen[idx], msa->alen,	    msa->name != NULL ? msa->name : "");				/* if SA is present, must have length right */      if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->alen) 	Die("Parse error: #=GR SA annotation for %s: length %d, expected %d in alignment %s",	    msa->sqname[idx], msa->salen[idx], msa->alen,	    msa->name != NULL ? msa->name : "");    }			/* if cons SS is present, must have length right */  if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen)     Die("Parse error: #=GC SS_cons annotation: length %d, expected %d in alignment %s",	strlen(msa->ss_cons), msa->alen,	msa->name != NULL ? msa->name : "");			/* if cons SA is present, must have length right */  if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen)     Die("Parse error: #=GC SA_cons annotation: length %d, expected %d in alignment %s",	strlen(msa->sa_cons), msa->alen,	msa->name != NULL ? msa->name : "");				/* if RF is present, must have length right */  if (msa->rf != NULL && strlen(msa->rf) != msa->alen)     Die("Parse error: #=GC RF annotation: length %d, expected %d in alignment %s",	strlen(msa->rf), msa->alen,	msa->name != NULL ? msa->name : "");				/* Check that all or no weights are set */  if (!(msa->flags & MSA_SET_WGT))    FSet(msa->wgt, msa->nseq, 1.0); /* default weights */				/* Clean up a little from the parser */  if (msa->sqlen != NULL) { free(msa->sqlen); msa->sqlen = NULL; }  if (msa->sslen != NULL) { free(msa->sslen); msa->sslen = NULL; }  if (msa->salen != NULL) { free(msa->salen); msa->salen = NULL; }  return;}/* Function: MSAFileOpen() * Date:     SRE, Tue May 18 13:22:01 1999 [St. Louis] * * Purpose:  Open an alignment database file and prepare *           for reading one alignment, or sequentially *           in the (rare) case of multiple MSA databases *           (e.g. Stockholm format). *            * Args:     filename - name of file to open *                      if "-", read stdin *                      if it ends in ".gz", read from pipe to gunzip -dc *           format   - format of file (e.g. MSAFILE_STOCKHOLM) *           env      - environment variable for path (e.g. BLASTDB) * * Returns:  opened MSAFILE * on success. *           NULL on failure:  *             usually, because the file doesn't exist; *             for gzip'ed files, may also mean that gzip isn't in the path. */MSAFILE *MSAFileOpen(char *filename, int format, char *env){  MSAFILE *afp;    afp        = MallocOrDie(sizeof(MSAFILE));  if (strcmp(filename, "-") == 0)    {      afp->f         = stdin;      afp->do_stdin  = TRUE;       afp->do_gzip   = FALSE;      afp->fname     = sre_strdup("[STDIN]", -1);      afp->ssi       = NULL;	/* can't index stdin because we can't seek*/    }#ifndef SRE_STRICT_ANSI		  /* popen(), pclose() aren't portable to non-POSIX systems; disable */  else if (Strparse("^.*\\.gz$", filename, 0))    {      char cmd[256];      /* Note that popen() will return "successfully"       * if file doesn't exist, because gzip works fine       * and prints an error! So we have to check for       * existence of file ourself.       */      if (! FileExists(filename))	Die("%s: file does not exist", filename);      if (strlen(filename) + strlen("gzip -dc ") >= 256)	Die("filename > 255 char in MSAFileOpen()");       sprintf(cmd, "gzip -dc %s", filename);      if ((afp->f = popen(cmd, "r")) == NULL)	return NULL;      afp->do_stdin = FALSE;      afp->do_gzip  = TRUE;      afp->fname    = sre_strdup(filename, -1);      /* we can't index a .gz file, because we can't seek in a pipe afaik */      afp->ssi      = NULL;	    }#endif /*SRE_STRICT_ANSI*/  else    {      char *ssifile;      char *dir;      /* When we open a file, it may be either in the current       * directory, or in the directory indicated by the env       * argument - and we have to construct the SSI filename accordingly.       */      if ((afp->f = fopen(filename, "r")) != NULL)	{	  ssifile = MallocOrDie(sizeof(char) * (strlen(filename) + 5));	  sprintf(ssifile, "%s.ssi", filename);	}      else if ((afp->f = EnvFileOpen(filename, env, &dir)) != NULL)	{	  char *full;	  full = FileConcat(dir, filename);	  ssifile = MallocOrDie(sizeof(char) * (strlen(full) + strlen(filename)  + 5));	  sprintf(ssifile, "%s.ssi", full);	  free(dir);	}      else return NULL;      afp->do_stdin = FALSE;      afp->do_gzip  = FALSE;      afp->fname    = sre_strdup(filename, -1);      afp->ssi      = NULL;      /* Open the SSI index file. If it doesn't exist, or       * it's corrupt, or some error happens, afp->ssi stays NULL.       */      SSIOpen(ssifile, &(afp->ssi));      free(ssifile);    }  /* Invoke autodetection if we haven't already been told what   * to expect.   */  if (format == MSAFILE_UNKNOWN)    {      if (afp->do_stdin == TRUE || afp->do_gzip)	Die("Can't autodetect alignment file format from a stdin or gzip pipe");      format = MSAFileFormat(afp);      if (format == MSAFILE_UNKNOWN)	Die("Can't determine format of multiple alignment file %s", afp->fname);    }  afp->format     = format;  afp->linenumber = 0;  afp->buf        = NULL;  afp->buflen     = 0;  return afp;}/* Function: MSAFilePositionByKey() *           MSAFilePositionByIndex() *           MSAFileRewind() *  * Date:     SRE, Tue Nov  9 19:02:54 1999 [St. Louis] * * Purpose:  Family of functions for repositioning in *           open MSA files; analogous to a similarly *           named function series in HMMER's hmmio.c. * * Args:     afp    - open alignment file *           offset - disk offset in bytes *           key    - key to look up in SSI indices  *           idx    - index of alignment. * * Returns:  0 on failure. *           1 on success. *           If called on a non-fseek()'able file (e.g. a gzip'ed *           or pipe'd alignment), returns 0 as a failure flag. */int MSAFileRewind(MSAFILE *afp){  if (afp->do_gzip || afp->do_stdin) return 0;  rewind(afp->f);  return 1;}int MSAFilePositionByKey(MSAFILE *afp, char *key){  int       fh;			/* filehandle is ignored       */  SSIOFFSET offset;		/* offset of the key alignment */  if (afp->ssi == NULL) return 0;  if (SSIGetOffsetByName(afp->ssi, key, &fh, &offset) != 0) return 0;  if (SSISetFilePosition(afp->f, &offset) != 0) return 0;  return 1;}intMSAFilePositionByIndex(MSAFILE *afp, int idx){  int       fh;			/* filehandled is passed but ignored */  SSIOFFSET offset;		/* disk offset of desired alignment  */  if (afp->ssi == NULL) return 0;  if (SSIGetOffsetByNumber(afp->ssi, idx, &fh, &offset) != 0) return 0;  if (SSISetFilePosition(afp->f, &offset) != 0) return 0;  return 1;}/* Function: MSAFileRead() * Date:     SRE, Fri May 28 16:01:43 1999 [St. Louis] * * Purpose:  Read the next msa from an open alignment file. *           This is a wrapper around format-specific calls. * * Args:     afp     - open alignment file * * Returns:  next alignment, or NULL if out of alignments  */MSA *MSAFileRead(MSAFILE *afp){  MSA *msa = NULL;  switch (afp->format) {  case MSAFILE_STOCKHOLM: msa = ReadStockholm(afp); break;  case MSAFILE_MSF:       msa = ReadMSF(afp);       break;  case MSAFILE_A2M:       msa = ReadA2M(afp);       break;  case MSAFILE_CLUSTAL:   msa = ReadClustal(afp);   break;  case MSAFILE_SELEX:     msa = ReadSELEX(afp);     break;  case MSAFILE_PHYLIP:    msa = ReadPhylip(afp);    break;  default:    Die("MSAFILE corrupted: bad format index");  }  return msa;}/* Function: MSAFileClose() * Date:     SRE, Tue May 18 14:05:28 1999 [St. Louis] * * Purpose:  Close an open MSAFILE. * * Args:     afp  - ptr to an open MSAFILE. * * Returns:  void */voidMSAFileClose(MSAFILE *afp){#ifndef SRE_STRICT_ANSI	 /* gzip functionality only on POSIX systems */  if (afp->do_gzip)    pclose(afp->f);#endif  if (! afp->do_stdin) fclose(afp->f);  if (afp->buf  != NULL) free(afp->buf);  if (afp->ssi  != NULL) SSIClose(afp->ssi);  if (afp->fname != NULL) free(afp->fname);  free(afp);}char *MSAFileGetLine(MSAFILE *afp){  char *s;  if ((s = sre_fgets(&(afp->buf), &(afp->buflen), afp->f)) == NULL)    return NULL;  afp->linenumber++;  return afp->buf;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -