⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 msa.c

📁 这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer
💻 C
📖 第 1 页 / 共 3 页
字号:
/***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-2003 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. *****************************************************************//* msa.c * SRE, Mon May 17 10:48:47 1999 *  * SQUID's interface for multiple sequence alignment * manipulation: access to the MSA object. *  * CVS $Id: msa.c,v 1.20 2003/05/26 16:21:50 eddy Exp $ */#include "squidconf.h"#include <stdio.h>#include <stdlib.h>#include <string.h>#include "squid.h"#include "msa.h"	/* multiple sequence alignment object support */#include "gki.h"	/* string indexing hashtable code  */#include "ssi.h"	/* SSI sequence file indexing code *//* Function: MSAAlloc() * Date:     SRE, Tue May 18 10:45:47 1999 [St. Louis] * * Purpose:  Allocate an MSA structure, return a pointer *           to it. *            *           Designed to be used in three ways: *           1) We know exactly the dimensions of the alignment: *              both nseq and alen. *                    msa = MSAAlloc(nseq, alen); *                     *           2) We know the number of sequences but not alen. *              (We add sequences later.)  *                    msa = MSAAlloc(nseq, 0); *               *           3) We even don't know the number of sequences, so *              we'll have to dynamically expand allocations. *              We provide a blocksize for the allocation expansion, *              and expand when needed. *                    msa = MSAAlloc(10, 0); *                    if (msa->nseq == msa->nseqalloc) MSAExpand(msa);    * * Args:     nseq - number of sequences, or nseq allocation blocksize *           alen - length of alignment in columns, or 0       * * Returns:  pointer to new MSA object, w/ all values initialized. *           Note that msa->nseq is initialized to 0, though space *           is allocated. *            * Diagnostics: "always works". Die()'s on memory allocation failure. *              */MSA *MSAAlloc(int nseq, int alen){  MSA *msa;  int  i;  msa         = MallocOrDie(sizeof(MSA));  msa->aseq   = MallocOrDie(sizeof(char *) * nseq);  msa->sqname = MallocOrDie(sizeof(char *) * nseq);  msa->sqlen  = MallocOrDie(sizeof(int)    * nseq);  msa->wgt    = MallocOrDie(sizeof(float)  * nseq);  for (i = 0; i < nseq; i++)    {      msa->sqname[i] = NULL;      msa->sqlen[i]  = 0;      msa->wgt[i]    = -1.0;      if (alen != 0) msa->aseq[i] = MallocOrDie(sizeof(char) * (alen+1));      else           msa->aseq[i] = NULL;    }        msa->alen      = alen;  msa->nseq      = 0;  msa->nseqalloc = nseq;  msa->nseqlump  = nseq;  msa->flags   = 0;  msa->type    = kOtherSeq;  msa->name    = NULL;  msa->desc    = NULL;  msa->acc     = NULL;  msa->au      = NULL;  msa->ss_cons = NULL;  msa->sa_cons = NULL;  msa->rf      = NULL;  msa->sqacc   = NULL;  msa->sqdesc  = NULL;  msa->ss      = NULL;  msa->sslen   = NULL;  msa->sa      = NULL;  msa->salen   = NULL;  msa->index   = GKIInit();  msa->lastidx = 0;  for (i = 0; i < MSA_MAXCUTOFFS; i++) {    msa->cutoff[i]        = 0.;    msa->cutoff_is_set[i] = FALSE;  }  /* Initialize unparsed optional markup   */  msa->comment        = NULL;  msa->ncomment       = 0;  msa->alloc_ncomment = 0;  msa->gf_tag         = NULL;  msa->gf             = NULL;  msa->ngf            = 0;  msa->gs_tag         = NULL;  msa->gs             = NULL;  msa->gs_idx         = NULL;  msa->ngs            = 0;  msa->gc_tag         = NULL;  msa->gc             = NULL;  msa->gc_idx         = NULL;  msa->ngc            = 0;  msa->gr_tag         = NULL;  msa->gr             = NULL;  msa->gr_idx         = NULL;  msa->ngr            = 0;  /* Done. Return the alloced, initialized structure   */   return msa;}/* Function: MSAExpand() * Date:     SRE, Tue May 18 11:06:53 1999 [St. Louis] * * Purpose:  Increase the sequence allocation in an MSA *           by msa->nseqlump. (Typically used when we're reading *           in an alignment sequentially from a file, *           so we don't know nseq until we're done.) * * Args:     msa - the MSA object * * Returns:  (void) *            */voidMSAExpand(MSA *msa){  int i,j;  msa->nseqalloc += msa->nseqlump;  msa->aseq   = ReallocOrDie(msa->aseq,   sizeof(char *) * msa->nseqalloc);  msa->sqname = ReallocOrDie(msa->sqname, sizeof(char *) * msa->nseqalloc);  msa->sqlen  = ReallocOrDie(msa->sqlen,  sizeof(char *) * msa->nseqalloc);  msa->wgt    = ReallocOrDie(msa->wgt,    sizeof(float)  * msa->nseqalloc);  if (msa->ss != NULL) {    msa->ss    = ReallocOrDie(msa->ss,    sizeof(char *) * msa->nseqalloc);    msa->sslen = ReallocOrDie(msa->sslen, sizeof(int)    * msa->nseqalloc);  }  if (msa->sa != NULL) {    msa->sa    = ReallocOrDie(msa->sa,    sizeof(char *) * msa->nseqalloc);    msa->salen = ReallocOrDie(msa->salen, sizeof(int)    * msa->nseqalloc);  }  if (msa->sqacc != NULL)    msa->sqacc = ReallocOrDie(msa->sqacc, sizeof(char *) * msa->nseqalloc);  if (msa->sqdesc != NULL)    msa->sqdesc =ReallocOrDie(msa->sqdesc,sizeof(char *) * msa->nseqalloc);  for (i = msa->nseqalloc-msa->nseqlump; i < msa->nseqalloc; i++)    {      msa->sqname[i] = NULL;      msa->wgt[i]    = -1.0;      if (msa->sqacc  != NULL) msa->sqacc[i]  = NULL;      if (msa->sqdesc != NULL) msa->sqdesc[i] = NULL;      if (msa->alen != 0) 	msa->aseq[i] = ReallocOrDie(msa->aseq[i], sizeof(char) * (msa->alen+1));      else msa->aseq[i] = NULL;      msa->sqlen[i] = 0;      if (msa->ss != NULL) {	if (msa->alen != 0) 	  msa->ss[i] = ReallocOrDie(msa->ss[i], sizeof(char) * (msa->alen+1));	else msa->ss[i] = NULL;	msa->sslen[i] = 0;      }      if (msa->sa != NULL) { 	if (msa->alen != 0) 	  msa->sa[i] = ReallocOrDie(msa->ss[i], sizeof(char) * (msa->alen+1));	else 	  msa->sa[i] = NULL;	msa->salen[i] = 0;      }    }  /* Reallocate and re-init for unparsed #=GS tags, if we have some.   * gs is [0..ngs-1][0..nseq-1][], so we're reallocing the middle   * set of pointers.   */  if (msa->gs != NULL)    for (i = 0; i < msa->ngs; i++)      {	if (msa->gs[i] != NULL)	  {	    msa->gs[i] = ReallocOrDie(msa->gs[i], sizeof(char *) * msa->nseqalloc);	    for (j = msa->nseqalloc-msa->nseqlump; j < msa->nseqalloc; j++)	      msa->gs[i][j] = NULL;	  }      }  /* Reallocate and re-init for unparsed #=GR tags, if we have some.   * gr is [0..ngs-1][0..nseq-1][], so we're reallocing the middle   * set of pointers.   */  if (msa->gr != NULL)    for (i = 0; i < msa->ngr; i++)      {	if (msa->gr[i] != NULL)	  {	    msa->gr[i] = ReallocOrDie(msa->gr[i], sizeof(char *) * msa->nseqalloc);	    for (j = msa->nseqalloc-msa->nseqlump; j < msa->nseqalloc; j++)	      msa->gr[i][j] = NULL;	  }      }  return;}/* Function: MSAFree() * Date:     SRE, Tue May 18 11:20:16 1999 [St. Louis] * * Purpose:  Free a multiple sequence alignment structure. * * Args:     msa - the alignment * * Returns:  (void) */voidMSAFree(MSA *msa){  Free2DArray((void **) msa->aseq,   msa->nseq);  Free2DArray((void **) msa->sqname, msa->nseq);  Free2DArray((void **) msa->sqacc,  msa->nseq);  Free2DArray((void **) msa->sqdesc, msa->nseq);  Free2DArray((void **) msa->ss,     msa->nseq);  Free2DArray((void **) msa->sa,     msa->nseq);  if (msa->sqlen   != NULL) free(msa->sqlen);  if (msa->wgt     != NULL) free(msa->wgt);  if (msa->name    != NULL) free(msa->name);  if (msa->desc    != NULL) free(msa->desc);  if (msa->acc     != NULL) free(msa->acc);  if (msa->au      != NULL) free(msa->au);  if (msa->ss_cons != NULL) free(msa->ss_cons);  if (msa->sa_cons != NULL) free(msa->sa_cons);  if (msa->rf      != NULL) free(msa->rf);  if (msa->sslen   != NULL) free(msa->sslen);  if (msa->salen   != NULL) free(msa->salen);    Free2DArray((void **) msa->comment, msa->ncomment);  Free2DArray((void **) msa->gf_tag,  msa->ngf);  Free2DArray((void **) msa->gf,      msa->ngf);  Free2DArray((void **) msa->gs_tag,  msa->ngs);  Free3DArray((void ***)msa->gs,      msa->ngs, msa->nseq);  Free2DArray((void **) msa->gc_tag,  msa->ngc);  Free2DArray((void **) msa->gc,      msa->ngc);  Free2DArray((void **) msa->gr_tag,  msa->ngr);  Free3DArray((void ***)msa->gr,      msa->ngr, msa->nseq);  GKIFree(msa->index);  GKIFree(msa->gs_idx);  GKIFree(msa->gc_idx);  GKIFree(msa->gr_idx);  free(msa);}/* Function: MSASetSeqAccession() * Date:     SRE, Mon Jun 21 04:13:33 1999 [Sanger Centre] * * Purpose:  Set a sequence accession in an MSA structure. *           Handles some necessary allocation/initialization. * * Args:     msa      - multiple alignment to add accession to *           seqidx   - index of sequence to attach accession to *           acc      - accession  * * Returns:  void */voidMSASetSeqAccession(MSA *msa, int seqidx, char *acc){  int x;  if (msa->sqacc == NULL) {    msa->sqacc = MallocOrDie(sizeof(char *) * msa->nseqalloc);    for (x = 0; x < msa->nseqalloc; x++)      msa->sqacc[x] = NULL;  }  msa->sqacc[seqidx] = sre_strdup(acc, -1);}/* Function: MSASetSeqDescription() * Date:     SRE, Mon Jun 21 04:21:09 1999 [Sanger Centre] * * Purpose:  Set a sequence description in an MSA structure. *           Handles some necessary allocation/initialization. * * Args:     msa      - multiple alignment to add accession to *           seqidx   - index of sequence to attach accession to *           desc     - description * * Returns:  void */voidMSASetSeqDescription(MSA *msa, int seqidx, char *desc){  int x;  if (msa->sqdesc == NULL) {    msa->sqdesc = MallocOrDie(sizeof(char *) * msa->nseqalloc);    for (x = 0; x < msa->nseqalloc; x++)      msa->sqdesc[x] = NULL;  }  msa->sqdesc[seqidx] = sre_strdup(desc, -1);}/* Function: MSAAddComment() * Date:     SRE, Tue Jun  1 17:37:21 1999 [St. Louis] * * Purpose:  Add an (unparsed) comment line to the MSA structure, *           allocating as necessary. * * Args:     msa - a multiple alignment *           s   - comment line to add * * Returns:  (void) */voidMSAAddComment(MSA *msa, char *s){  /* If this is our first recorded comment, we need to malloc();   * and if we've filled available space, we need to realloc().   * Note the arbitrary lumpsize of 10 lines per allocation...   */  if (msa->comment == NULL) {    msa->comment        = MallocOrDie (sizeof(char *) * 10);    msa->alloc_ncomment = 10;  }  if (msa->ncomment == msa->alloc_ncomment) {    msa->alloc_ncomment += 10;    msa->comment = ReallocOrDie(msa->comment, sizeof(char *) * msa->alloc_ncomment);  }  msa->comment[msa->ncomment] = sre_strdup(s, -1);  msa->ncomment++;  return;}/* Function: MSAAddGF() * Date:     SRE, Wed Jun  2 06:53:54 1999 [bus to Madison] * * Purpose:  Add an unparsed #=GF markup line to the MSA *           structure, allocating as necessary.  * * Args:     msa   - a multiple alignment *           tag   - markup tag (e.g. "AU")        *           value - free text markup (e.g. "Alex Bateman") * * Returns:  (void) */voidMSAAddGF(MSA *msa, char *tag, char *value){  /* If this is our first recorded unparsed #=GF line, we need to malloc();   * if we've filled availabl space If we already have a hash index, and the GF    * Note the arbitrary lumpsize of 10 lines per allocation...   */  if (msa->gf_tag == NULL) {    msa->gf_tag    = MallocOrDie (sizeof(char *) * 10);    msa->gf        = MallocOrDie (sizeof(char *) * 10);    msa->alloc_ngf = 10;  }  if (msa->ngf == msa->alloc_ngf) {    msa->alloc_ngf += 10;    msa->gf_tag     = ReallocOrDie(msa->gf_tag, sizeof(char *) * msa->alloc_ngf);    msa->gf         = ReallocOrDie(msa->gf, sizeof(char *) * msa->alloc_ngf);  }  msa->gf_tag[msa->ngf] = sre_strdup(tag, -1);  msa->gf[msa->ngf]     = sre_strdup(value, -1);  msa->ngf++;  return;}/* Function: MSAAddGS() * Date:     SRE, Wed Jun  2 06:57:03 1999 [St. Louis] * * Purpose:  Add an unparsed #=GS markup line to the MSA *           structure, allocating as necessary. *            *           It's possible that we could get more than one *           of the same type of GS tag per sequence; for *           example, "DR PDB;" structure links in Pfam. *           Hack: handle these by appending to the string, *           in a \n separated fashion.  * * Args:     msa    - multiple alignment structure *           tag    - markup tag (e.g. "AC") *           sqidx  - index of sequence to assoc markup with (0..nseq-1) *           value  - markup (e.g. "P00666") * * Returns:  0 on success */voidMSAAddGS(MSA *msa, char *tag, int sqidx, char *value){  int tagidx;  int i;  /* Is this an unparsed tag name that we recognize?   * If not, handle adding it to index, and reallocating   * as needed.   */  if (msa->gs_tag == NULL)	/* first tag? init w/ malloc  */    {      msa->gs_idx = GKIInit();      tagidx      = GKIStoreKey(msa->gs_idx, tag);      SQD_DASSERT1((tagidx == 0));      msa->gs_tag = MallocOrDie(sizeof(char *));      msa->gs     = MallocOrDie(sizeof(char **));      msa->gs[0]  = MallocOrDie(sizeof(char *) * msa->nseqalloc);      for (i = 0; i < msa->nseqalloc; i++)	msa->gs[0][i] = NULL;    }  else     {				/* new tag? */      tagidx  = GKIKeyIndex(msa->gs_idx, tag);       if (tagidx < 0) {		/* it's a new tag name; realloc */	tagidx = GKIStoreKey(msa->gs_idx, tag);				/* since we alloc in blocks of 1,				   we always realloc upon seeing 				   a new tag. */	SQD_DASSERT1((tagidx == msa->ngs));	msa->gs_tag =       ReallocOrDie(msa->gs_tag, (msa->ngs+1) * sizeof(char *));	msa->gs     =       ReallocOrDie(msa->gs, (msa->ngs+1) * sizeof(char **));	msa->gs[msa->ngs] = MallocOrDie(sizeof(char *) * msa->nseqalloc);	for (i = 0; i < msa->nseqalloc; i++) 	  msa->gs[msa->ngs][i] = NULL;      }    }  if (tagidx == msa->ngs) {    msa->gs_tag[tagidx] = sre_strdup(tag, -1);    msa->ngs++;  }    if (msa->gs[tagidx][sqidx] == NULL) /* first annotation of this seq with this tag? */    msa->gs[tagidx][sqidx] = sre_strdup(value, -1);  else {							/* >1 annotation of this seq with this tag; append */    int len;    if ((len = sre_strcat(&(msa->gs[tagidx][sqidx]), -1, "\n", 1)) < 0)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -