⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 msa.c

📁 这是一个基于HMM 模型的生物多序列比对算法的linux实现版本。hmmer
💻 C
📖 第 1 页 / 共 3 页
字号:
void MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline){  switch (outfmt) {  case MSAFILE_A2M:       WriteA2M(fp, msa);     break;  case MSAFILE_CLUSTAL:   WriteClustal(fp, msa); break;  case MSAFILE_MSF:       WriteMSF(fp, msa);     break;  case MSAFILE_PHYLIP:    WritePhylip(fp, msa);  break;  case MSAFILE_SELEX:     WriteSELEX(fp, msa);   break;  case MSAFILE_STOCKHOLM:    if (do_oneline) WriteStockholmOneBlock(fp, msa);    else            WriteStockholm(fp, msa);    break;  default:    Die("can't write. no such alignment format %d\n", outfmt);  }}/* Function: MSAGetSeqidx() * Date:     SRE, Wed May 19 15:08:25 1999 [St. Louis] * * Purpose:  From a sequence name, return seqidx appropriate *           for an MSA structure. *            *           1) try to guess the index. (pass -1 if you can't guess) *           2) Look up name in msa's hashtable. *           3) If it's a new name, store in msa's hashtable; *                                  expand allocs as needed; *                                  save sqname. * * Args:     msa   - alignment object *           name  - a sequence name *           guess - a guess at the right index, or -1 if no guess. * * Returns:  seqidx */intMSAGetSeqidx(MSA *msa, char *name, int guess){  int seqidx;				/* can we guess? */  if (guess >= 0 && guess < msa->nseq && strcmp(name, msa->sqname[guess]) == 0)     return guess;				/* else, a lookup in the index */  if ((seqidx = GKIKeyIndex(msa->index, name)) >= 0)    return seqidx;				/* else, it's a new name */  seqidx = GKIStoreKey(msa->index, name);  if (seqidx >= msa->nseqalloc)  MSAExpand(msa);  msa->sqname[seqidx] = sre_strdup(name, -1);  msa->nseq++;  return seqidx;}/* Function: MSAFromAINFO() * Date:     SRE, Mon Jun 14 11:22:24 1999 [St. Louis] * * Purpose:  Convert the old aseq/ainfo alignment structure *           to new MSA structure. Enables more rapid conversion *           of codebase to the new world order. * * Args:     aseq  - [0..nseq-1][0..alen-1] alignment *           ainfo - old-style optional info * * Returns:  MSA * */MSA *MSAFromAINFO(char **aseq, AINFO *ainfo){  MSA *msa;  int  i, j;  msa = MSAAlloc(ainfo->nseq, ainfo->alen);  for (i = 0; i < ainfo->nseq; i++)    {      strcpy(msa->aseq[i], aseq[i]);      msa->wgt[i]    = ainfo->wgt[i];      msa->sqname[i] = sre_strdup(ainfo->sqinfo[i].name, -1);      msa->sqlen[i]  = msa->alen;      GKIStoreKey(msa->index, msa->sqname[i]);      if (ainfo->sqinfo[i].flags & SQINFO_ACC) 	MSASetSeqAccession(msa, i, ainfo->sqinfo[i].acc);      if (ainfo->sqinfo[i].flags & SQINFO_DESC) 	MSASetSeqDescription(msa, i, ainfo->sqinfo[i].desc);      if (ainfo->sqinfo[i].flags & SQINFO_SS) {	if (msa->ss == NULL) {	  msa->ss    = MallocOrDie(sizeof(char *) * msa->nseqalloc);	  msa->sslen = MallocOrDie(sizeof(int)    * msa->nseqalloc);	  for (j = 0; j < msa->nseqalloc; j++) {	    msa->ss[j]    = NULL;	    msa->sslen[j] = 0;	  }	}	MakeAlignedString(msa->aseq[i], msa->alen, ainfo->sqinfo[i].ss, &(msa->ss[i]));	msa->sslen[i] = msa->alen;      }      if (ainfo->sqinfo[i].flags & SQINFO_SA) {	if (msa->sa == NULL) {	  msa->sa    = MallocOrDie(sizeof(char *) * msa->nseqalloc);	  msa->salen = MallocOrDie(sizeof(int)    * msa->nseqalloc);	  for (j = 0; j < msa->nseqalloc; j++) {	    msa->sa[j]    = NULL;	    msa->salen[j] = 0;	  }	}	MakeAlignedString(msa->aseq[i], msa->alen, ainfo->sqinfo[i].sa, &(msa->sa[i]));	msa->salen[i] = msa->alen;      }    }			/* note that sre_strdup() returns NULL when passed NULL */  msa->name    = sre_strdup(ainfo->name, -1);  msa->desc    = sre_strdup(ainfo->desc, -1);  msa->acc     = sre_strdup(ainfo->acc,  -1);  msa->au      = sre_strdup(ainfo->au,   -1);  msa->ss_cons = sre_strdup(ainfo->cs,   -1);  msa->rf      = sre_strdup(ainfo->rf,   -1);  if (ainfo->flags & AINFO_TC) {    msa->cutoff[MSA_CUTOFF_TC1] = ainfo->tc1; msa->cutoff_is_set[MSA_CUTOFF_TC1] = TRUE;    msa->cutoff[MSA_CUTOFF_TC2] = ainfo->tc2; msa->cutoff_is_set[MSA_CUTOFF_TC2] = TRUE;  }  if (ainfo->flags & AINFO_NC) {    msa->cutoff[MSA_CUTOFF_NC1] = ainfo->nc1; msa->cutoff_is_set[MSA_CUTOFF_NC1] = TRUE;    msa->cutoff[MSA_CUTOFF_NC2] = ainfo->nc2; msa->cutoff_is_set[MSA_CUTOFF_NC2] = TRUE;  }  if (ainfo->flags & AINFO_GA) {    msa->cutoff[MSA_CUTOFF_GA1] = ainfo->ga1; msa->cutoff_is_set[MSA_CUTOFF_GA1] = TRUE;    msa->cutoff[MSA_CUTOFF_GA2] = ainfo->ga2; msa->cutoff_is_set[MSA_CUTOFF_GA2] = TRUE;  }  msa->nseq = ainfo->nseq;  msa->alen = ainfo->alen;  return msa;}/* Function: MSAFileFormat() * Date:     SRE, Fri Jun 18 14:26:49 1999 [Sanger Centre] * * Purpose:  (Attempt to) determine the format of an alignment file. *           Since it rewinds the file pointer when it's done, *           cannot be used on a pipe or gzip'ed file. Works by *           calling SeqfileFormat() from sqio.c, then making sure *           that the format is indeed an alignment. If the format *           comes back as FASTA, it assumes that the format as A2M  *           (e.g. aligned FASTA). * * Args:     fname   - file to evaluate * * Returns:  format code; e.g. MSAFILE_STOCKHOLM */intMSAFileFormat(MSAFILE *afp){  int fmt;  fmt = SeqfileFormat(afp->f);  if (fmt == SQFILE_FASTA) fmt = MSAFILE_A2M;  if (fmt != MSAFILE_UNKNOWN && ! IsAlignmentFormat(fmt))     Die("File %s does not appear to be an alignment file;\n\rather, it appears to be an unaligned file in %s format.\n\I'm expecting an alignment file in this context.\n",	afp->fname,	SeqfileFormat2String(fmt));  return fmt;}/* Function: MSAMingap() * Date:     SRE, Mon Jun 28 18:57:54 1999 [on jury duty, St. Louis Civil Court] * * Purpose:  Remove all-gap columns from a multiple sequence alignment *           and its associated per-residue data. * * Args:     msa - the alignment * * Returns:  (void) */voidMSAMingap(MSA *msa){  int *useme;			/* array of TRUE/FALSE flags for which columns to keep */  int apos;			/* position in original alignment */  int idx;			/* sequence index */  useme = MallocOrDie(sizeof(int) * msa->alen);  for (apos = 0; apos < msa->alen; apos++)    {      for (idx = 0; idx < msa->nseq; idx++)	if (! isgap(msa->aseq[idx][apos]))	  break;      if (idx == msa->nseq) useme[apos] = FALSE; else useme[apos] = TRUE;    }  MSAShorterAlignment(msa, useme);  free(useme);  return;}/* Function: MSANogap() * Date:     SRE, Wed Nov 17 09:59:51 1999 [St. Louis] * * Purpose:  Remove all columns from a multiple sequence alignment that *           contain any gaps -- used for filtering before phylogenetic *           analysis. * * Args:     msa - the alignment * * Returns:  (void). The alignment is modified, so if you want to keep *           the original for something, make a copy. */voidMSANogap(MSA *msa){  int *useme;			/* array of TRUE/FALSE flags for which columns to keep */  int apos;			/* position in original alignment */  int idx;			/* sequence index */  useme = MallocOrDie(sizeof(int) * msa->alen);  for (apos = 0; apos < msa->alen; apos++)    {      for (idx = 0; idx < msa->nseq; idx++)	if (isgap(msa->aseq[idx][apos]))	  break;      if (idx == msa->nseq) useme[apos] = TRUE; else useme[apos] = FALSE;    }  MSAShorterAlignment(msa, useme);  free(useme);  return;}/* Function: MSAShorterAlignment() * Date:     SRE, Wed Nov 17 09:49:32 1999 [St. Louis] * * Purpose:  Given an array "useme" (0..alen-1) of TRUE/FALSE flags, *           where TRUE means "keep this column in the new alignment": *           Remove all columns annotated as "FALSE" in the useme *           array. * * Args:     msa   - the alignment. The alignment is changed, so *                   if you don't want the original screwed up, make *                   a copy of it first. *           useme - TRUE/FALSE flags for columns to keep: 0..alen-1 * * Returns:  (void) */voidMSAShorterAlignment(MSA *msa, int *useme){  int apos;			/* position in original alignment */  int mpos;			/* position in new alignment      */  int idx;			/* sequence index */  int i;			/* markup index */  /* Since we're minimizing, we can overwrite, using already allocated   * memory.   */  for (apos = 0, mpos = 0; apos < msa->alen; apos++)    {      if (useme[apos] == FALSE) continue;			/* shift alignment and associated per-column+per-residue markup */      if (mpos != apos)	{	  for (idx = 0; idx < msa->nseq; idx++)	    {	      msa->aseq[idx][mpos] = msa->aseq[idx][apos];	      if (msa->ss != NULL && msa->ss[idx] != NULL) msa->ss[idx][mpos] = msa->ss[idx][apos];	      if (msa->sa != NULL && msa->sa[idx] != NULL) msa->sa[idx][mpos] = msa->sa[idx][apos];	      	      for (i = 0; i < msa->ngr; i++)		if (msa->gr[i][idx] != NULL) msa->gr[i][idx][mpos] = msa->gr[i][idx][apos];	    }	  	  if (msa->ss_cons != NULL) msa->ss_cons[mpos] = msa->ss_cons[apos];	  if (msa->sa_cons != NULL) msa->sa_cons[mpos] = msa->sa_cons[apos];	  if (msa->rf      != NULL) msa->rf[mpos]      = msa->rf[apos];	  for (i = 0; i < msa->ngc; i++)	    msa->gc[i][mpos] = msa->gc[i][apos];	}      mpos++;    }		  msa->alen = mpos;		/* set new length */				/* null terminate everything */  for (idx = 0; idx < msa->nseq; idx++)    {      msa->aseq[idx][mpos] = '\0';      if (msa->ss != NULL && msa->ss[idx] != NULL) msa->ss[idx][mpos] = '\0';      if (msa->sa != NULL && msa->sa[idx] != NULL) msa->sa[idx][mpos] = '\0';	            for (i = 0; i < msa->ngr; i++)	if (msa->gr[i][idx] != NULL) msa->gr[i][idx][mpos] = '\0';    }  if (msa->ss_cons != NULL) msa->ss_cons[mpos] = '\0';  if (msa->sa_cons != NULL) msa->sa_cons[mpos] = '\0';  if (msa->rf != NULL)      msa->rf[mpos] = '\0';  for (i = 0; i < msa->ngc; i++)    msa->gc[i][mpos] = '\0';  return;}/* Function: MSASmallerAlignment() * Date:     SRE, Wed Jun 30 09:56:08 1999 [St. Louis] * * Purpose:  Given an array "useme" of TRUE/FALSE flags for *           each sequence in an alignment, construct *           and return a new alignment containing only  *           those sequences that are flagged useme=TRUE. *            *           Used by routines such as MSAFilterAlignment() *           and MSASampleAlignment(). *            * Limitations: *           Does not copy unparsed Stockholm markup. * *           Does not make assumptions about meaning of wgt; *           if you want the new wgt vector renormalized, do *           it yourself with FNorm(new->wgt, new->nseq).  * * Args:     msa     -- the original (larger) alignment *           useme   -- [0..nseq-1] array of TRUE/FALSE flags; TRUE means include  *                      this seq in new alignment *           ret_new -- RETURN: new alignment           * * Returns:  void *           ret_new is allocated here; free with MSAFree()  */voidMSASmallerAlignment(MSA *msa, int *useme, MSA **ret_new){  MSA *new;                     /* RETURN: new alignment */  int nnew;			/* number of seqs in new msa (e.g. # of TRUEs) */  int oidx, nidx;		/* old, new indices */  int i;  nnew = 0;  for (oidx = 0; oidx < msa->nseq; oidx++)    if (useme[oidx]) nnew++;  if (nnew == 0) { *ret_new = NULL; return; }    new  = MSAAlloc(nnew, 0);  nidx = 0;  for (oidx = 0; oidx < msa->nseq; oidx++)    if (useme[oidx])      {	new->aseq[nidx]   = sre_strdup(msa->aseq[oidx],   msa->alen);	new->sqname[nidx] = sre_strdup(msa->sqname[oidx], msa->alen);	GKIStoreKey(new->index, msa->sqname[oidx]);	new->wgt[nidx]    = msa->wgt[oidx];	if (msa->sqacc != NULL)	  MSASetSeqAccession(new, nidx, msa->sqacc[oidx]);	if (msa->sqdesc != NULL)	  MSASetSeqDescription(new, nidx, msa->sqdesc[oidx]);	if (msa->ss != NULL && msa->ss[oidx] != NULL)	  {	    if (new->ss == NULL) new->ss = MallocOrDie(sizeof(char *) * new->nseq);	    new->ss[nidx] = sre_strdup(msa->ss[oidx], -1);	  }	if (msa->sa != NULL && msa->sa[oidx] != NULL)	  {	    if (new->sa == NULL) new->sa = MallocOrDie(sizeof(char *) * new->nseq);	    new->sa[nidx] = sre_strdup(msa->sa[oidx], -1);	  }	nidx++;      }  new->nseq    = nnew;  new->alen    = msa->alen;   new->flags   = msa->flags;  new->type    = msa->type;  new->name    = sre_strdup(msa->name, -1);  new->desc    = sre_strdup(msa->desc, -1);  new->acc     = sre_strdup(msa->acc, -1);  new->au      = sre_strdup(msa->au, -1);  new->ss_cons = sre_strdup(msa->ss_cons, -1);  new->sa_cons = sre_strdup(msa->sa_cons, -1);  new->rf      = sre_strdup(msa->rf, -1);  for (i = 0; i < MSA_MAXCUTOFFS; i++) {    new->cutoff[i]        = msa->cutoff[i];    new->cutoff_is_set[i] = msa->cutoff_is_set[i];  }  free(new->sqlen);  MSAMingap(new);  *ret_new = new;  return;}/***************************************************************** * Retrieval routines *  * Access to MSA structure data is possible through these routines. * I'm not doing this because of object oriented design, though * it might work in my favor someday. * I'm doing this because lots of MSA data is optional, and * checking through the chain of possible NULLs is a pain. *****************************************************************/char *MSAGetSeqAccession(MSA *msa, int idx){  if (msa->sqacc != NULL && msa->sqacc[idx] != NULL)    return msa->sqacc[idx];  else    return NULL;}char *MSAGetSeqDescription(MSA *msa, int idx){  if (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL)    return msa->sqdesc[idx];  else    return NULL;}char *MSAGetSeqSS(MSA *msa, int idx){  if (msa->ss != NULL && msa->ss[idx] != NULL)    return msa->ss[idx];  else    return NULL;}char *MSAGetSeqSA(MSA *msa, int idx){  if (msa->sa != NULL && msa->sa[idx] != NULL)    return msa->sa[idx];  else    return NULL;}/***************************************************************** * Information routines *  * Access information about the MSA. *****************************************************************//* Function: MSAAverageSequenceLength() * Date:     SRE, Sat Apr  6 09:41:34 2002 [St. Louis] * * Purpose:  Return the average length of the (unaligned) sequences *           in the MSA. * * Args:     msa  - the alignment * * Returns:  average length */floatMSAAverageSequenceLength(MSA *msa){  int   i;  float avg;    avg = 0.;  for (i = 0; i < msa->nseq; i++)     avg += (float) DealignedLength(msa->aseq[i]);  if (msa->nseq == 0) return 0.;  else                return (avg / msa->nseq);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -