📄 msa.c
字号:
/***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-2003 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. *****************************************************************//* msa.c * SRE, Mon May 17 10:48:47 1999 * * SQUID's interface for multiple sequence alignment * manipulation: access to the MSA object. * * CVS $Id: msa.c,v 1.20 2003/05/26 16:21:50 eddy Exp $ */#include "squidconf.h"#include <stdio.h>#include <stdlib.h>#include <string.h>#include "squid.h"#include "msa.h" /* multiple sequence alignment object support */#include "gki.h" /* string indexing hashtable code */#include "ssi.h" /* SSI sequence file indexing code *//* Function: MSAAlloc() * Date: SRE, Tue May 18 10:45:47 1999 [St. Louis] * * Purpose: Allocate an MSA structure, return a pointer * to it. * * Designed to be used in three ways: * 1) We know exactly the dimensions of the alignment: * both nseq and alen. * msa = MSAAlloc(nseq, alen); * * 2) We know the number of sequences but not alen. * (We add sequences later.) * msa = MSAAlloc(nseq, 0); * * 3) We even don't know the number of sequences, so * we'll have to dynamically expand allocations. * We provide a blocksize for the allocation expansion, * and expand when needed. * msa = MSAAlloc(10, 0); * if (msa->nseq == msa->nseqalloc) MSAExpand(msa); * * Args: nseq - number of sequences, or nseq allocation blocksize * alen - length of alignment in columns, or 0 * * Returns: pointer to new MSA object, w/ all values initialized. * Note that msa->nseq is initialized to 0, though space * is allocated. * * Diagnostics: "always works". Die()'s on memory allocation failure. * */MSA *MSAAlloc(int nseq, int alen){ MSA *msa; int i; msa = MallocOrDie(sizeof(MSA)); msa->aseq = MallocOrDie(sizeof(char *) * nseq); msa->sqname = MallocOrDie(sizeof(char *) * nseq); msa->sqlen = MallocOrDie(sizeof(int) * nseq); msa->wgt = MallocOrDie(sizeof(float) * nseq); for (i = 0; i < nseq; i++) { msa->sqname[i] = NULL; msa->sqlen[i] = 0; msa->wgt[i] = -1.0; if (alen != 0) msa->aseq[i] = MallocOrDie(sizeof(char) * (alen+1)); else msa->aseq[i] = NULL; } msa->alen = alen; msa->nseq = 0; msa->nseqalloc = nseq; msa->nseqlump = nseq; msa->flags = 0; msa->type = kOtherSeq; msa->name = NULL; msa->desc = NULL; msa->acc = NULL; msa->au = NULL; msa->ss_cons = NULL; msa->sa_cons = NULL; msa->rf = NULL; msa->sqacc = NULL; msa->sqdesc = NULL; msa->ss = NULL; msa->sslen = NULL; msa->sa = NULL; msa->salen = NULL; msa->index = GKIInit(); msa->lastidx = 0; for (i = 0; i < MSA_MAXCUTOFFS; i++) { msa->cutoff[i] = 0.; msa->cutoff_is_set[i] = FALSE; } /* Initialize unparsed optional markup */ msa->comment = NULL; msa->ncomment = 0; msa->alloc_ncomment = 0; msa->gf_tag = NULL; msa->gf = NULL; msa->ngf = 0; msa->gs_tag = NULL; msa->gs = NULL; msa->gs_idx = NULL; msa->ngs = 0; msa->gc_tag = NULL; msa->gc = NULL; msa->gc_idx = NULL; msa->ngc = 0; msa->gr_tag = NULL; msa->gr = NULL; msa->gr_idx = NULL; msa->ngr = 0; /* Done. Return the alloced, initialized structure */ return msa;}/* Function: MSAExpand() * Date: SRE, Tue May 18 11:06:53 1999 [St. Louis] * * Purpose: Increase the sequence allocation in an MSA * by msa->nseqlump. (Typically used when we're reading * in an alignment sequentially from a file, * so we don't know nseq until we're done.) * * Args: msa - the MSA object * * Returns: (void) * */voidMSAExpand(MSA *msa){ int i,j; msa->nseqalloc += msa->nseqlump; msa->aseq = ReallocOrDie(msa->aseq, sizeof(char *) * msa->nseqalloc); msa->sqname = ReallocOrDie(msa->sqname, sizeof(char *) * msa->nseqalloc); msa->sqlen = ReallocOrDie(msa->sqlen, sizeof(char *) * msa->nseqalloc); msa->wgt = ReallocOrDie(msa->wgt, sizeof(float) * msa->nseqalloc); if (msa->ss != NULL) { msa->ss = ReallocOrDie(msa->ss, sizeof(char *) * msa->nseqalloc); msa->sslen = ReallocOrDie(msa->sslen, sizeof(int) * msa->nseqalloc); } if (msa->sa != NULL) { msa->sa = ReallocOrDie(msa->sa, sizeof(char *) * msa->nseqalloc); msa->salen = ReallocOrDie(msa->salen, sizeof(int) * msa->nseqalloc); } if (msa->sqacc != NULL) msa->sqacc = ReallocOrDie(msa->sqacc, sizeof(char *) * msa->nseqalloc); if (msa->sqdesc != NULL) msa->sqdesc =ReallocOrDie(msa->sqdesc,sizeof(char *) * msa->nseqalloc); for (i = msa->nseqalloc-msa->nseqlump; i < msa->nseqalloc; i++) { msa->sqname[i] = NULL; msa->wgt[i] = -1.0; if (msa->sqacc != NULL) msa->sqacc[i] = NULL; if (msa->sqdesc != NULL) msa->sqdesc[i] = NULL; if (msa->alen != 0) msa->aseq[i] = ReallocOrDie(msa->aseq[i], sizeof(char) * (msa->alen+1)); else msa->aseq[i] = NULL; msa->sqlen[i] = 0; if (msa->ss != NULL) { if (msa->alen != 0) msa->ss[i] = ReallocOrDie(msa->ss[i], sizeof(char) * (msa->alen+1)); else msa->ss[i] = NULL; msa->sslen[i] = 0; } if (msa->sa != NULL) { if (msa->alen != 0) msa->sa[i] = ReallocOrDie(msa->ss[i], sizeof(char) * (msa->alen+1)); else msa->sa[i] = NULL; msa->salen[i] = 0; } } /* Reallocate and re-init for unparsed #=GS tags, if we have some. * gs is [0..ngs-1][0..nseq-1][], so we're reallocing the middle * set of pointers. */ if (msa->gs != NULL) for (i = 0; i < msa->ngs; i++) { if (msa->gs[i] != NULL) { msa->gs[i] = ReallocOrDie(msa->gs[i], sizeof(char *) * msa->nseqalloc); for (j = msa->nseqalloc-msa->nseqlump; j < msa->nseqalloc; j++) msa->gs[i][j] = NULL; } } /* Reallocate and re-init for unparsed #=GR tags, if we have some. * gr is [0..ngs-1][0..nseq-1][], so we're reallocing the middle * set of pointers. */ if (msa->gr != NULL) for (i = 0; i < msa->ngr; i++) { if (msa->gr[i] != NULL) { msa->gr[i] = ReallocOrDie(msa->gr[i], sizeof(char *) * msa->nseqalloc); for (j = msa->nseqalloc-msa->nseqlump; j < msa->nseqalloc; j++) msa->gr[i][j] = NULL; } } return;}/* Function: MSAFree() * Date: SRE, Tue May 18 11:20:16 1999 [St. Louis] * * Purpose: Free a multiple sequence alignment structure. * * Args: msa - the alignment * * Returns: (void) */voidMSAFree(MSA *msa){ Free2DArray((void **) msa->aseq, msa->nseq); Free2DArray((void **) msa->sqname, msa->nseq); Free2DArray((void **) msa->sqacc, msa->nseq); Free2DArray((void **) msa->sqdesc, msa->nseq); Free2DArray((void **) msa->ss, msa->nseq); Free2DArray((void **) msa->sa, msa->nseq); if (msa->sqlen != NULL) free(msa->sqlen); if (msa->wgt != NULL) free(msa->wgt); if (msa->name != NULL) free(msa->name); if (msa->desc != NULL) free(msa->desc); if (msa->acc != NULL) free(msa->acc); if (msa->au != NULL) free(msa->au); if (msa->ss_cons != NULL) free(msa->ss_cons); if (msa->sa_cons != NULL) free(msa->sa_cons); if (msa->rf != NULL) free(msa->rf); if (msa->sslen != NULL) free(msa->sslen); if (msa->salen != NULL) free(msa->salen); Free2DArray((void **) msa->comment, msa->ncomment); Free2DArray((void **) msa->gf_tag, msa->ngf); Free2DArray((void **) msa->gf, msa->ngf); Free2DArray((void **) msa->gs_tag, msa->ngs); Free3DArray((void ***)msa->gs, msa->ngs, msa->nseq); Free2DArray((void **) msa->gc_tag, msa->ngc); Free2DArray((void **) msa->gc, msa->ngc); Free2DArray((void **) msa->gr_tag, msa->ngr); Free3DArray((void ***)msa->gr, msa->ngr, msa->nseq); GKIFree(msa->index); GKIFree(msa->gs_idx); GKIFree(msa->gc_idx); GKIFree(msa->gr_idx); free(msa);}/* Function: MSASetSeqAccession() * Date: SRE, Mon Jun 21 04:13:33 1999 [Sanger Centre] * * Purpose: Set a sequence accession in an MSA structure. * Handles some necessary allocation/initialization. * * Args: msa - multiple alignment to add accession to * seqidx - index of sequence to attach accession to * acc - accession * * Returns: void */voidMSASetSeqAccession(MSA *msa, int seqidx, char *acc){ int x; if (msa->sqacc == NULL) { msa->sqacc = MallocOrDie(sizeof(char *) * msa->nseqalloc); for (x = 0; x < msa->nseqalloc; x++) msa->sqacc[x] = NULL; } msa->sqacc[seqidx] = sre_strdup(acc, -1);}/* Function: MSASetSeqDescription() * Date: SRE, Mon Jun 21 04:21:09 1999 [Sanger Centre] * * Purpose: Set a sequence description in an MSA structure. * Handles some necessary allocation/initialization. * * Args: msa - multiple alignment to add accession to * seqidx - index of sequence to attach accession to * desc - description * * Returns: void */voidMSASetSeqDescription(MSA *msa, int seqidx, char *desc){ int x; if (msa->sqdesc == NULL) { msa->sqdesc = MallocOrDie(sizeof(char *) * msa->nseqalloc); for (x = 0; x < msa->nseqalloc; x++) msa->sqdesc[x] = NULL; } msa->sqdesc[seqidx] = sre_strdup(desc, -1);}/* Function: MSAAddComment() * Date: SRE, Tue Jun 1 17:37:21 1999 [St. Louis] * * Purpose: Add an (unparsed) comment line to the MSA structure, * allocating as necessary. * * Args: msa - a multiple alignment * s - comment line to add * * Returns: (void) */voidMSAAddComment(MSA *msa, char *s){ /* If this is our first recorded comment, we need to malloc(); * and if we've filled available space, we need to realloc(). * Note the arbitrary lumpsize of 10 lines per allocation... */ if (msa->comment == NULL) { msa->comment = MallocOrDie (sizeof(char *) * 10); msa->alloc_ncomment = 10; } if (msa->ncomment == msa->alloc_ncomment) { msa->alloc_ncomment += 10; msa->comment = ReallocOrDie(msa->comment, sizeof(char *) * msa->alloc_ncomment); } msa->comment[msa->ncomment] = sre_strdup(s, -1); msa->ncomment++; return;}/* Function: MSAAddGF() * Date: SRE, Wed Jun 2 06:53:54 1999 [bus to Madison] * * Purpose: Add an unparsed #=GF markup line to the MSA * structure, allocating as necessary. * * Args: msa - a multiple alignment * tag - markup tag (e.g. "AU") * value - free text markup (e.g. "Alex Bateman") * * Returns: (void) */voidMSAAddGF(MSA *msa, char *tag, char *value){ /* If this is our first recorded unparsed #=GF line, we need to malloc(); * if we've filled availabl space If we already have a hash index, and the GF * Note the arbitrary lumpsize of 10 lines per allocation... */ if (msa->gf_tag == NULL) { msa->gf_tag = MallocOrDie (sizeof(char *) * 10); msa->gf = MallocOrDie (sizeof(char *) * 10); msa->alloc_ngf = 10; } if (msa->ngf == msa->alloc_ngf) { msa->alloc_ngf += 10; msa->gf_tag = ReallocOrDie(msa->gf_tag, sizeof(char *) * msa->alloc_ngf); msa->gf = ReallocOrDie(msa->gf, sizeof(char *) * msa->alloc_ngf); } msa->gf_tag[msa->ngf] = sre_strdup(tag, -1); msa->gf[msa->ngf] = sre_strdup(value, -1); msa->ngf++; return;}/* Function: MSAAddGS() * Date: SRE, Wed Jun 2 06:57:03 1999 [St. Louis] * * Purpose: Add an unparsed #=GS markup line to the MSA * structure, allocating as necessary. * * It's possible that we could get more than one * of the same type of GS tag per sequence; for * example, "DR PDB;" structure links in Pfam. * Hack: handle these by appending to the string, * in a \n separated fashion. * * Args: msa - multiple alignment structure * tag - markup tag (e.g. "AC") * sqidx - index of sequence to assoc markup with (0..nseq-1) * value - markup (e.g. "P00666") * * Returns: 0 on success */voidMSAAddGS(MSA *msa, char *tag, int sqidx, char *value){ int tagidx; int i; /* Is this an unparsed tag name that we recognize? * If not, handle adding it to index, and reallocating * as needed. */ if (msa->gs_tag == NULL) /* first tag? init w/ malloc */ { msa->gs_idx = GKIInit(); tagidx = GKIStoreKey(msa->gs_idx, tag); SQD_DASSERT1((tagidx == 0)); msa->gs_tag = MallocOrDie(sizeof(char *)); msa->gs = MallocOrDie(sizeof(char **)); msa->gs[0] = MallocOrDie(sizeof(char *) * msa->nseqalloc); for (i = 0; i < msa->nseqalloc; i++) msa->gs[0][i] = NULL; } else { /* new tag? */ tagidx = GKIKeyIndex(msa->gs_idx, tag); if (tagidx < 0) { /* it's a new tag name; realloc */ tagidx = GKIStoreKey(msa->gs_idx, tag); /* since we alloc in blocks of 1, we always realloc upon seeing a new tag. */ SQD_DASSERT1((tagidx == msa->ngs)); msa->gs_tag = ReallocOrDie(msa->gs_tag, (msa->ngs+1) * sizeof(char *)); msa->gs = ReallocOrDie(msa->gs, (msa->ngs+1) * sizeof(char **)); msa->gs[msa->ngs] = MallocOrDie(sizeof(char *) * msa->nseqalloc); for (i = 0; i < msa->nseqalloc; i++) msa->gs[msa->ngs][i] = NULL; } } if (tagidx == msa->ngs) { msa->gs_tag[tagidx] = sre_strdup(tag, -1); msa->ngs++; } if (msa->gs[tagidx][sqidx] == NULL) /* first annotation of this seq with this tag? */ msa->gs[tagidx][sqidx] = sre_strdup(value, -1); else { /* >1 annotation of this seq with this tag; append */ int len; if ((len = sre_strcat(&(msa->gs[tagidx][sqidx]), -1, "\n", 1)) < 0)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -