📄 msa.c
字号:
Die("failed to sre_strcat()"); if (sre_strcat(&(msa->gs[tagidx][sqidx]), len, value, -1) < 0) Die("failed to sre_strcat()"); } return;} /* Function: MSAAppendGC() * Date: SRE, Thu Jun 3 06:25:14 1999 [Madison] * * Purpose: Add an unparsed #=GC markup line to the MSA * structure, allocating as necessary. * * When called multiple times for the same tag, * appends value strings together -- used when * parsing multiblock alignment files, for * example. * * Args: msa - multiple alignment structure * tag - markup tag (e.g. "CS") * value - markup, one char per aligned column * * Returns: (void) */voidMSAAppendGC(MSA *msa, char *tag, char *value){ int tagidx; /* Is this an unparsed tag name that we recognize? * If not, handle adding it to index, and reallocating * as needed. */ if (msa->gc_tag == NULL) /* first tag? init w/ malloc */ { msa->gc_tag = MallocOrDie(sizeof(char *)); msa->gc = MallocOrDie(sizeof(char *)); msa->gc_idx = GKIInit(); tagidx = GKIStoreKey(msa->gc_idx, tag); SQD_DASSERT1((tagidx == 0)); msa->gc[0] = NULL; } else { /* new tag? */ tagidx = GKIKeyIndex(msa->gc_idx, tag); if (tagidx < 0) { /* it's a new tag name; realloc */ tagidx = GKIStoreKey(msa->gc_idx, tag); /* since we alloc in blocks of 1, we always realloc upon seeing a new tag. */ SQD_DASSERT1((tagidx == msa->ngc)); msa->gc_tag = ReallocOrDie(msa->gc_tag, (msa->ngc+1) * sizeof(char **)); msa->gc = ReallocOrDie(msa->gc, (msa->ngc+1) * sizeof(char **)); msa->gc[tagidx] = NULL; } } if (tagidx == msa->ngc) { msa->gc_tag[tagidx] = sre_strdup(tag, -1); msa->ngc++; } sre_strcat(&(msa->gc[tagidx]), -1, value, -1); return;}/* Function: MSAGetGC() * Date: SRE, Fri Aug 13 13:25:57 1999 [St. Louis] * * Purpose: Given a tagname for a miscellaneous #=GC column * annotation, return a pointer to the annotation * string. * * Args: msa - alignment and its annotation * tag - name of the annotation * * Returns: ptr to the annotation string. Caller does *not* * free; is managed by msa object still. */char *MSAGetGC(MSA *msa, char *tag){ int tagidx; if (msa->gc_idx == NULL) return NULL; if ((tagidx = GKIKeyIndex(msa->gc_idx, tag)) < 0) return NULL; return msa->gc[tagidx];}/* Function: MSAAppendGR() * Date: SRE, Thu Jun 3 06:34:38 1999 [Madison] * * Purpose: Add an unparsed #=GR markup line to the * MSA structure, allocating as necessary. * * When called multiple times for the same tag, * appends value strings together -- used when * parsing multiblock alignment files, for * example. * * Args: msa - multiple alignment structure * tag - markup tag (e.g. "SS") * sqidx - index of seq to assoc markup with (0..nseq-1) * value - markup, one char per aligned column * * Returns: (void) */voidMSAAppendGR(MSA *msa, char *tag, int sqidx, char *value){ int tagidx; int i; /* Is this an unparsed tag name that we recognize? * If not, handle adding it to index, and reallocating * as needed. */ if (msa->gr_tag == NULL) /* first tag? init w/ malloc */ { msa->gr_tag = MallocOrDie(sizeof(char *)); msa->gr = MallocOrDie(sizeof(char **)); msa->gr[0] = MallocOrDie(sizeof(char *) * msa->nseqalloc); for (i = 0; i < msa->nseqalloc; i++) msa->gr[0][i] = NULL; msa->gr_idx = GKIInit(); tagidx = GKIStoreKey(msa->gr_idx, tag); SQD_DASSERT1((tagidx == 0)); } else { /* new tag? */ tagidx = GKIKeyIndex(msa->gr_idx, tag); if (tagidx < 0) { /* it's a new tag name; realloc */ tagidx = GKIStoreKey(msa->gr_idx, tag); /* since we alloc in blocks of 1, we always realloc upon seeing a new tag. */ SQD_DASSERT1((tagidx == msa->ngr)); msa->gr_tag = ReallocOrDie(msa->gr_tag, (msa->ngr+1) * sizeof(char *)); msa->gr = ReallocOrDie(msa->gr, (msa->ngr+1) * sizeof(char **)); msa->gr[msa->ngr] = MallocOrDie(sizeof(char *) * msa->nseqalloc); for (i = 0; i < msa->nseqalloc; i++) msa->gr[msa->ngr][i] = NULL; } } if (tagidx == msa->ngr) { msa->gr_tag[tagidx] = sre_strdup(tag, -1); msa->ngr++; } sre_strcat(&(msa->gr[tagidx][sqidx]), -1, value, -1); return;}/* Function: MSAVerifyParse() * Date: SRE, Sat Jun 5 14:24:24 1999 [Madison, 1999 worm mtg] * * Purpose: Last function called after a multiple alignment is * parsed. Checks that parse was successful; makes sure * required information is present; makes sure required * information is consistent. Some fields that are * only use during parsing may be freed (sqlen, for * example). * * Some fields in msa may be modified (msa->alen is set, * for example). * * Args: msa - the multiple alignment * sqname, aseq must be set * nseq must be correct * alen need not be set; will be set here. * wgt will be set here if not already set * * Returns: (void) * Will Die() here with diagnostics on error. * * Example: */voidMSAVerifyParse(MSA *msa){ int idx; if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s", msa->name != NULL ? msa->name : ""); msa->alen = msa->sqlen[0]; /* We can rely on msa->sqname[] being valid for any index, * because of the way the line parsers always store any name * they add to the index. */ for (idx = 0; idx < msa->nseq; idx++) { /* aseq is required. */ if (msa->aseq[idx] == NULL) Die("Parse error: No sequence for %s in alignment %s", msa->sqname[idx], msa->name != NULL ? msa->name : ""); /* either all weights must be set, or none of them */ if ((msa->flags & MSA_SET_WGT) && msa->wgt[idx] == -1.0) Die("Parse error: some weights are set, but %s doesn't have one in alignment %s", msa->sqname[idx], msa->name != NULL ? msa->name : ""); /* all aseq must be same length. */ if (msa->sqlen[idx] != msa->alen) Die("Parse error: sequence %s: length %d, expected %d in alignment %s", msa->sqname[idx], msa->sqlen[idx], msa->alen, msa->name != NULL ? msa->name : ""); /* if SS is present, must have length right */ if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->alen) Die("Parse error: #=GR SS annotation for %s: length %d, expected %d in alignment %s", msa->sqname[idx], msa->sslen[idx], msa->alen, msa->name != NULL ? msa->name : ""); /* if SA is present, must have length right */ if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->alen) Die("Parse error: #=GR SA annotation for %s: length %d, expected %d in alignment %s", msa->sqname[idx], msa->salen[idx], msa->alen, msa->name != NULL ? msa->name : ""); } /* if cons SS is present, must have length right */ if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen) Die("Parse error: #=GC SS_cons annotation: length %d, expected %d in alignment %s", strlen(msa->ss_cons), msa->alen, msa->name != NULL ? msa->name : ""); /* if cons SA is present, must have length right */ if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen) Die("Parse error: #=GC SA_cons annotation: length %d, expected %d in alignment %s", strlen(msa->sa_cons), msa->alen, msa->name != NULL ? msa->name : ""); /* if RF is present, must have length right */ if (msa->rf != NULL && strlen(msa->rf) != msa->alen) Die("Parse error: #=GC RF annotation: length %d, expected %d in alignment %s", strlen(msa->rf), msa->alen, msa->name != NULL ? msa->name : ""); /* Check that all or no weights are set */ if (!(msa->flags & MSA_SET_WGT)) FSet(msa->wgt, msa->nseq, 1.0); /* default weights */ /* Clean up a little from the parser */ if (msa->sqlen != NULL) { free(msa->sqlen); msa->sqlen = NULL; } if (msa->sslen != NULL) { free(msa->sslen); msa->sslen = NULL; } if (msa->salen != NULL) { free(msa->salen); msa->salen = NULL; } return;}/* Function: MSAFileOpen() * Date: SRE, Tue May 18 13:22:01 1999 [St. Louis] * * Purpose: Open an alignment database file and prepare * for reading one alignment, or sequentially * in the (rare) case of multiple MSA databases * (e.g. Stockholm format). * * Args: filename - name of file to open * if "-", read stdin * if it ends in ".gz", read from pipe to gunzip -dc * format - format of file (e.g. MSAFILE_STOCKHOLM) * env - environment variable for path (e.g. BLASTDB) * * Returns: opened MSAFILE * on success. * NULL on failure: * usually, because the file doesn't exist; * for gzip'ed files, may also mean that gzip isn't in the path. */MSAFILE *MSAFileOpen(char *filename, int format, char *env){ MSAFILE *afp; afp = MallocOrDie(sizeof(MSAFILE)); if (strcmp(filename, "-") == 0) { afp->f = stdin; afp->do_stdin = TRUE; afp->do_gzip = FALSE; afp->fname = sre_strdup("[STDIN]", -1); afp->ssi = NULL; /* can't index stdin because we can't seek*/ }#ifndef SRE_STRICT_ANSI /* popen(), pclose() aren't portable to non-POSIX systems; disable */ else if (Strparse("^.*\\.gz$", filename, 0)) { char cmd[256]; /* Note that popen() will return "successfully" * if file doesn't exist, because gzip works fine * and prints an error! So we have to check for * existence of file ourself. */ if (! FileExists(filename)) Die("%s: file does not exist", filename); if (strlen(filename) + strlen("gzip -dc ") >= 256) Die("filename > 255 char in MSAFileOpen()"); sprintf(cmd, "gzip -dc %s", filename); if ((afp->f = popen(cmd, "r")) == NULL) return NULL; afp->do_stdin = FALSE; afp->do_gzip = TRUE; afp->fname = sre_strdup(filename, -1); /* we can't index a .gz file, because we can't seek in a pipe afaik */ afp->ssi = NULL; }#endif /*SRE_STRICT_ANSI*/ else { char *ssifile; char *dir; /* When we open a file, it may be either in the current * directory, or in the directory indicated by the env * argument - and we have to construct the SSI filename accordingly. */ if ((afp->f = fopen(filename, "r")) != NULL) { ssifile = MallocOrDie(sizeof(char) * (strlen(filename) + 5)); sprintf(ssifile, "%s.ssi", filename); } else if ((afp->f = EnvFileOpen(filename, env, &dir)) != NULL) { char *full; full = FileConcat(dir, filename); ssifile = MallocOrDie(sizeof(char) * (strlen(full) + strlen(filename) + 5)); sprintf(ssifile, "%s.ssi", full); free(dir); } else return NULL; afp->do_stdin = FALSE; afp->do_gzip = FALSE; afp->fname = sre_strdup(filename, -1); afp->ssi = NULL; /* Open the SSI index file. If it doesn't exist, or * it's corrupt, or some error happens, afp->ssi stays NULL. */ SSIOpen(ssifile, &(afp->ssi)); free(ssifile); } /* Invoke autodetection if we haven't already been told what * to expect. */ if (format == MSAFILE_UNKNOWN) { if (afp->do_stdin == TRUE || afp->do_gzip) Die("Can't autodetect alignment file format from a stdin or gzip pipe"); format = MSAFileFormat(afp); if (format == MSAFILE_UNKNOWN) Die("Can't determine format of multiple alignment file %s", afp->fname); } afp->format = format; afp->linenumber = 0; afp->buf = NULL; afp->buflen = 0; return afp;}/* Function: MSAFilePositionByKey() * MSAFilePositionByIndex() * MSAFileRewind() * * Date: SRE, Tue Nov 9 19:02:54 1999 [St. Louis] * * Purpose: Family of functions for repositioning in * open MSA files; analogous to a similarly * named function series in HMMER's hmmio.c. * * Args: afp - open alignment file * offset - disk offset in bytes * key - key to look up in SSI indices * idx - index of alignment. * * Returns: 0 on failure. * 1 on success. * If called on a non-fseek()'able file (e.g. a gzip'ed * or pipe'd alignment), returns 0 as a failure flag. */int MSAFileRewind(MSAFILE *afp){ if (afp->do_gzip || afp->do_stdin) return 0; rewind(afp->f); return 1;}int MSAFilePositionByKey(MSAFILE *afp, char *key){ int fh; /* filehandle is ignored */ SSIOFFSET offset; /* offset of the key alignment */ if (afp->ssi == NULL) return 0; if (SSIGetOffsetByName(afp->ssi, key, &fh, &offset) != 0) return 0; if (SSISetFilePosition(afp->f, &offset) != 0) return 0; return 1;}intMSAFilePositionByIndex(MSAFILE *afp, int idx){ int fh; /* filehandled is passed but ignored */ SSIOFFSET offset; /* disk offset of desired alignment */ if (afp->ssi == NULL) return 0; if (SSIGetOffsetByNumber(afp->ssi, idx, &fh, &offset) != 0) return 0; if (SSISetFilePosition(afp->f, &offset) != 0) return 0; return 1;}/* Function: MSAFileRead() * Date: SRE, Fri May 28 16:01:43 1999 [St. Louis] * * Purpose: Read the next msa from an open alignment file. * This is a wrapper around format-specific calls. * * Args: afp - open alignment file * * Returns: next alignment, or NULL if out of alignments */MSA *MSAFileRead(MSAFILE *afp){ MSA *msa = NULL; switch (afp->format) { case MSAFILE_STOCKHOLM: msa = ReadStockholm(afp); break; case MSAFILE_MSF: msa = ReadMSF(afp); break; case MSAFILE_A2M: msa = ReadA2M(afp); break; case MSAFILE_CLUSTAL: msa = ReadClustal(afp); break; case MSAFILE_SELEX: msa = ReadSELEX(afp); break; case MSAFILE_PHYLIP: msa = ReadPhylip(afp); break; default: Die("MSAFILE corrupted: bad format index"); } return msa;}/* Function: MSAFileClose() * Date: SRE, Tue May 18 14:05:28 1999 [St. Louis] * * Purpose: Close an open MSAFILE. * * Args: afp - ptr to an open MSAFILE. * * Returns: void */voidMSAFileClose(MSAFILE *afp){#ifndef SRE_STRICT_ANSI /* gzip functionality only on POSIX systems */ if (afp->do_gzip) pclose(afp->f);#endif if (! afp->do_stdin) fclose(afp->f); if (afp->buf != NULL) free(afp->buf); if (afp->ssi != NULL) SSIClose(afp->ssi); if (afp->fname != NULL) free(afp->fname); free(afp);}char *MSAFileGetLine(MSAFILE *afp){ char *s; if ((s = sre_fgets(&(afp->buf), &(afp->buflen), afp->f)) == NULL) return NULL; afp->linenumber++; return afp->buf;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -