📄 alignio.c
字号:
/***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. *****************************************************************//* alignio.c * SRE, Mon Jul 12 11:57:37 1993 * RCS $Id: alignio.c,v 1.10 2000/03/20 01:48:53 eddy Exp $ * * Input/output of sequence alignments. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "squid.h"#ifdef MEMDEBUG#include "dbmalloc.h"#endif/* Function: AllocAlignment() * * Purpose: Allocate space for an alignment, given the number * of sequences and the alignment length in columns. * * Args: nseq - number of sequences * alen - width of alignment * ret_aseq - RETURN: alignment itself * ainfo - RETURN: other info associated with alignment * * Return: (void) * aseq, ainfo free'd by caller: FreeAlignment(aseq, &ainfo). * note that ainfo itself is alloc'ed in caller, usually * just by a "AINFO ainfo" definition. */voidAllocAlignment(int nseq, int alen, char ***ret_aseq, AINFO *ainfo){ char **aseq; int idx; InitAinfo(ainfo); aseq = (char **) MallocOrDie (sizeof(char *) * nseq); for (idx = 0; idx < nseq; idx++) aseq[idx] = (char *) MallocOrDie (sizeof(char) * (alen+1)); ainfo->alen = alen; ainfo->nseq = nseq; ainfo->wgt = (float *) MallocOrDie (sizeof(float) * nseq); FSet(ainfo->wgt, nseq, 1.0); ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq); for (idx = 0; idx < nseq; idx++) ainfo->sqinfo[idx].flags = 0; *ret_aseq = aseq;} /* Function: InitAinfo() * Date: SRE, Tue Jan 19 10:16:02 1999 [St. Louis] * * Purpose: Initialize the fields in ainfo structure to * default (null) values. Does nothing with * fields that are dependent on nseq or alen. * * Args: ainfo - optional info structure for an alignment * * Returns: (void). ainfo is modified. */voidInitAinfo(AINFO *ainfo){ ainfo->name = NULL; ainfo->desc = NULL; ainfo->cs = NULL; ainfo->rf = NULL; ainfo->acc = NULL; ainfo->au = NULL; ainfo->flags = 0; ainfo->tc1 = ainfo->tc2 = 0.0; ainfo->nc1 = ainfo->nc2 = 0.0; ainfo->ga1 = ainfo->ga2 = 0.0;}/* Function: FreeAlignment() * * Purpose: Free the space allocated to alignment, names, and optional * information. * * Args: aseqs - sequence alignment * ainfo - associated alignment data. */ voidFreeAlignment(char **aseqs, AINFO *ainfo){ int i; for (i = 0; i < ainfo->nseq; i++) { if (ainfo->sqinfo[i].flags & SQINFO_SS) free(ainfo->sqinfo[i].ss); if (ainfo->sqinfo[i].flags & SQINFO_SA) free(ainfo->sqinfo[i].sa); } if (ainfo->cs != NULL) free(ainfo->cs); if (ainfo->rf != NULL) free(ainfo->rf); if (ainfo->name != NULL) free(ainfo->name); if (ainfo->desc != NULL) free(ainfo->desc); if (ainfo->acc != NULL) free(ainfo->acc); if (ainfo->au != NULL) free(ainfo->au); free(ainfo->sqinfo); free(ainfo->wgt); Free2DArray((void **) aseqs, ainfo->nseq);}/* Function: SAMizeAlignment() * Date: SRE, Tue Jun 30 09:49:40 1998 [St. Louis] * * Purpose: Make a "best effort" attempt to convert an alignment * to SAM gap format: - in delete col, . in insert col. * Only works if alignment adheres to SAM's upper/lower * case convention, which is true for instance of old * HMMER alignments. * * Args: aseq - alignment to convert * nseq - number of seqs in alignment * alen - length of alignment * * Returns: (void) */voidSAMizeAlignment(char **aseq, int nseq, int alen){ int col; /* counter for aligned columns */ int i; /* counter for seqs */ int sawlower, sawupper, sawgap; char gapchar; for (col = 0; col < alen; col++) { sawlower = sawupper = sawgap = 0; /* pass 1: do we see only upper or lower? */ for (i = 0; i < nseq; i++) { if (isgap(aseq[i][col])) { sawgap = 1; continue; } if (isupper((int) aseq[i][col])) { sawupper = 1; continue; } if (islower((int) aseq[i][col])) sawlower = 1; } /* select gap character for column */ gapchar = '-'; /* default */ if (sawlower && ! sawupper) gapchar = '.'; /* pass 2: set gap char */ for (i = 0; i < nseq; i++) if (isgap(aseq[i][col])) aseq[i][col] = gapchar; }}/* Function: SAMizeAlignmentByGapFrac() * Date: SRE, Tue Jun 30 10:58:38 1998 [St. Louis] * * Purpose: Convert an alignment to SAM's gap and case * conventions, using gap fraction in a column * to choose match versus insert columns. In match columns, * residues are upper case and gaps are '-'. * In insert columns, residues are lower case and * gaps are '.' * * Args: aseq - aligned sequences * nseq - number of sequences * alen - length of alignment * maxgap - if more gaps than this fraction, column is insert. * * Returns: (void) Characters in aseq may be altered. */voidSAMizeAlignmentByGapFrac(char **aseq, int nseq, int alen, float maxgap){ int apos; /* counter over columns */ int idx; /* counter over sequences */ int ngap; /* number of gaps seen */ for (apos = 0; apos < alen; apos++) { /* count gaps */ ngap = 0; for (idx = 0; idx < nseq; idx++) if (isgap(aseq[idx][apos])) ngap++; /* convert to SAM conventions */ if ((float) ngap / (float) nseq > maxgap) { /* insert column */ for (idx = 0; idx < nseq; idx++) if (isgap(aseq[idx][apos])) aseq[idx][apos] = '.'; else aseq[idx][apos] = (char) tolower((int) aseq[idx][apos]); } else { /* match column */ for (idx = 0; idx < nseq; idx++) if (isgap(aseq[idx][apos])) aseq[idx][apos] = '-'; else aseq[idx][apos] = (char) toupper((int) aseq[idx][apos]); } }}/* Function: MakeAlignedString() * * Purpose: Given a raw string of some type (secondary structure, say), * align it to a given aseq by putting gaps wherever the * aseq has gaps. * * Args: aseq: template for alignment * alen: length of aseq * ss: raw string to align to aseq * ret_s: RETURN: aligned ss * * Return: 1 on success, 0 on failure (and squid_errno is set.) * ret_ss is malloc'ed here and must be free'd by caller. */intMakeAlignedString(char *aseq, int alen, char *ss, char **ret_s){ char *new; int apos, rpos; new = (char *) MallocOrDie ((alen+1) * sizeof(char)); for (apos = rpos = 0; apos < alen; apos++) if (! isgap(aseq[apos])) { new[apos] = ss[rpos]; rpos++; } else new[apos] = '.'; new[apos] = '\0'; if (rpos != strlen(ss)) { squid_errno = SQERR_PARAMETER; free(new); return 0; } *ret_s = new; return 1;}/* Function: MakeDealignedString() * * Purpose: Given an aligned string of some type (either sequence or * secondary structure, for instance), dealign it relative * to a given aseq. Return a ptr to the new string. * * Args: aseq : template alignment * alen : length of aseq * ss: : string to make dealigned copy of; same length as aseq * ret_s : RETURN: dealigned copy of ss * * Return: 1 on success, 0 on failure (and squid_errno is set) * ret_s is alloc'ed here and must be freed by caller */intMakeDealignedString(char *aseq, int alen, char *ss, char **ret_s){ char *new; int apos, rpos; new = (char *) MallocOrDie ((alen+1) * sizeof(char)); for (apos = rpos = 0; apos < alen; apos++) if (! isgap(aseq[apos])) { new[rpos] = ss[apos]; rpos++; } new[rpos] = '\0'; if (alen != strlen(ss)) { squid_errno = SQERR_PARAMETER; free(new); return 0; } *ret_s = new; return 1;}/* Function: DealignedLength() * * Purpose: Count the number of non-gap symbols in seq. * (i.e. find the length of the unaligned sequence) * * Args: aseq - aligned sequence to count symbols in, \0 terminated * * Return: raw length of seq. */intDealignedLength(char *aseq){ int rlen; for (rlen = 0; *aseq; aseq++) if (! isgap(*aseq)) rlen++; return rlen;}/* Function: WritePairwiseAlignment() * * Purpose: Write a nice formatted pairwise alignment out, * with a BLAST-style middle line showing identities * as themselves (single letter) and conservative * changes as '+'. * * Args: ofp - open fp to write to (stdout, perhaps)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -