⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 alignio.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 2 页
字号:
 *           aseq1, aseq2 - alignments to write (not necessarily  *                          flushed right with gaps) *           name1, name2 - names of sequences *           spos1, spos2 - starting position in each (raw) sequence *           pam          - PAM matrix; positive values define *                          conservative changes *           indent       - how many extra spaces to print on left *            * Return:  1 on success, 0 on failure */intWritePairwiseAlignment(FILE *ofp,		       char *aseq1, char *name1, int spos1,		       char *aseq2, char *name2, int spos2,		       int **pam, int indent){  char sname1[11];              /* shortened name               */  char sname2[11];               int  still_going;		/* True if writing another block */  char buf1[61];		/* buffer for writing seq1; CPL+1*/  char bufmid[61];              /* buffer for writing consensus  */  char buf2[61];  char *s1, *s2;                /* ptrs into each sequence          */  int  count1, count2;		/* number of symbols we're writing  */  int  rpos1, rpos2;		/* position in raw seqs             */  int  rawcount1, rawcount2;	/* number of nongap symbols written */  int  apos;  strncpy(sname1, name1, 10);  sname1[10] = '\0';  strtok(sname1, WHITESPACE);  strncpy(sname2, name2, 10);  sname2[10] = '\0';  strtok(sname2, WHITESPACE);  s1 = aseq1;   s2 = aseq2;  rpos1 = spos1;  rpos2 = spos2;  still_going = TRUE;  while (still_going)    {      still_going = FALSE;      				/* get next line's worth from both */      strncpy(buf1, s1, 60); buf1[60] = '\0';      strncpy(buf2, s2, 60); buf2[60] = '\0';      count1 = strlen(buf1);      count2 = strlen(buf2);				/* is there still more to go? */      if ((count1 == 60 && s1[60] != '\0') ||	  (count2 == 60 && s2[60] != '\0'))	still_going = TRUE;				/* shift seq ptrs by a line */      s1 += count1;      s2 += count2;				/* assemble the consensus line */      for (apos = 0; apos < count1 && apos < count2; apos++)	{	  if (!isgap(buf1[apos]) && !isgap(buf2[apos]))	    {	      if (buf1[apos] == buf2[apos])		bufmid[apos] = buf1[apos];	      else if (pam[buf1[apos] - 'A'][buf2[apos] - 'A'] > 0)		bufmid[apos] = '+';	      else		bufmid[apos] = ' ';	    }	  else	    bufmid[apos] = ' ';	}      bufmid[apos] = '\0';      rawcount1 = 0;      for (apos = 0; apos < count1; apos++)	if (!isgap(buf1[apos])) rawcount1++;            rawcount2 = 0;      for (apos = 0; apos < count2; apos++)	if (!isgap(buf2[apos])) rawcount2++;      (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "", 		     sname1, rpos1, buf1, rpos1 + rawcount1 -1);      (void) fprintf(ofp, "%*s                 %s\n", indent, "",		     bufmid);      (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "", 		     sname2, rpos2, buf2, rpos2 + rawcount2 -1);      (void) fprintf(ofp, "\n");      rpos1 += rawcount1;      rpos2 += rawcount2;    }  return 1;}/* Function: MingapAlignment() *  * Purpose:  Remove all-gap columns from a multiple sequence alignment *           and its associated data. The alignment is assumed to be *           flushed (all aseqs the same length). */intMingapAlignment(char **aseqs, AINFO *ainfo){  int apos;			/* position in original alignment */  int mpos;			/* position in new alignment      */  int idx;  /* We overwrite aseqs, using its allocated memory.   */  for (apos = 0, mpos = 0; aseqs[0][apos] != '\0'; apos++)    {				/* check for all-gap in column */      for (idx = 0; idx < ainfo->nseq; idx++)	if (! isgap(aseqs[idx][apos]))	  break;      if (idx == ainfo->nseq) continue;					/* shift alignment and ainfo */      if (mpos != apos)	{	  for (idx = 0; idx < ainfo->nseq; idx++)	    aseqs[idx][mpos] = aseqs[idx][apos];	  	  if (ainfo->cs != NULL) ainfo->cs[mpos] = ainfo->cs[apos];	  if (ainfo->rf != NULL) ainfo->rf[mpos] = ainfo->rf[apos];	}      mpos++;    }				/* null terminate everything */  for (idx = 0; idx < ainfo->nseq; idx++)    aseqs[idx][mpos] = '\0';  ainfo->alen = mpos;	/* set new length */  if (ainfo->cs != NULL) ainfo->cs[mpos] = '\0';  if (ainfo->rf != NULL) ainfo->rf[mpos] = '\0';  return 1;}/* Function: RandomAlignment() *  * Purpose:  Create a random alignment from raw sequences. *  *           Ideally, we would like to sample an alignment from the *           space of possible alignments according to its probability, *           given a prior probability distribution for alignments. *           I don't see how to describe such a distribution, let alone *           sample it. *            *           This is a rough approximation that tries to capture some *           desired properties. We assume the alignment is generated *           by a simple HMM composed of match and insert states. *           Given parameters (pop, pex) for the probability of opening *           and extending an insertion, we can find the expected number *           of match states, M, in the underlying model for each sequence. *           We use an average M taken over all the sequences (this is *           an approximation. The expectation of M given all the sequence *           lengths is a nasty-looking summation.) *            *           M = len / ( 1 + pop ( 1 + 1/ (1-pex) ) )  *            *           Then, we assign positions in each raw sequence onto the M match *           states and M+1 insert states of this "HMM", by rolling random *           numbers and inserting the (rlen-M) inserted positions randomly *           into the insert slots, taking into account the relative probability *           of open vs. extend. *            *           The resulting alignment has two desired properties: insertions *           tend to follow the HMM-like exponential distribution, and *           the "sparseness" of the alignment is controllable through *           pop and pex. *            * Args:     rseqs     - raw sequences to "align", 0..nseq-1 *           sqinfo    - array of 0..nseq-1 info structures for the sequences *           nseq      - number of sequences    *           pop       - probability to open insertion (0<pop<1) *           pex       - probability to extend insertion (0<pex<1) *           ret_aseqs - RETURN: alignment (flushed) *           ainfo     - fill in: alignment info *  * Return:   1 on success, 0 on failure. Sets squid_errno to indicate cause *           of failure. */                      intRandomAlignment(char **rseqs, SQINFO *sqinfo, int nseq, float pop, float pex,		char ***ret_aseqs, AINFO *ainfo){  char **aseqs;                 /* RETURN: alignment   */  int    alen;			/* length of alignment */  int   *rlen;                  /* lengths of each raw sequence */  int    M;			/* length of "model"   */  int  **ins;                   /* insertion counts, 0..nseq-1 by 0..M */  int   *master_ins;            /* max insertion counts, 0..M */  int    apos, rpos, idx;  int    statepos;  int    count;  int    minlen;  /* calculate expected length of model, M   */  rlen = (int *) MallocOrDie (sizeof(int) * nseq);  M = 0;  minlen = 9999999;  for (idx = 0; idx < nseq; idx++)    {      rlen[idx] = strlen(rseqs[idx]);      M += rlen[idx];      minlen = (rlen[idx] < minlen) ? rlen[idx] : minlen;    }  M = (int) ((float) M / (1.0 + pop * (1.0 + 1.0 / (1.0 - pex))));  M /= nseq;  if (M > minlen) M = minlen;  /* make arrays that count insertions in M+1 possible insert states   */  ins = (int **) MallocOrDie (sizeof(int *) * nseq);  master_ins = (int *) MallocOrDie (sizeof(int) * (M+1));  for (idx = 0; idx < nseq; idx++)    {      ins[idx] = (int *) MallocOrDie (sizeof(int) * (M+1));      for (rpos = 0; rpos <= M; rpos++)	ins[idx][rpos] = 0;    }				/* normalize */  pop = pop / (pop+pex);  pex = 1.0 - pop;				/* make insertions for individual sequences */  for (idx = 0; idx < nseq; idx++)    {      apos = -1;      for (rpos = 0; rpos < rlen[idx]-M; rpos++)	{	  if (sre_random() < pop || apos == -1)	/* open insertion */	    apos = CHOOSE(M+1);        /* choose 0..M */	  ins[idx][apos]++;	}    }				/* calculate master_ins, max inserts */  alen = M;  for (apos = 0; apos <= M; apos++)    {      master_ins[apos] = 0;      for (idx = 0; idx < nseq; idx++)	if (ins[idx][apos] > master_ins[apos])	  master_ins[apos] = ins[idx][apos];      alen += master_ins[apos];    }  /* Now, construct alignment   */  aseqs = (char **) MallocOrDie (sizeof (char *) * nseq);  for (idx = 0; idx < nseq; idx++)    aseqs[idx] = (char *) MallocOrDie (sizeof(char) * (alen+1));  for (idx = 0; idx < nseq; idx++)    {      apos = rpos = 0;      for (statepos = 0; statepos <= M; statepos++)	{	  for (count = 0; count < ins[idx][statepos]; count++)	    aseqs[idx][apos++] = rseqs[idx][rpos++];	  for (; count < master_ins[statepos]; count++)	    aseqs[idx][apos++] = ' ';	  if (statepos != M)	    aseqs[idx][apos++] = rseqs[idx][rpos++];	}      aseqs[idx][alen] = '\0';    }  ainfo->flags = 0;  ainfo->alen  = alen;   ainfo->nseq  = nseq;  ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq);  for (idx = 0; idx < nseq; idx++)    SeqinfoCopy(&(ainfo->sqinfo[idx]), &(sqinfo[idx]));  free(rlen);  free(master_ins);  Free2DArray((void **) ins, nseq);  *ret_aseqs = aseqs;  return 1;}/* Function: AlignmentHomogenousGapsym() * Date:     SRE, Sun Mar 19 19:37:12 2000 [wren, St. Louis] * * Purpose:  Sometimes we've got to convert alignments to  *           a lowest common denominator, and we need *           a single specific gap character -- for example, *           PSI-BLAST blastpgp -B takes a very simplistic *           alignment input format which appears to only *           allow '-' as a gap symbol. *            *           Anything matching the isgap() macro is *           converted. * * Args:     aseq   - aligned character strings, [0..nseq-1][0..alen-1] *           nseq   - number of aligned strings *           alen   - length of alignment *           gapsym - character to use for gaps.          * * Returns:  void ("never fails") */voidAlignmentHomogenousGapsym(char **aseq, int nseq, int alen, char gapsym){  int i, apos;  for (i = 0; i < nseq; i++)    for (apos = 0; apos < alen; apos++)      if (isgap(aseq[i][apos])) aseq[i][apos] = gapsym;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -