📄 aligneval.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
   * We have to keep separate track of our position in the list (lpos)   * from our positions in the raw sequences (r1,r2)   */  r1 = r2 = lpos = 0;  for (col = 0; s1[col] != '\0'; col++)    {      if (! isgap(s1[col]) && canons1[r1])	{	  s1_list[lpos] = isgap(s2[col]) ? -1 : r2;	  lpos++;	}            if (! isgap(s1[col]))	r1++;      if (! isgap(s2[col]))	r2++;    }  free(canons1);  *ret_listlen = lpos;  *ret_s1_list = s1_list;  return 1;}/* Function: compare_lists() *  * Purpose:  Given four alignment lists (k1,k2, t1,t2), calculate the *           alignment score. *            * Args:     k1   - list of k1's alignment to k2 *           k2   - list of k2's alignment to k1 *           t1   - list of t1's alignment to t2 *           t2   - list of t2's alignment to t2 *           len1 - length of k1, t1 lists (same by definition) *           len2 - length of k2, t2 lists (same by definition) *           ret_sc - RETURN: identity score of alignment * * Return:   1 on success, 0 on failure. */           static intcompare_lists(int *k1, int *k2, int *t1, int *t2, int len1, int len2, float *ret_sc){  float id;  float tot;  int   i;  id = tot = 0.0;  for (i = 0; i < len1; i++)    {      tot += 1.0;      if (t1[i] == k1[i]) id += 1.0;    }  for ( i = 0; i < len2; i++)    {      tot += 1.0;      if (k2[i] == t2[i]) id += 1.0;    }  *ret_sc = id / tot;  return 1;}/* Function: CompareMultAlignments *  * Purpose:  Invokes pairwise alignment comparison for every possible pair, *           and returns the average score over all N(N-1) of them or -1.0 *           on an internal failure. *  *           Can be slow for large N, since it's quadratic. * * Args:     kseqs  - trusted multiple alignment *           tseqs  - test multiple alignment *           N      - number of sequences *            * Return:   average identity score, or -1.0 on failure.           */floatCompareMultAlignments(char **kseqs, char **tseqs, int N){  int    i, j;			/* counters for sequences */  float  score;  float  tot_score = 0.0;				/* do all pairwise comparisons */  for (i = 0; i < N; i++)    for (j = i+1; j < N; j++)      {	score = ComparePairAlignments(kseqs[i], kseqs[j], tseqs[i], tseqs[j]);	if (score < 0.0) return -1.0;	tot_score += score;      }  return ((tot_score * 2.0) / ((float) N * ((float) N - 1.0)));}/* Function: CompareRefMultAlignments() *  * Purpose:  Same as above, except an array of reference coords for *           the canonical positions of the known alignment is also *           provided. * * Args:     ref      : 0..alen-1 array of 1/0 flags, 1 if canon *           kseqs    : trusted alignment *           tseqs    : test alignment *           N        : number of sequences * * Return:   average identity score, or -1.0 on failure */floatCompareRefMultAlignments(int   *ref, char **kseqs, char **tseqs, int N){  int    i, j;			/* counters for sequences */  float  score;  float  tot_score = 0.0;  				/* do all pairwise comparisons */  for (i = 0; i < N; i++)    for (j = i+1; j < N; j++)      {	score = CompareRefPairAlignments(ref, kseqs[i], kseqs[j], tseqs[i], tseqs[j]);	if (score < 0.0) return -1.0;	tot_score += score;      }  return ((tot_score * 2.0)/ ((float) N * ((float) N - 1.0)));}/* Function: PairwiseIdentity() *  * Purpose:  Calculate the pairwise fractional identity between *           two aligned sequences s1 and s2. This is simply *           (idents / MIN(len1, len2)). * *           Note how many ways there are to calculate pairwise identity, *           because of the variety of choices for the denominator: *           idents/(idents+mismat) has the disadvantage that artifactual *             gappy alignments would have high "identities". *           idents/(AVG|MAX)(len1,len2) both have the disadvantage that  *             alignments of fragments to longer sequences would have *             artifactually low "identities". *            *           Case sensitive; also, watch out in nucleic acid alignments;  *           U/T RNA/DNA alignments will be counted as mismatches! */floatPairwiseIdentity(char *s1, char *s2){  int     idents;		/* total identical positions  */  int     len1, len2;		/* lengths of seqs            */  int     x;			/* position in aligned seqs   */  idents = len1 = len2 = 0;  for (x = 0; s1[x] != '\0' && s2[x] != '\0'; x++)     {      if (!isgap(s1[x])) {	len1++;	if (s1[x] == s2[x]) idents++;       }      if (!isgap(s2[x])) len2++;    }  if (len2 < len1) len1 = len2;  return (len1 == 0 ? 0.0 : (float) idents / (float) len1);}/* Function: AlignmentIdentityBySampling() * Date:     SRE, Mon Oct 19 14:29:01 1998 [St. Louis] * * Purpose:  Estimate and return the average pairwise *           fractional identity of an alignment, *           using sampling. *            *           For use when there's so many sequences that *           an all vs. all rigorous calculation will *           take too long. *            *           Case sensitive! * * Args:     aseq       - aligned sequences *           L          - length of alignment *           N          - number of seqs in alignment *           nsample    - number of samples                      * * Returns:  average fractional identity, 0..1. */floatAlignmentIdentityBySampling(char **aseq, int L, int N, int nsample){  int x, i, j;			/* counters */  float sum;  if (N < 2) return 1.0;  sum = 0.;  for (x = 0; x < nsample; x++)    {      i = CHOOSE(N);      do { j = CHOOSE(N); } while (j == i); /* make sure j != i */      sum += PairwiseIdentity(aseq[i], aseq[j]);    }  return sum / (float) nsample;}/* Function: MajorityRuleConsensus() * Date:     SRE, Tue Mar  7 15:30:30 2000 [St. Louis] * * Purpose:  Given a set of aligned sequences, produce a *           majority rule consensus sequence. If >50% nonalphabetic *           (usually meaning gaps) in the column, ignore the column. * * Args:     aseq  - aligned sequences, [0..nseq-1][0..alen-1] *           nseq  - number of sequences *           alen  - length of alignment         * * Returns:  ptr to allocated consensus sequence. *           Caller is responsible for free'ing this. */char *MajorityRuleConsensus(char **aseq, int nseq, int alen){  char *cs;                     /* RETURN: consensus sequence */  int count[27];		/* counts for a..z and gaps in a column */  int idx,apos;			/* counters for seq, column */  int spos;			/* position in cs */  int x;			/* counter for characters */  int sym;  int max, bestx;    cs = MallocOrDie(sizeof(char) * (alen+1));    for (spos=0,apos=0; apos < alen; apos++)    {      for (x = 0; x < 27; x++) count[x] = 0;      for (idx = 0; idx < nseq; idx++)	{	  if (isalpha(aseq[idx][apos])) {	    sym = toupper(aseq[idx][apos]);	    count[sym-'A']++;	  } else {	    count[26]++;	  }	}      if ((float) count[26] / (float) nseq <= 0.5) {	max = bestx = -1;	for (x = 0; x < 26; x++) 	  if (count[x] > max) { max = count[x]; bestx = x; }	cs[spos++] = (char) ('A' + bestx);      }    }  cs[spos] = '\0';  return cs;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -