⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 histogram.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* histogram.c * SRE, Sat Jan 20 16:16:17 1996 *  * Accumulation, printing, and fitting of score histograms * from database searches. * * RCS $Id: histogram.c,v 1.17 1999/12/04 22:42:28 eddy Exp $ ************************************************************ * Basic API: *  * struct histogram_s *h; *  * h = AllocHistogram(min_hint, max_hint, lumpsize); *  * while (getting scores x) AddToHistogram(h, x); *  * ExtremeValueFitHistogram(h, high_hint);    * PrintASCIIHistogram(fp, h);    * FreeHistogram(h); */#include <stdio.h>#include <string.h>#include <limits.h>#include <float.h>#include <math.h>#include "squid.h"#include "config.h"#include "structs.h"#include "funcs.h"#ifdef MEMDEBUG#include "dbmalloc.h"#endif/* Function: AllocHistogram() *  * Purpose:  Allocate and return a histogram structure. *           min and max are your best guess. They need *           not be absolutely correct; the histogram *           will expand dynamically to accomodate scores *           that exceed these suggested bounds. The amount *           that the histogram grows by is set by "lumpsize". *  * Args:     min:      minimum score (integer) *           max:      maximum score (integer) *           lumpsize: when reallocating histogram, pad the reallocation *                     by this much (saves excessive reallocation) */struct histogram_s *AllocHistogram(int min, int max, int lumpsize){  struct histogram_s *h;  int            newsize;  int            i;  newsize = max - min + 1;  h = (struct histogram_s *) MallocOrDie(sizeof(struct histogram_s));  h->min       = min;  h->max       = max;  h->total     = 0;  h->lowscore  = INT_MAX;  h->highscore = INT_MIN;  h->lumpsize  = lumpsize;  h->histogram = (int *) MallocOrDie (sizeof(int) * newsize);  for (i = 0; i < newsize; i++) h->histogram[i] = 0;  h->expect    = NULL;  h->fit_type  = HISTFIT_NONE;  return h;}/* Function: FreeHistogram() *  * Purpose:  free a histogram structure. */voidFreeHistogram(struct histogram_s *h){  free(h->histogram);  if (h->expect != NULL) free(h->expect);  free(h);} /* Function: UnfitHistogram() *  * Purpose:  Free only the theoretical fit part of a histogram. */voidUnfitHistogram(struct histogram_s *h){  if (h->expect != NULL) free(h->expect);  h->expect   = NULL;  h->fit_type = HISTFIT_NONE;}/* Function: AddToHistogram() *  * Purpose:  Bump the appropriate counter in a histogram *           structure, given a score. The score is *           rounded off from float precision to the *           next lower integer. */voidAddToHistogram(struct histogram_s *h, float sc){  int score;  int moveby;  int prevsize;  int newsize;  int i;  /* Adding to a histogram conflicts with existing fit:   * prohibit this.   */  if (h->fit_type != HISTFIT_NONE)    Die("AddToHistogram(): Can't add to a fitted histogram\n");    /* histogram bins are defined as:  score >= bin value, < bin+1    * -1.9 -> -2    -0.4 -> -1    1.9 -> 1   * -2.1 -> -3     0.4 -> 0     2.1 -> 2   */  score = (int) floor(sc);  /* Check to see if we must reallocate the histogram.   */  if (score < h->min)    {      prevsize = h->max - h->min + 1;      moveby   = (h->min - score) + h->lumpsize;      newsize  = prevsize + moveby;      h->min  -= moveby;      h->histogram = (int *) ReallocOrDie(h->histogram, sizeof(int) * newsize);      memmove(h->histogram+moveby, h->histogram, sizeof(int) * prevsize);      for (i = 0; i < moveby; i++)	h->histogram[i] = 0;    }  else if (score > h->max)    {      prevsize = h->max - h->min + 1;      h->max   = h->lumpsize + score;      newsize  = h->max - h->min + 1;      h->histogram = (int *) ReallocOrDie(h->histogram, sizeof(int) * newsize);      for (i = prevsize; i < newsize; i++)	h->histogram[i] = 0;    }  /* Bump the correct bin.   * The bin number is score - h->min   */  h->histogram[score - h->min]++;  h->total++;  if (score < h->lowscore) h->lowscore   = score;  if (score > h->highscore) h->highscore = score;  SQD_DPRINTF3(("AddToHistogram(): added %.1f; rounded to %d; in bin %d (%d-%d)\n",	  sc, score, score-h->min, h->min, h->max));  return;}/* Function: PrintASCIIHistogram() *  * Purpose:  Print a "prettified" histogram to a file pointer. *           Deliberately a look-and-feel clone of Bill Pearson's  *           excellent FASTA output. *  * Args:     fp     - open file to print to (stdout works) *           h      - histogram to print */           voidPrintASCIIHistogram(FILE *fp, struct histogram_s *h){  int units;  int maxbar;  int num;  int i, idx;  char buffer[81];		/* output line buffer */  int  pos;			/* position in output line buffer */  int  lowbound, lowcount;	/* cutoffs on the low side  */  int  highbound, highcount;	/* cutoffs on the high side */  int  emptybins = 3;  /* Find out how we'll scale the histogram.   * We have 59 characters to play with on a   * standard 80-column terminal display:   * leading "%5d %6d %6d|" occupies 20 chars.   * Save the peak position, we'll use it later.   */  maxbar = 0;  for (i = h->lowscore - h->min; i <= h->highscore - h->min; i++)    if (h->histogram[i] > maxbar)       {	maxbar   = h->histogram[i];     /* max height    */	lowbound = i + h->min;     	/* peak position */      }  /* Truncate histogram display on both sides, ad hoc fashion.   * Start from the peak; then move out until we see <emptybins> empty bins,   * and stop.   */  highbound = lowbound;		/* start at peak position */  for (num = 0; lowbound > h->lowscore; lowbound--)    {      i = lowbound - h->min;      if (h->histogram[i] > 0) { num = 0;               continue; } /* reset */      if (++num == emptybins)  { lowbound += emptybins; break;    } /* stop  */    }  for (num = 0; highbound < h->highscore; highbound++)    {      i = highbound - h->min;      if (h->histogram[i] > 0) { num = 0;                continue; } /* reset */      if (++num == emptybins)  { highbound -= emptybins; break;    } /* stop  */    }				/* collect counts outside of bounds */  for (lowcount = 0, i = h->lowscore - h->min; i <= lowbound - h->min; i++)    lowcount += h->histogram[i];  for (highcount = 0, i = h->highscore - h->min; i >= highbound - h->min; i--)    highcount += h->histogram[i];				/* maxbar might need raised now; then set our units  */  if (lowcount  > maxbar) maxbar = lowcount;  if (highcount > maxbar) maxbar = highcount;  units = ((maxbar-1)/ 59) + 1;  /* Print the histogram   */  fprintf(fp, "%5s %6s %6s  (one = represents %d sequences)\n", 	  "score", "obs", "exp", units);  fprintf(fp, "%5s %6s %6s\n", "-----", "---", "---");  buffer[80] = '\0';  buffer[79] = '\n';  for (i = h->lowscore; i <= h->highscore; i++)    {      memset(buffer, ' ', 79 * sizeof(char));      idx = i - h->min;      /* Deal with special cases at edges       */      if      (i < lowbound)  continue;      else if (i > highbound) continue;      else if (i == lowbound && i != h->lowscore) 	{	  sprintf(buffer, "<%4d %6d %6s|", i+1, lowcount, "-");	  if (lowcount > 0) {	    num = 1+(lowcount-1) / units;	    if (num > 60) Die("oops");	    for (pos = 20; num > 0; num--)  buffer[pos++] = '=';	  }	  fputs(buffer, fp);	  continue;	}      else if (i == highbound && i != h->highscore)	{	  sprintf(buffer, ">%4d %6d %6s|", i, highcount, "-");	  if (highcount > 0) {	    num = 1+(highcount-1) / units;	    for (pos = 20; num > 0; num--)  buffer[pos++] = '=';	  }	  fputs(buffer, fp);	  continue;	}      /* Deal with most cases       */      if (h->fit_type != HISTFIT_NONE) 	sprintf(buffer, "%5d %6d %6d|", 		i, h->histogram[idx], (int) h->expect[idx]);      else	sprintf(buffer, "%5d %6d %6s|", i, h->histogram[idx], "-");      buffer[20] = ' ';		/* sprintf writes a null char */      /* Mark the histogram bar for observed hits       */       if (h->histogram[idx] > 0) {	num = 1 + (h->histogram[idx]-1) / units;	for (pos = 20; num > 0; num--)  buffer[pos++] = '=';      }	        /* Mark the theoretically expected value       */      if (h->fit_type != HISTFIT_NONE && (int) h->expect[idx] > 0)	{	  pos = 20 + (int)(h->expect[idx]-1) / units;	  if (pos >= 78) pos = 78; /* be careful of buffer bounds */	  buffer[pos] = '*';	}      /* Print the line       */      fputs(buffer, fp);    }  /* Print details about the statistics   */  switch (h->fit_type) {  case HISTFIT_NONE:    fprintf(fp, "\n\n%% No statistical fit available\n");    break;      case HISTFIT_EVD:    fprintf(fp, "\n\n%% Statistical details of theoretical EVD fit:\n");    fprintf(fp, "              mu = %10.4f\n", h->param[EVD_MU]);    fprintf(fp, "          lambda = %10.4f\n", h->param[EVD_LAMBDA]);    fprintf(fp, "chi-sq statistic = %10.4f\n", h->chisq);    fprintf(fp, "  P(chi-square)  = %10.4g\n", h->chip);    break;  case HISTFIT_GAUSSIAN:    fprintf(fp, "\n\n%% Statistical details of theoretical Gaussian fit:\n");    fprintf(fp, "            mean = %10.4f\n", h->param[GAUSS_MEAN]);    fprintf(fp, "              sd = %10.4f\n", h->param[GAUSS_SD]);    fprintf(fp, "chi-sq statistic = %10.4f\n", h->chisq);    fprintf(fp, "  P(chi-square)  = %10.4g\n", h->chip);    break;  }      return;}  /* Function: PrintXMGRHistogram() * Date:     SRE, Wed Nov 12 11:02:00 1997 [St. Louis] *  * Purpose:  Print an XMGR data file that contains two data sets: *               - xy data for the observed histogram *               - xy data for the theoretical histogram */voidPrintXMGRHistogram(FILE *fp, struct histogram_s *h){  int sc;			/* integer score in histogram structure */  double val;  /* First data set is the observed histogram   */  for (sc = h->lowscore; sc <= h->highscore; sc++)    if (h->histogram[sc - h->min] > 0)      fprintf(fp, "%-6d %f\n", sc, 	      (float) h->histogram[sc - h->min]/ (float) h->total);  fprintf(fp, "&\n");  /* Second data set is the theoretical histogram   */  if (h->fit_type != HISTFIT_NONE)    {        for (sc = h->lowscore; sc <= h->highscore; sc++)	  {	    val = 	      (1. - ExtremeValueP((float)sc+1, h->param[EVD_MU], h->param[EVD_LAMBDA]))-	      (1. - ExtremeValueP((float)sc, h->param[EVD_MU], h->param[EVD_LAMBDA]));	    fprintf(fp, "%-6d %f\n", sc, val);	  }	fprintf(fp, "&\n");    }}/* Function: PrintXMGRDistribution() * Date:     SRE, Wed Nov 12 11:02:09 1997 [St. Louis] *  * Purpose:  Print an XMGR data file that contains two data sets: *               - xy data for the observed distribution P(S<x) *               - xy data for the theoretical distribution P(S<x) */voidPrintXMGRDistribution(FILE *fp, struct histogram_s *h){  int sc;			/* integer score in histogram structure */  int cum;			/* cumulative count */  double val;  /* First data set is the observed distribution;   * histogram bin x contains # of scores between x and x+1,   * hence the sc+1 offset.    */  for (cum = 0, sc = h->lowscore; sc <= h->highscore; sc++)    {      cum += h->histogram[sc - h->min];      fprintf(fp, "%-6d %f\n", sc + 1, (float) cum / (float) h->total);    }  fprintf(fp, "&\n");  /* Second data set is the theoretical histogram   */  if (h->fit_type != HISTFIT_NONE)    {      for (sc = h->lowscore; sc <= h->highscore; sc++)	{	  val = (1. - ExtremeValueP((float) sc, h->param[EVD_MU], 				    h->param[EVD_LAMBDA]));	  fprintf(fp, "%-6d %f\n", sc, val);	}      fprintf(fp, "&\n");    }}/* Function: PrintXMGRRegressionLine() * Date:     SRE, Wed Nov 12 11:02:19 1997 [St. Louis] *  * Purpose:  Print an XMGR data file that contains two data sets: *               - xy data for log log transform of observed distribution P(S<x) *               - xy data for log log transform of theoretical distribution P(S<x) */voidPrintXMGRRegressionLine(FILE *fp, struct histogram_s *h){  int sc;			/* integer score in histogram structure */  int cum;  double val;			/* log log transform */  /* First data set is the observed distribution;   * histogram bin x contains # of scores between x and x+1,   * hence the sc+1 offset.    */  for (cum = 0, sc = h->lowscore; sc <= h->highscore; sc++)    {      cum += h->histogram[sc - h->min];      val = log (-1. * log((double) cum /  (double) h->total));      if (cum < h->total)	fprintf(fp, "%-6d %f\n", sc + 1, val);    }  fprintf(fp, "&\n");  /* Second data set is the theoretical histogram   */  if (h->fit_type != HISTFIT_NONE)    {      for (sc = h->lowscore; sc <= h->highscore; sc++)	{	  val = log(-1. * log(1. - ExtremeValueP((float) sc, h->param[EVD_MU], 						       h->param[EVD_LAMBDA])));	  fprintf(fp, "%-6d %f\n", sc, val);	}      fprintf(fp, "&\n");    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -