📄 histogram.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
 *           value distribution, given x and the parameters *           of the distribution (characteristic *           value mu, decay constant lambda). *            *           This function is exquisitely prone to *           floating point exceptions if it isn't coded *           carefully. *            * Args:     x      = score *           mu     = characteristic value of extreme value distribution *           lambda = decay constant of extreme value distribution *            * Return:   P(S>x) */                   doubleExtremeValueP(float x, float mu, float lambda){  double y;			/* avoid exceptions near P=1.0 */			/* typical 32-bit sys: if () < -3.6, return 1.0 */  if ((lambda * (x - mu)) <= -1. * log(-1. * log(DBL_EPSILON))) return 1.0;			/* avoid underflow fp exceptions near P=0.0*/  if ((lambda * (x - mu)) >= 2.3 * (double) DBL_MAX_10_EXP)     return 0.0;			/* a roundoff issue arises; use 1 - e^-x --> x for small x */  y = exp(-1. * lambda * (x - mu));  if       (y < 1e-7) return y;  else     return (1.0 - exp(-1. * y));}/* Function: ExtremeValueP2() *  * Purpose:  Calculate P(S>x) in a database of size N, *           using P(S>x) for a single sequence, according *           to a Poisson distribution. * * Args:     x      = score *           mu     = characteristic value of extreme value distribution *           lambda = decay constant of extreme value distribution *           N      = number of trials (number of sequences) * * Return:   P(S>x) for database of size N */doubleExtremeValueP2(float x, float mu, float lambda, int N){  double y;  y = N * ExtremeValueP(x,mu,lambda);  if (y < 1e-7) return y;  else          return (1.0 - exp(-1. * y));}/* Function: ExtremeValueE() *  * Purpose:  Calculate E(S>x) in a database of size N, *           using P(S>x) for a single sequence: simply np. * * Args:     x      = score *           mu     = characteristic value of extreme value distribution *           lambda = decay constant of extreme value distribution *           N      = number of trials (number of sequences) * * Return:   E(S>x) for database of size N */doubleExtremeValueE(float x, float mu, float lambda, int N){  return (double)N * ExtremeValueP(x,mu,lambda);}/* Function: EVDrandom() *  * Purpose:  Randomly sample an x from an EVD. *           Trivially done by the transformation method, since *           the distribution is analytical: *              x = \mu - \frac{\log \left[ -\log P(S<x) \right]}{\lambda} *           where P(S<x) is sampled uniformly on 0 < P(S<x) < 1. */floatEVDrandom(float mu, float lambda){  float p = 0.0;  /* Very unlikely, but possible,   * that sre_random() would give us exactly 0 or 1    */  while (p == 0. || p == 1.) p = sre_random();   return mu - log(-1. * log(p)) / lambda;}  /* Function: Lawless416() * Date:     SRE, Thu Nov 13 11:48:50 1997 [St. Louis] *  * Purpose:  Equation 4.1.6 from [Lawless82], pg. 143, and *           its first derivative with respect to lambda, *           for finding the ML fit to EVD lambda parameter. *           This equation gives a result of zero for the maximum *           likelihood lambda. *            *           Can either deal with a histogram or an array. *            *           Warning: beware overflow/underflow issues! not bulletproof. *            * Args:     x      - array of sample values (or x-axis of a histogram) *           y      - NULL (or y-axis of a histogram) *           n      - number of samples (or number of histogram bins) *           lambda - a lambda to test *           ret_f  - RETURN: 4.1.6 evaluated at lambda *           ret_df - RETURN: first derivative of 4.1.6 evaluated at lambda *            * Return:   (void) */ voidLawless416(float *x, int *y, int n, float lambda, float *ret_f, float *ret_df){  double esum;			/* \sum e^(-lambda xi)      */  double xesum;			/* \sum xi e^(-lambda xi)   */  double xxesum;		/* \sum xi^2 e^(-lambda xi) */  double xsum;			/* \sum xi                  */  double mult;			/* histogram count multiplier */  double total;			/* total samples            */  int i;  esum = xesum = xsum  = xxesum = total = 0.;  for (i = 0; i < n; i++)    {      mult = (y == NULL) ? 1. : (double) y[i];      xsum   += mult * x[i];      xesum  += mult * x[i] * exp(-1. * lambda * x[i]);      xxesum += mult * x[i] * x[i] * exp(-1. * lambda * x[i]);      esum   += mult * exp(-1. * lambda * x[i]);      total  += mult;    }  *ret_f  = 1./lambda - xsum / total + xesum / esum;  *ret_df = ((xesum / esum) * (xesum / esum))    - (xxesum / esum)    - (1. / (lambda * lambda));  return;}/* Function: Lawless422() * Date:     SRE, Mon Nov 17 09:42:48 1997 [St. Louis] *  * Purpose:  Equation 4.2.2 from [Lawless82], pg. 169, and *           its first derivative with respect to lambda, *           for finding the ML fit to EVD lambda parameter *           for Type I censored data.  *           This equation gives a result of zero for the maximum *           likelihood lambda. *            *           Can either deal with a histogram or an array. *            *           Warning: beware overflow/underflow issues! not bulletproof. *            * Args:     x      - array of sample values (or x-axis of a histogram) *           y      - NULL (or y-axis of a histogram) *           n      - number of observed samples (or number of histogram bins) *           z      - number of censored samples  *           c      - censoring value; all observed x_i >= c          *           lambda - a lambda to test *           ret_f  - RETURN: 4.2.2 evaluated at lambda *           ret_df - RETURN: first derivative of 4.2.2 evaluated at lambda *            * Return:   (void) */ voidLawless422(float *x, int *y, int n, int z, float c,	   float lambda, float *ret_f, float *ret_df){  double esum;			/* \sum e^(-lambda xi)      + z term    */  double xesum;			/* \sum xi e^(-lambda xi)   + z term    */  double xxesum;		/* \sum xi^2 e^(-lambda xi) + z term    */  double xsum;			/* \sum xi                  (no z term) */  double mult;			/* histogram count multiplier */  double total;			/* total samples            */  int i;  esum = xesum = xsum  = xxesum = total = 0.;  for (i = 0; i < n; i++)    {      mult = (y == NULL) ? 1. : (double) y[i];      xsum   += mult * x[i];      esum   += mult *               exp(-1. * lambda * x[i]);      xesum  += mult * x[i] *        exp(-1. * lambda * x[i]);      xxesum += mult * x[i] * x[i] * exp(-1. * lambda * x[i]);      total  += mult;    }  /* Add z terms for censored data   */  esum   += (double) z *         exp(-1. * lambda * c);  xesum  += (double) z * c *     exp(-1. * lambda * c);  xxesum += (double) z * c * c * exp(-1. * lambda * c);  *ret_f  = 1./lambda - xsum / total + xesum / esum;  *ret_df = ((xesum / esum) * (xesum / esum))    - (xxesum / esum)    - (1. / (lambda * lambda));  return;}/* Function: EVDMaxLikelyFit() * Date:     SRE, Fri Nov 14 07:56:29 1997 [St. Louis] *  * Purpose:  Given a list or a histogram of EVD-distributed samples, *           find maximum likelihood parameters lambda and *           mu.  *            * Algorithm: Uses approach described in [Lawless82]. Solves *           for lambda using Newton/Raphson iterations; *           then substitutes lambda into Lawless' equation 4.1.5 *           to get mu.  *            *           Newton/Raphson algorithm developed from description in *           Numerical Recipes in C [Press88].  *            * Args:     x          - list of EVD distributed samples or x-axis of histogram *           c          - NULL, or y-axis of histogram *           n          - number of samples, or number of histogram bins  *           ret_mu     : RETURN: ML estimate of mu *           ret_lambda : RETURN: ML estimate of lambda *            * Return:   1 on success; 0 on any failure */intEVDMaxLikelyFit(float *x, int *c, int n, float *ret_mu, float *ret_lambda){  float  lambda, mu;  float  fx;			/* f(x)  */  float  dfx;			/* f'(x) */  double esum;                  /* \sum e^(-lambda xi) */   double mult;  double total;  float  tol = 1e-5;  int    i;  /* 1. Find an initial guess at lambda: linear regression here?   */  lambda = 0.2;  /* 2. Use Newton/Raphson to solve Lawless 4.1.6 and find ML lambda   */  for (i = 0; i < 100; i++)    {      Lawless416(x, c, n, lambda, &fx, &dfx);      if (fabs(fx) < tol) break;             /* success */      lambda = lambda - fx / dfx;	     /* Newton/Raphson is simple */      if (lambda <= 0.) lambda = 0.001;      /* but be a little careful  */    }  /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed.   *      Resort to a bisection search. Worse convergence speed   *      but guaranteed to converge (unlike Newton/Raphson).   *      We assume (!?) that fx is a monotonically decreasing function of x;   *      i.e. fx > 0 if we are left of the root, fx < 0 if we   *      are right of the root.   */   if (i == 100)    {      float left, right, mid;      SQD_DPRINTF2(("EVDMaxLikelyFit(): Newton/Raphson failed; switchover to bisection"));				/* First we need to bracket the root */      lambda = right = left = 0.2;      Lawless416(x, c, n, lambda, &fx, &dfx);      if (fx < 0.) 	{			/* fix right; search left. */	  do {	    left -= 0.1;	    if (left < 0.) { 	      SQD_DPRINTF2(("EVDMaxLikelyFit(): failed to bracket root")); 	      return 0; 	    }	    Lawless416(x, c, n, left, &fx, &dfx);	  } while (fx < 0.);	}      else	{			/* fix left; search right. */	  do {	    right += 0.1;	    Lawless416(x, c, n, right, &fx, &dfx);	    if (right > 100.) {	      SQD_DPRINTF2(("EVDMaxLikelyFit(): failed to bracket root")); 	      return 0; 	    }	  } while (fx > 0.);	}			/* now we bisection search in left/right interval */      for (i = 0; i < 100; i++)	{	  mid = (left + right) / 2.; 	  Lawless416(x, c, n, mid, &fx, &dfx);	  if (fabs(fx) < tol) break;             /* success */	  if (fx > 0.)	left = mid;	  else          right = mid;	}      if (i == 100) { 	SQD_DPRINTF2(("EVDMaxLikelyFit(): even the bisection search failed")); 	return 0;       }      lambda = mid;    }  /* 3. Substitute into Lawless 4.1.5 to find mu   */  esum = 0.;  total = 0.;  for (i = 0; i < n; i++)    {      mult   = (c == NULL) ? 1. : (double) c[i];      esum  += mult * exp(-1 * lambda * x[i]);      total += mult;    }  mu = -1. * log(esum / total) / lambda;  *ret_lambda = lambda;  *ret_mu     = mu;     return 1;}/* Function: EVDCensoredFit() * Date:     SRE, Mon Nov 17 10:01:05 1997 [St. Louis] *  * Purpose:  Given a /left-censored/ list or histogram of EVD-distributed  *           samples, as well as the number of censored samples z and the *           censoring value c,               *           find maximum likelihood parameters lambda and *           mu.  *            * Algorithm: Uses approach described in [Lawless82]. Solves *           for lambda using Newton/Raphson iterations; *           then substitutes lambda into Lawless' equation 4.2.3 *           to get mu.  *            *           Newton/Raphson algorithm developed from description in *           Numerical Recipes in C [Press88].  *            * Args:     x          - list of EVD distributed samples or x-axis of histogram *           y          - NULL, or y-axis of histogram *           n          - number of observed samples,or number of histogram bins  *           z          - number of censored samples *           c          - censoring value (all x_i >= c) *           ret_mu     : RETURN: ML estimate of mu *           ret_lambda : RETURN: ML estimate of lambda *            * Return:   (void) */intEVDCensoredFit(float *x, int *y, int n, int z, float c, 	       float *ret_mu, float *ret_lambda){  float  lambda, mu;  float  fx;			/* f(x)  */  float  dfx;			/* f'(x) */  double esum;                  /* \sum e^(-lambda xi) */   double mult;  double total;  float  tol = 1e-5;  int    i;  /* 1. Find an initial guess at lambda: linear regression here?   */  lambda = 0.2;  /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda   */  for (i = 0; i < 100; i++)    {      Lawless422(x, y, n, z, c, lambda, &fx, &dfx);      if (fabs(fx) < tol) break;             /* success */      lambda = lambda - fx / dfx;	     /* Newton/Raphson is simple */      if (lambda <= 0.) lambda = 0.001;      /* but be a little careful  */    } /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed.   *      Resort to a bisection search. Worse convergence speed   *      but guaranteed to converge (unlike Newton/Raphson).   *      We assume (!?) that fx is a monotonically decreasing function of x;   *      i.e. fx > 0 if we are left of the root, fx < 0 if we   *      are right of the root.   */   if (i == 100)    {      float left, right, mid;				/* First we need to bracket the root */      SQD_DPRINTF2(("EVDCensoredFit(): Newton/Raphson failed; switched to bisection"));      lambda = right = left = 0.2;      Lawless422(x, y, n, z, c, lambda, &fx, &dfx);      if (fx < 0.) 	{			/* fix right; search left. */	  do {	    left -= 0.03;	    if (left < 0.) { 	      SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); 	      return 0;	    }	    Lawless422(x, y, n, z, c, left, &fx, &dfx);	  } while (fx < 0.);	}      else	{			/* fix left; search right. */	  do {	    right += 0.1;	    Lawless422(x, y, n, z, c, left, &fx, &dfx);	    if (right > 100.) {	      SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root"));	      return 0;	    }	  } while (fx > 0.);	}			/* now we bisection search in left/right interval */      for (i = 0; i < 100; i++)	{	  mid = (left + right) / 2.; 	  Lawless422(x, y, n, z, c, left, &fx, &dfx);	  if (fabs(fx) < tol) break;             /* success */	  if (fx > 0.)	left = mid;	  else          right = mid;	}      if (i == 100) {	SQD_DPRINTF2(("EVDCensoredFit(): even the bisection search failed"));	return 0;      }      lambda = mid;    }  /* 3. Substitute into Lawless 4.2.3 to find mu   */  esum =  total = 0.;  for (i = 0; i < n; i++)    {      mult   = (y == NULL) ? 1. : (double) y[i];      esum  += mult * exp(-1. * lambda * x[i]);      total += mult;    }  esum += (double) z * exp(-1. * lambda * c);    /* term from censored data */  mu = -1. * log(esum / total) / lambda;          *ret_lambda = lambda;  *ret_mu     = mu;     return 1;}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -