📄 histogram.c
字号:
* value distribution, given x and the parameters * of the distribution (characteristic * value mu, decay constant lambda). * * This function is exquisitely prone to * floating point exceptions if it isn't coded * carefully. * * Args: x = score * mu = characteristic value of extreme value distribution * lambda = decay constant of extreme value distribution * * Return: P(S>x) */ doubleExtremeValueP(float x, float mu, float lambda){ double y; /* avoid exceptions near P=1.0 */ /* typical 32-bit sys: if () < -3.6, return 1.0 */ if ((lambda * (x - mu)) <= -1. * log(-1. * log(DBL_EPSILON))) return 1.0; /* avoid underflow fp exceptions near P=0.0*/ if ((lambda * (x - mu)) >= 2.3 * (double) DBL_MAX_10_EXP) return 0.0; /* a roundoff issue arises; use 1 - e^-x --> x for small x */ y = exp(-1. * lambda * (x - mu)); if (y < 1e-7) return y; else return (1.0 - exp(-1. * y));}/* Function: ExtremeValueP2() * * Purpose: Calculate P(S>x) in a database of size N, * using P(S>x) for a single sequence, according * to a Poisson distribution. * * Args: x = score * mu = characteristic value of extreme value distribution * lambda = decay constant of extreme value distribution * N = number of trials (number of sequences) * * Return: P(S>x) for database of size N */doubleExtremeValueP2(float x, float mu, float lambda, int N){ double y; y = N * ExtremeValueP(x,mu,lambda); if (y < 1e-7) return y; else return (1.0 - exp(-1. * y));}/* Function: ExtremeValueE() * * Purpose: Calculate E(S>x) in a database of size N, * using P(S>x) for a single sequence: simply np. * * Args: x = score * mu = characteristic value of extreme value distribution * lambda = decay constant of extreme value distribution * N = number of trials (number of sequences) * * Return: E(S>x) for database of size N */doubleExtremeValueE(float x, float mu, float lambda, int N){ return (double)N * ExtremeValueP(x,mu,lambda);}/* Function: EVDrandom() * * Purpose: Randomly sample an x from an EVD. * Trivially done by the transformation method, since * the distribution is analytical: * x = \mu - \frac{\log \left[ -\log P(S<x) \right]}{\lambda} * where P(S<x) is sampled uniformly on 0 < P(S<x) < 1. */floatEVDrandom(float mu, float lambda){ float p = 0.0; /* Very unlikely, but possible, * that sre_random() would give us exactly 0 or 1 */ while (p == 0. || p == 1.) p = sre_random(); return mu - log(-1. * log(p)) / lambda;} /* Function: Lawless416() * Date: SRE, Thu Nov 13 11:48:50 1997 [St. Louis] * * Purpose: Equation 4.1.6 from [Lawless82], pg. 143, and * its first derivative with respect to lambda, * for finding the ML fit to EVD lambda parameter. * This equation gives a result of zero for the maximum * likelihood lambda. * * Can either deal with a histogram or an array. * * Warning: beware overflow/underflow issues! not bulletproof. * * Args: x - array of sample values (or x-axis of a histogram) * y - NULL (or y-axis of a histogram) * n - number of samples (or number of histogram bins) * lambda - a lambda to test * ret_f - RETURN: 4.1.6 evaluated at lambda * ret_df - RETURN: first derivative of 4.1.6 evaluated at lambda * * Return: (void) */ voidLawless416(float *x, int *y, int n, float lambda, float *ret_f, float *ret_df){ double esum; /* \sum e^(-lambda xi) */ double xesum; /* \sum xi e^(-lambda xi) */ double xxesum; /* \sum xi^2 e^(-lambda xi) */ double xsum; /* \sum xi */ double mult; /* histogram count multiplier */ double total; /* total samples */ int i; esum = xesum = xsum = xxesum = total = 0.; for (i = 0; i < n; i++) { mult = (y == NULL) ? 1. : (double) y[i]; xsum += mult * x[i]; xesum += mult * x[i] * exp(-1. * lambda * x[i]); xxesum += mult * x[i] * x[i] * exp(-1. * lambda * x[i]); esum += mult * exp(-1. * lambda * x[i]); total += mult; } *ret_f = 1./lambda - xsum / total + xesum / esum; *ret_df = ((xesum / esum) * (xesum / esum)) - (xxesum / esum) - (1. / (lambda * lambda)); return;}/* Function: Lawless422() * Date: SRE, Mon Nov 17 09:42:48 1997 [St. Louis] * * Purpose: Equation 4.2.2 from [Lawless82], pg. 169, and * its first derivative with respect to lambda, * for finding the ML fit to EVD lambda parameter * for Type I censored data. * This equation gives a result of zero for the maximum * likelihood lambda. * * Can either deal with a histogram or an array. * * Warning: beware overflow/underflow issues! not bulletproof. * * Args: x - array of sample values (or x-axis of a histogram) * y - NULL (or y-axis of a histogram) * n - number of observed samples (or number of histogram bins) * z - number of censored samples * c - censoring value; all observed x_i >= c * lambda - a lambda to test * ret_f - RETURN: 4.2.2 evaluated at lambda * ret_df - RETURN: first derivative of 4.2.2 evaluated at lambda * * Return: (void) */ voidLawless422(float *x, int *y, int n, int z, float c, float lambda, float *ret_f, float *ret_df){ double esum; /* \sum e^(-lambda xi) + z term */ double xesum; /* \sum xi e^(-lambda xi) + z term */ double xxesum; /* \sum xi^2 e^(-lambda xi) + z term */ double xsum; /* \sum xi (no z term) */ double mult; /* histogram count multiplier */ double total; /* total samples */ int i; esum = xesum = xsum = xxesum = total = 0.; for (i = 0; i < n; i++) { mult = (y == NULL) ? 1. : (double) y[i]; xsum += mult * x[i]; esum += mult * exp(-1. * lambda * x[i]); xesum += mult * x[i] * exp(-1. * lambda * x[i]); xxesum += mult * x[i] * x[i] * exp(-1. * lambda * x[i]); total += mult; } /* Add z terms for censored data */ esum += (double) z * exp(-1. * lambda * c); xesum += (double) z * c * exp(-1. * lambda * c); xxesum += (double) z * c * c * exp(-1. * lambda * c); *ret_f = 1./lambda - xsum / total + xesum / esum; *ret_df = ((xesum / esum) * (xesum / esum)) - (xxesum / esum) - (1. / (lambda * lambda)); return;}/* Function: EVDMaxLikelyFit() * Date: SRE, Fri Nov 14 07:56:29 1997 [St. Louis] * * Purpose: Given a list or a histogram of EVD-distributed samples, * find maximum likelihood parameters lambda and * mu. * * Algorithm: Uses approach described in [Lawless82]. Solves * for lambda using Newton/Raphson iterations; * then substitutes lambda into Lawless' equation 4.1.5 * to get mu. * * Newton/Raphson algorithm developed from description in * Numerical Recipes in C [Press88]. * * Args: x - list of EVD distributed samples or x-axis of histogram * c - NULL, or y-axis of histogram * n - number of samples, or number of histogram bins * ret_mu : RETURN: ML estimate of mu * ret_lambda : RETURN: ML estimate of lambda * * Return: 1 on success; 0 on any failure */intEVDMaxLikelyFit(float *x, int *c, int n, float *ret_mu, float *ret_lambda){ float lambda, mu; float fx; /* f(x) */ float dfx; /* f'(x) */ double esum; /* \sum e^(-lambda xi) */ double mult; double total; float tol = 1e-5; int i; /* 1. Find an initial guess at lambda: linear regression here? */ lambda = 0.2; /* 2. Use Newton/Raphson to solve Lawless 4.1.6 and find ML lambda */ for (i = 0; i < 100; i++) { Lawless416(x, c, n, lambda, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ lambda = lambda - fx / dfx; /* Newton/Raphson is simple */ if (lambda <= 0.) lambda = 0.001; /* but be a little careful */ } /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed. * Resort to a bisection search. Worse convergence speed * but guaranteed to converge (unlike Newton/Raphson). * We assume (!?) that fx is a monotonically decreasing function of x; * i.e. fx > 0 if we are left of the root, fx < 0 if we * are right of the root. */ if (i == 100) { float left, right, mid; SQD_DPRINTF2(("EVDMaxLikelyFit(): Newton/Raphson failed; switchover to bisection")); /* First we need to bracket the root */ lambda = right = left = 0.2; Lawless416(x, c, n, lambda, &fx, &dfx); if (fx < 0.) { /* fix right; search left. */ do { left -= 0.1; if (left < 0.) { SQD_DPRINTF2(("EVDMaxLikelyFit(): failed to bracket root")); return 0; } Lawless416(x, c, n, left, &fx, &dfx); } while (fx < 0.); } else { /* fix left; search right. */ do { right += 0.1; Lawless416(x, c, n, right, &fx, &dfx); if (right > 100.) { SQD_DPRINTF2(("EVDMaxLikelyFit(): failed to bracket root")); return 0; } } while (fx > 0.); } /* now we bisection search in left/right interval */ for (i = 0; i < 100; i++) { mid = (left + right) / 2.; Lawless416(x, c, n, mid, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ if (fx > 0.) left = mid; else right = mid; } if (i == 100) { SQD_DPRINTF2(("EVDMaxLikelyFit(): even the bisection search failed")); return 0; } lambda = mid; } /* 3. Substitute into Lawless 4.1.5 to find mu */ esum = 0.; total = 0.; for (i = 0; i < n; i++) { mult = (c == NULL) ? 1. : (double) c[i]; esum += mult * exp(-1 * lambda * x[i]); total += mult; } mu = -1. * log(esum / total) / lambda; *ret_lambda = lambda; *ret_mu = mu; return 1;}/* Function: EVDCensoredFit() * Date: SRE, Mon Nov 17 10:01:05 1997 [St. Louis] * * Purpose: Given a /left-censored/ list or histogram of EVD-distributed * samples, as well as the number of censored samples z and the * censoring value c, * find maximum likelihood parameters lambda and * mu. * * Algorithm: Uses approach described in [Lawless82]. Solves * for lambda using Newton/Raphson iterations; * then substitutes lambda into Lawless' equation 4.2.3 * to get mu. * * Newton/Raphson algorithm developed from description in * Numerical Recipes in C [Press88]. * * Args: x - list of EVD distributed samples or x-axis of histogram * y - NULL, or y-axis of histogram * n - number of observed samples,or number of histogram bins * z - number of censored samples * c - censoring value (all x_i >= c) * ret_mu : RETURN: ML estimate of mu * ret_lambda : RETURN: ML estimate of lambda * * Return: (void) */intEVDCensoredFit(float *x, int *y, int n, int z, float c, float *ret_mu, float *ret_lambda){ float lambda, mu; float fx; /* f(x) */ float dfx; /* f'(x) */ double esum; /* \sum e^(-lambda xi) */ double mult; double total; float tol = 1e-5; int i; /* 1. Find an initial guess at lambda: linear regression here? */ lambda = 0.2; /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda */ for (i = 0; i < 100; i++) { Lawless422(x, y, n, z, c, lambda, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ lambda = lambda - fx / dfx; /* Newton/Raphson is simple */ if (lambda <= 0.) lambda = 0.001; /* but be a little careful */ } /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed. * Resort to a bisection search. Worse convergence speed * but guaranteed to converge (unlike Newton/Raphson). * We assume (!?) that fx is a monotonically decreasing function of x; * i.e. fx > 0 if we are left of the root, fx < 0 if we * are right of the root. */ if (i == 100) { float left, right, mid; /* First we need to bracket the root */ SQD_DPRINTF2(("EVDCensoredFit(): Newton/Raphson failed; switched to bisection")); lambda = right = left = 0.2; Lawless422(x, y, n, z, c, lambda, &fx, &dfx); if (fx < 0.) { /* fix right; search left. */ do { left -= 0.03; if (left < 0.) { SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); return 0; } Lawless422(x, y, n, z, c, left, &fx, &dfx); } while (fx < 0.); } else { /* fix left; search right. */ do { right += 0.1; Lawless422(x, y, n, z, c, left, &fx, &dfx); if (right > 100.) { SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); return 0; } } while (fx > 0.); } /* now we bisection search in left/right interval */ for (i = 0; i < 100; i++) { mid = (left + right) / 2.; Lawless422(x, y, n, z, c, left, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ if (fx > 0.) left = mid; else right = mid; } if (i == 100) { SQD_DPRINTF2(("EVDCensoredFit(): even the bisection search failed")); return 0; } lambda = mid; } /* 3. Substitute into Lawless 4.2.3 to find mu */ esum = total = 0.; for (i = 0; i < n; i++) { mult = (y == NULL) ? 1. : (double) y[i]; esum += mult * exp(-1. * lambda * x[i]); total += mult; } esum += (double) z * exp(-1. * lambda * c); /* term from censored data */ mu = -1. * log(esum / total) / lambda; *ret_lambda = lambda; *ret_mu = mu; return 1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -