📄 scaleswn.c
字号:
var = pu->r_u.rg.mean_var = s2_score/(double)(j-1); pu->r_u.rg.mean_var_sqrt = sqrt(var); } else { pu->r_u.rg.mu = 50.0; pu->r_u.rg.mean_var = 10.0; pu->r_u.rg.mean_var_sqrt = sqrt(10.0); } if (pu->r_u.rg.mean_var < 0.01) { pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0; } if (pu->r_u.rg.n_trimmed < LHISTC) { /* fprintf(stderr,"nprune %d at %d\n",nprune,nit); */ break; } } if (ppst->zsflag < 10) s_string[0]='\0'; else if (ppst->zs_win > 0) sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win); else sprintf(s_string,"(shuffled [%d])",nstats); sprintf(f_string,#ifndef NORMAL_DIST "%s Unscaled statistics: mu= %6.4f var=%6.4f; Lambda= %6.4f", s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var,PI_SQRT6/sqrt(pu->r_u.rg.mean_var_sqrt)#else "%s Unscaled normal statistics: mu= %6.4f var=%6.4f Ztrim: %d", s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var, pu->r_u.rg.n_trimmed#endif ); return AVE_STATS;}/*This routine calculates the maximum likelihood estimates for theextreme value distribution exp(-exp(-(-x-a)/b)) using the formula <lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)} <a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } ) The <a> parameter can be transformed into and K of the formula: 1 - exp ( - K m n exp ( - lambda S )) using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) )) 1 - exp ( -exp( - lambda ( S + log(K m n) / lambda)) a = log(K m n) / lambda a lambda = log (K m n) exp(a lambda) = K m n but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)}) so: K m n = (1/n sum{ exp( -x[i] *lambda)}) K = sum{}/(nlib m n )*/voidalloc_hist(struct llen_str *llen){ int max_llen, i; max_llen = llen->max; if (llen->hist == NULL) { llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int)); llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double)); llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double)); llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double)); } for (i=0; i< max_llen+1; i++) { llen->hist[i] = 0; llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0; }} voidfree_hist(struct llen_str *llen){ if (llen->hist!=NULL) { free(llen->score_var); free(llen->score2_sums); free(llen->score_sums); free(llen->hist); llen->hist=NULL; }}voidinithist(struct llen_str *llen, struct pstruct *ppst, int max_hscore){ llen->max = MAX_LLEN; llen->max_score = -1; llen->min_score=10000; alloc_hist(llen); llen->zero_s = 0; llen->min_length = 10000; llen->max_length = 0;}voidaddhist(struct llen_str *llen, int score, int length, int max_hscore){ int llength; double dscore; if ( length < LENGTH_CUTOFF) { llen->min_score = 0; llen->zero_s++; return; } if (score < llen->min_score) llen->min_score = score; if (score > llen->max_score) llen->max_score = score; if (length > llen->max_length) llen->max_length = length; if (length < llen->min_length) llen->min_length = length; if (score > max_hscore) score = max_hscore;#ifdef LOCAL_SCORE llength = (int)(LN_FACT*log((double)length)+0.5);#else llength = (int)(LN_FACT*log((double)length)+0.5); /* llength = length; */#endif if (llength < 0 ) llength = 0; if (llength > llen->max) llength = llen->max; llen->hist[llength]++; dscore = (double)score; llen->score_sums[llength] += dscore; llen->score2_sums[llength] += dscore * dscore;}/* histogram will go from z-scores of 20 .. 100 with mean 50 and z=10 */voidinithistz(int mh, struct hist_str *histp ){ int i; histp->z_calls = 0; histp->min_hist = 20; histp->max_hist = 120; histp->histint = (int) ((double)(histp->max_hist - histp->min_hist + 2)/(double)mh+0.5); histp->maxh = (int) ((double)(histp->max_hist - histp->min_hist + 2)/(double)histp->histint+0.5); if (histp->hist_a==NULL) { if ((histp->hist_a=(int *)calloc((size_t)histp->maxh,sizeof(int)))== NULL) { fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh); histp->histflg = 0; } else histp->histflg = 1; } else { for (i=0; i<histp->maxh; i++) histp->hist_a[i]=0; } histp->entries = 0;}static double nrv[100]={ 0.3098900570,-0.0313400923, 0.1131975903,-0.2832547606, 0.0073672659, 0.2914489107, 0.4209306311,-0.4630181404, 0.3326537896, 0.0050140359, -0.1117435426,-0.2835630301, 0.2302997065,-0.3102716394, 0.0819894916, -0.1676455701,-0.3782225018,-0.3204509938,-0.3594969187,-0.0308950398, 0.2922813812, 0.1337170751, 0.4666577031,-0.2917784349,-0.2438179916, 0.3002301394, 0.0231147123, 0.5687927366,-0.2318208709,-0.1476839273, -0.0385043851,-0.1213476523, 0.1486341995, 0.1027917167, 0.1409192644, -0.3280652579, 0.4232041455, 0.0775993309, 0.1159071787, 0.2769424442, 0.3197284751, 0.1507346903, 0.0028580909, 0.4825103412,-0.0496843610, -0.2754357656, 0.6021881753,-0.0816123956,-0.0899148991, 0.4847183201, 0.2151621865,-0.4542246220, 0.0690709102, 0.2461894193, 0.2126042295, -0.0767060668, 0.4819746149, 0.3323031326, 0.0177600676, 0.1143185210, 0.2653977455, 0.0921872958,-0.1330986718, 0.0412287716,-0.1691604748, -0.0529679078,-0.0194157955,-0.6117493924, 0.1199067932, 0.0210243193, -0.5832259838,-0.1685528664, 0.0008591271,-0.1120347822, 0.0839125069, -0.2787486831,-0.1937017962,-0.1915733940,-0.7888453635,-0.3316745163, 0.1180885226,-0.3347001067,-0.2477492636,-0.2445697600, 0.0001342482, -0.0015759812,-0.1516473992,-0.5202267615, 0.2136975210, 0.2500423188, -0.2402926401,-0.1094186280,-0.0618869933,-0.0815221188, 0.2623337275, 0.0219427302 -0.1774469919, 0.0828245026,-0.3271952808,-0.0632898028};voidaddhistz(double zs, struct hist_str *histp){ int ih, zi; double rv; rv = nrv[histp->z_calls++ % 100]; zi = (int)(zs + 0.5+rv ); if ((zi >= 0) && (zi <= 120)) histp->entries++; if (zi < histp->min_hist) zi = histp->min_hist; if (zi > histp->max_hist) zi = histp->max_hist; ih = (zi - histp->min_hist)/histp->histint; histp->hist_a[ih]++;}/* addhistzp() does not increase histp->entries since addhist did it already *//*voidaddhistzp(double zs, struct hist_str *histp){ int ih, zi; double rv; rv = nrv[histp->z_calls++ %100]; zi = (int)(zs + 0.5 + rv); if (zi < histp->min_hist) zi = histp->min_hist; if (zi > histp->max_hist) zi = histp->max_hist; ih = (zi - histp->min_hist)/histp->histint; histp->hist_a[ih]++;}*/voidprune_hist(struct llen_str *llen, int score, int length, int max_hscore, long *entries){ int llength; double dscore;#ifdef LOCAL_SCORE if (score <= 0 || length < LENGTH_CUTOFF) return;#endif if (score > max_hscore) score = max_hscore;#ifdef LOCAL_SCORE llength = (int)(LN_FACT*log((double)length)+0.5);#else llength = (int)(LN_FACT*log((double)length)+0.5); /* llength = length; */#endif if (llength < 0 ) llength = 0; if (llength > llen->max) llength = llen->max; llen->hist[llength]--; dscore = (double)score; llen->score_sums[llength] -= dscore; llen->score2_sums[llength] -= dscore * dscore;/* (*entries)--; histp->entries is not yet initialized */} /* fit_llen: no trimming (1) regress scores vs log(n) using weighted variance (2) calculate mean variance after length regression*/voidfit_llen(struct llen_str *llen, struct rstat_str *pr){ int j; int n; int n_size; double x, y2, u, z; double mean_x, mean_y, var_x, var_y, covar_xy; double mean_y2, covar_xy2, var_y2, dllj; double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w; /* now fit scores to best linear function of log(n), using simple linear regression */ for (llen->min=0; llen->min < llen->max; llen->min++) if (llen->hist[llen->min]) break; llen->min--; for (n_size=0,j = llen->min; j < llen->max; j++) { if (llen->hist[j] > 1) { dllj = (double)llen->hist[j]; llen->score_var[j] = llen->score2_sums[j]/dllj - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj); llen->score_var[j] /= (double)(llen->hist[j]-1); if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1; n_size++; } } pr->nb_tot = n_size; n_w = 0.0; sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0; for (j = llen->min; j < llen->max; j++) if (llen->hist[j] > 1) { x = j + 0.5; dllj = (double)llen->hist[j]; n_w += dllj/llen->score_var[j]; sum_x += dllj * x / llen->score_var[j] ; sum_y += llen->score_sums[j] / llen->score_var[j]; sum_x2 += dllj * x * x /llen->score_var[j]; sum_xy += x * llen->score_sums[j]/llen->score_var[j]; } if (n_size < 5 ) { llen->fit_flag=0; pr->rho = 0; pr->mu = sum_y/n_w; return; } else { det = n_w * sum_x2 - sum_x * sum_x; if (det > 0.001) { pr->rho = (n_w * sum_xy - sum_x * sum_y)/det; pr->rho_e = n_w/det; pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det; pr->mu_e = sum_x2/det; } else { llen->fit_flag = 0; pr->rho = 0; pr->mu = sum_y/n_w; return; } } det = n_w * sum_x2 - sum_x * sum_x; pr->rho = (n_w * sum_xy - sum_x * sum_y)/det; pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det; n = 0; mean_x = mean_y = mean_y2 = 0.0; var_x = var_y = 0.0; covar_xy = covar_xy2 = 0.0; for (j = llen->min; j <= llen->max; j++) if (llen->hist[j] > 1 ) { n += llen->hist[j]; x = (double)j + 0.5; mean_x += (double)llen->hist[j] * x; mean_y += llen->score_sums[j]; var_x += (double)llen->hist[j] * x * x; var_y += llen->score2_sums[j]; covar_xy += x * llen->score_sums[j]; } mean_x /= n; mean_y /= n; var_x = var_x / n - mean_x * mean_x; var_y = var_y / n - mean_y * mean_y; covar_xy = covar_xy / n - mean_x * mean_y;/* pr->rho = covar_xy / var_x; pr->mu = mean_y - pr->rho * mean_x;*/ mean_y2 = covar_xy2 = var_y2 = 0.0; for (j = llen->min; j <= llen->max; j++) if (llen->hist[j] > 1) { x = (double)j + 0.5; u = pr->rho * x + pr->mu; y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u;/* dllj = (double)llen->hist[j]; fprintf(stderr,"%.2f\t%d\t%g\t%g\n",x/LN_FACT,llen->hist[j], llen->score_sums[j]/dllj,y2/dllj);*/ mean_y2 += y2; var_y2 += y2 * y2; covar_xy2 += x * y2; /* fprintf(stderr,"%6.1f %4d %8d %8d %7.2f %8.2f\n", x,llen->hist[j],llen->score_sums[j],llen->score2_sums[j],u,y2); */ } pr->mean_var = mean_y2 /= (double)n; pr->mean_var_sqrt = sqrt(pr->mean_var); covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2; if (pr->mean_var <= 0.01) { llen->fit_flag = 0; pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0; } /* fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n", pr->rho*LN_FACT,pr->mu,pr->mean_var); */ if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1); else pr->var_e = 0.0; if (llen->fit_flag) { pr->rho2 = covar_xy2 / var_x; pr->mu2 = pr->mean_var - pr->rho2 * mean_x; } else { pr->rho2 = 0; pr->mu2 = pr->mean_var; } if (pr->rho2 < 0.0 ) z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT); else z = pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF; if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF; pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;}/* fit_llens: trim high variance bins (1) regress scores vs log(n) using weighted variance (2) regress residuals vs log(n) (3) remove high variance bins (4) calculate mean variance after length regression*/voidfit_llens(struct llen_str *llen, struct rstat_str *pr){ int j; int n, n_u2; double x, y, y2, u, u2, v, z; double mean_x, mean_y, var_x, var_y, covar_xy; double mean_y2, covar_xy2; double mean_u2, mean_3u2, dllj; double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w;/* now fit scores to best linear function of log(n), using simple linear regression */ for (llen->min=0; llen->min < llen->max; llen->min++) if (llen->hist[llen->min]) break; llen->min--; for (j = llen->min; j < llen->max; j++) { if (llen->hist[j] > 1) { dllj = (double)llen->hist[j]; llen->score_var[j] = (double)llen->score2_sums[j]/dllj - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj); llen->score_var[j] /= (double)(llen->hist[j]-1); if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0; } } n_w = 0.0; sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0; for (j = llen->min; j < llen->max; j++) if (llen->hist[j] > 1) { x = j + 0.5; dllj = (double)llen->hist[j]; n_w += dllj/llen->score_var[j]; sum_x += dllj * x / llen->score_var[j] ; sum_y += llen->score_sums[j] / llen->score_var[j]; sum_x2 += dllj * x * x /llen->score_var[j]; sum_xy += x * llen->score_sums[j]/llen->score_var[j]; } det = n_w * sum_x2 - sum_x * sum_x; pr->rho = (n_w * sum_xy - sum_x * sum_y)/det; pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */ n = 0; mean_x = mean_y = mean_y2 = 0.0; var_x = var_y = 0.0; covar_xy = covar_xy2 = 0.0; for (j = llen->min; j <= llen->max; j++) if (llen->hist[j] > 1 ) { n += llen->hist[j]; x = (double)j + 0.5; dllj = (double)llen->hist[j]; mean_x += dllj * x; mean_y += llen->score_sums[j]; var_x += dllj * x * x; var_y += llen->score2_sums[j]; covar_xy += x * llen->score_sums[j]; } mean_x /= n; mean_y /= n; var_x = var_x / n - mean_x * mean_x; var_y = var_y / n - mean_y * mean_y;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -