⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scaleswn.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
      var = pu->r_u.rg.mean_var = s2_score/(double)(j-1);      pu->r_u.rg.mean_var_sqrt = sqrt(var);    }    else {      pu->r_u.rg.mu = 50.0;      pu->r_u.rg.mean_var = 10.0;      pu->r_u.rg.mean_var_sqrt = sqrt(10.0);    }    if (pu->r_u.rg.mean_var < 0.01) {      pu->r_u.rg.mean_var = (pu->r_u.rg.mu > 1.0) ? pu->r_u.rg.mu: 1.0;    }    if (pu->r_u.rg.n_trimmed < LHISTC) {      /*	fprintf(stderr,"nprune %d at %d\n",nprune,nit);	*/      break;    }  }  if (ppst->zsflag < 10) s_string[0]='\0';  else if (ppst->zs_win > 0)    sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);  else     sprintf(s_string,"(shuffled [%d])",nstats);  sprintf(f_string,#ifndef NORMAL_DIST	  "%s Unscaled statistics: mu= %6.4f  var=%6.4f; Lambda= %6.4f",	  s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var,PI_SQRT6/sqrt(pu->r_u.rg.mean_var_sqrt)#else	  "%s Unscaled normal statistics: mu= %6.4f  var=%6.4f Ztrim: %d",	  s_string, pu->r_u.rg.mu,pu->r_u.rg.mean_var, pu->r_u.rg.n_trimmed#endif	  );  return AVE_STATS;}/*This routine calculates the maximum likelihood estimates for theextreme value distribution exp(-exp(-(-x-a)/b)) using the formula	<lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}	<a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )	The <a> parameter can be transformed into and K	of the formula: 1 - exp ( - K m n exp ( - lambda S ))	using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))			1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))			a = log(K m n) / lambda			a lambda = log (K m n)			exp(a lambda)  = K m n 	 but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})	 so:            K m n = (1/n sum{ exp( -x[i] *lambda)})			K = sum{}/(nlib m n )*/voidalloc_hist(struct llen_str *llen){  int max_llen, i;  max_llen = llen->max;  if (llen->hist == NULL) {    llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int));    llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double));    llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double));    llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double));  }  for (i=0; i< max_llen+1; i++) {      llen->hist[i] = 0;      llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0;  }}  voidfree_hist(struct llen_str *llen){  if (llen->hist!=NULL) {    free(llen->score_var);    free(llen->score2_sums);    free(llen->score_sums);    free(llen->hist);    llen->hist=NULL;  }}voidinithist(struct llen_str *llen, struct pstruct *ppst, int max_hscore){  llen->max = MAX_LLEN;  llen->max_score = -1;  llen->min_score=10000;  alloc_hist(llen);  llen->zero_s = 0;  llen->min_length = 10000;  llen->max_length = 0;}voidaddhist(struct llen_str *llen, int score, int length, int max_hscore){  int llength;   double dscore;  if ( length < LENGTH_CUTOFF) {    llen->min_score = 0;    llen->zero_s++;    return;  }  if (score < llen->min_score) llen->min_score = score;  if (score > llen->max_score) llen->max_score = score;  if (length > llen->max_length) llen->max_length = length;  if (length < llen->min_length) llen->min_length = length;  if (score > max_hscore) score = max_hscore;#ifdef LOCAL_SCORE  llength = (int)(LN_FACT*log((double)length)+0.5);#else  llength = (int)(LN_FACT*log((double)length)+0.5);  /*  llength = length; */#endif  if (llength < 0 ) llength = 0;  if (llength > llen->max) llength = llen->max;  llen->hist[llength]++;  dscore = (double)score;  llen->score_sums[llength] += dscore;  llen->score2_sums[llength] += dscore * dscore;}/* histogram will go from z-scores of 20 .. 100 with mean 50 and z=10 */voidinithistz(int mh, struct hist_str *histp ){  int i;  histp->z_calls = 0;  histp->min_hist = 20;  histp->max_hist = 120;  histp->histint = (int)    ((double)(histp->max_hist - histp->min_hist + 2)/(double)mh+0.5);  histp->maxh = (int)    ((double)(histp->max_hist - histp->min_hist + 2)/(double)histp->histint+0.5);  if (histp->hist_a==NULL) {    if ((histp->hist_a=(int *)calloc((size_t)histp->maxh,sizeof(int)))==	NULL) {      fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);      histp->histflg = 0;    }    else histp->histflg = 1;  }  else {    for (i=0; i<histp->maxh; i++) histp->hist_a[i]=0;  }  histp->entries = 0;}static double nrv[100]={  0.3098900570,-0.0313400923, 0.1131975903,-0.2832547606, 0.0073672659,  0.2914489107, 0.4209306311,-0.4630181404, 0.3326537896, 0.0050140359, -0.1117435426,-0.2835630301, 0.2302997065,-0.3102716394, 0.0819894916, -0.1676455701,-0.3782225018,-0.3204509938,-0.3594969187,-0.0308950398,  0.2922813812, 0.1337170751, 0.4666577031,-0.2917784349,-0.2438179916,  0.3002301394, 0.0231147123, 0.5687927366,-0.2318208709,-0.1476839273, -0.0385043851,-0.1213476523, 0.1486341995, 0.1027917167, 0.1409192644, -0.3280652579, 0.4232041455, 0.0775993309, 0.1159071787, 0.2769424442,  0.3197284751, 0.1507346903, 0.0028580909, 0.4825103412,-0.0496843610, -0.2754357656, 0.6021881753,-0.0816123956,-0.0899148991, 0.4847183201,  0.2151621865,-0.4542246220, 0.0690709102, 0.2461894193, 0.2126042295, -0.0767060668, 0.4819746149, 0.3323031326, 0.0177600676, 0.1143185210,  0.2653977455, 0.0921872958,-0.1330986718, 0.0412287716,-0.1691604748, -0.0529679078,-0.0194157955,-0.6117493924, 0.1199067932, 0.0210243193, -0.5832259838,-0.1685528664, 0.0008591271,-0.1120347822, 0.0839125069, -0.2787486831,-0.1937017962,-0.1915733940,-0.7888453635,-0.3316745163,  0.1180885226,-0.3347001067,-0.2477492636,-0.2445697600, 0.0001342482, -0.0015759812,-0.1516473992,-0.5202267615, 0.2136975210, 0.2500423188, -0.2402926401,-0.1094186280,-0.0618869933,-0.0815221188, 0.2623337275,  0.0219427302 -0.1774469919, 0.0828245026,-0.3271952808,-0.0632898028};voidaddhistz(double zs, struct hist_str *histp){  int ih, zi;  double rv;   rv = nrv[histp->z_calls++ % 100];  zi = (int)(zs + 0.5+rv );  if ((zi >= 0) && (zi <= 120)) histp->entries++;  if (zi < histp->min_hist) zi = histp->min_hist;  if (zi > histp->max_hist) zi = histp->max_hist;    ih = (zi - histp->min_hist)/histp->histint;  histp->hist_a[ih]++;}/* addhistzp() does not increase histp->entries since addhist did it already *//*voidaddhistzp(double zs, struct hist_str *histp){  int ih, zi;  double rv;  rv = nrv[histp->z_calls++ %100];  zi = (int)(zs + 0.5 + rv);  if (zi < histp->min_hist) zi = histp->min_hist;  if (zi > histp->max_hist) zi = histp->max_hist;    ih = (zi - histp->min_hist)/histp->histint;  histp->hist_a[ih]++;}*/voidprune_hist(struct llen_str *llen, int score, int length, int max_hscore,	   long *entries){  int llength;  double dscore;#ifdef LOCAL_SCORE  if (score <= 0 || length < LENGTH_CUTOFF) return;#endif  if (score > max_hscore) score = max_hscore;#ifdef LOCAL_SCORE  llength = (int)(LN_FACT*log((double)length)+0.5);#else  llength = (int)(LN_FACT*log((double)length)+0.5);  /*  llength = length; */#endif  if (llength < 0 ) llength = 0;  if (llength > llen->max) llength = llen->max;  llen->hist[llength]--;  dscore = (double)score;  llen->score_sums[llength] -= dscore;  llen->score2_sums[llength] -= dscore * dscore;/*  (*entries)--; histp->entries is not yet initialized */}  /* fit_llen: no trimming   (1) regress scores vs log(n) using weighted variance   (2) calculate mean variance after length regression*/voidfit_llen(struct llen_str *llen, struct rstat_str *pr){  int j;  int n;  int n_size;  double x, y2, u, z;  double mean_x, mean_y, var_x, var_y, covar_xy;  double mean_y2, covar_xy2, var_y2, dllj;  double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w;  /* now fit scores to best linear function of log(n), using   simple linear regression */    for (llen->min=0; llen->min < llen->max; llen->min++)    if (llen->hist[llen->min]) break;  llen->min--;  for (n_size=0,j = llen->min; j < llen->max; j++) {    if (llen->hist[j] > 1) {      dllj = (double)llen->hist[j];      llen->score_var[j] = llen->score2_sums[j]/dllj	- (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);      llen->score_var[j] /= (double)(llen->hist[j]-1);      if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1;      n_size++;    }  }  pr->nb_tot = n_size;  n_w = 0.0;  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;  for (j = llen->min; j < llen->max; j++)    if (llen->hist[j] > 1) {      x = j + 0.5;      dllj = (double)llen->hist[j];      n_w += dllj/llen->score_var[j];      sum_x +=   dllj * x / llen->score_var[j] ;      sum_y += llen->score_sums[j] / llen->score_var[j];      sum_x2 +=  dllj * x * x /llen->score_var[j];      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];    }  if (n_size < 5 ) {    llen->fit_flag=0;    pr->rho = 0;    pr->mu = sum_y/n_w;    return;  }  else {    det = n_w * sum_x2 - sum_x * sum_x;    if (det > 0.001) {      pr->rho = (n_w * sum_xy  - sum_x * sum_y)/det;      pr->rho_e = n_w/det;      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;      pr->mu_e = sum_x2/det;    }    else {      llen->fit_flag = 0;      pr->rho = 0;      pr->mu = sum_y/n_w;      return;    }  }  det = n_w * sum_x2 - sum_x * sum_x;  pr->rho = (n_w * sum_xy  - sum_x * sum_y)/det;  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;  n = 0;  mean_x = mean_y = mean_y2 = 0.0;  var_x = var_y = 0.0;  covar_xy = covar_xy2 = 0.0;  for (j = llen->min; j <= llen->max; j++)    if (llen->hist[j] > 1 ) {      n += llen->hist[j];      x = (double)j + 0.5;      mean_x += (double)llen->hist[j] * x;      mean_y += llen->score_sums[j];      var_x += (double)llen->hist[j] * x * x;      var_y += llen->score2_sums[j];      covar_xy += x * llen->score_sums[j];    }  mean_x /= n; mean_y /= n;  var_x = var_x / n - mean_x * mean_x;  var_y = var_y / n - mean_y * mean_y;    covar_xy = covar_xy / n - mean_x * mean_y;/*  pr->rho = covar_xy / var_x;  pr->mu = mean_y - pr->rho * mean_x;*/  mean_y2 = covar_xy2 = var_y2 = 0.0;  for (j = llen->min; j <= llen->max; j++)     if (llen->hist[j] > 1) {      x = (double)j + 0.5;      u = pr->rho * x + pr->mu;      y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u;/*      dllj = (double)llen->hist[j];      fprintf(stderr,"%.2f\t%d\t%g\t%g\n",x/LN_FACT,llen->hist[j],	      llen->score_sums[j]/dllj,y2/dllj);*/      mean_y2 += y2;      var_y2 += y2 * y2;      covar_xy2 += x * y2;      /*      fprintf(stderr,"%6.1f %4d %8d %8d %7.2f %8.2f\n",	      x,llen->hist[j],llen->score_sums[j],llen->score2_sums[j],u,y2); */    }    pr->mean_var = mean_y2 /= (double)n;  pr->mean_var_sqrt = sqrt(pr->mean_var);  covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;  if (pr->mean_var <= 0.01) {    llen->fit_flag = 0;    pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;  }  /*  fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",	  pr->rho*LN_FACT,pr->mu,pr->mean_var);  */  if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);  else pr->var_e = 0.0;  if (llen->fit_flag) {    pr->rho2 = covar_xy2 / var_x;    pr->mu2 = pr->mean_var - pr->rho2 * mean_x;  }  else {    pr->rho2 = 0;    pr->mu2 = pr->mean_var;  }  if (pr->rho2 < 0.0 )    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);  else z =  pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;  if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;  pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;}/* fit_llens: trim high variance bins   (1) regress scores vs log(n) using weighted variance   (2) regress residuals vs log(n)   (3) remove high variance bins   (4) calculate mean variance after length regression*/voidfit_llens(struct llen_str *llen, struct rstat_str *pr){  int j;  int n, n_u2;  double x, y, y2, u, u2, v, z;  double mean_x, mean_y, var_x, var_y, covar_xy;  double mean_y2, covar_xy2;  double mean_u2, mean_3u2, dllj;  double sum_x, sum_y, sum_x2, sum_xy, sum_v, det, n_w;/* now fit scores to best linear function of log(n), using   simple linear regression */    for (llen->min=0; llen->min < llen->max; llen->min++)    if (llen->hist[llen->min]) break;  llen->min--;  for (j = llen->min; j < llen->max; j++) {    if (llen->hist[j] > 1) {      dllj = (double)llen->hist[j];      llen->score_var[j] = (double)llen->score2_sums[j]/dllj	- (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);      llen->score_var[j] /= (double)(llen->hist[j]-1);      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;    }  }	    n_w = 0.0;  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;  for (j = llen->min; j < llen->max; j++)    if (llen->hist[j] > 1) {      x = j + 0.5;      dllj = (double)llen->hist[j];      n_w += dllj/llen->score_var[j];      sum_x +=   dllj * x / llen->score_var[j] ;      sum_y += llen->score_sums[j] / llen->score_var[j];      sum_x2 +=  dllj * x * x /llen->score_var[j];      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];    }  det = n_w * sum_x2 - sum_x * sum_x;  pr->rho = (n_w * sum_xy  - sum_x * sum_y)/det;  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/det;/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */  n = 0;  mean_x = mean_y = mean_y2 = 0.0;  var_x = var_y = 0.0;  covar_xy = covar_xy2 = 0.0;  for (j = llen->min; j <= llen->max; j++)     if (llen->hist[j] > 1 ) {      n += llen->hist[j];      x = (double)j + 0.5;      dllj = (double)llen->hist[j];      mean_x += dllj * x;      mean_y += llen->score_sums[j];      var_x += dllj * x * x;      var_y += llen->score2_sums[j];      covar_xy += x * llen->score_sums[j];    }  mean_x /= n; mean_y /= n;  var_x = var_x / n - mean_x * mean_x;  var_y = var_y / n - mean_y * mean_y;  

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -