📄 scaleswn.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 5 页
字号:
	  pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e),	  pu->r_u.rg.mean_var,sqrt(pu->r_u.rg.var_e),llen.zero_s,pu->r_u.rg.n_trimmed, nit,	  PI_SQRT6/sqrt(pu->r_u.rg.mean_var));  return REGI_STATS;}/* this procedure implements Altschul's pre-calculated values for lambda, K */#include "alt_parms.h"intlook_p(struct alt_p parm[], int gap, int ext,        double *K, double *Lambda, double *H);intproc_hist_a(struct stat_str *sptr, int nstats, 	    struct pstruct *ppst, struct hist_str *histp,	    int do_trim, struct pstat_str *pu){  double Lambda, K, H;  char *f_string;  int r_v;  int t_gdelval, t_ggapval;#ifdef OLD_FASTA_GAP  t_gdelval = ppst->gdelval;  t_ggapval = ppst->ggapval;#else  t_gdelval = ppst->gdelval+ppst->ggapval;  t_ggapval = ppst->ggapval;#endif  f_string = histp->stat_info;  if (strcmp(ppst->pamfile,"BL50")==0 || strcmp(ppst->pamfile,"BLOSUM50")==0)      r_v = look_p(bl50_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"BL62")==0 || strcmp(ppst->pamfile,"BLOSUM62")==0)      r_v = look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"BL80")==0 || strcmp(ppst->pamfile,"BLOSUM80")==0)      r_v = look_p(bl80_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"P250")==0)      r_v = look_p(p250_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"P120")==0)      r_v = look_p(p120_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"MD_10")==0)      r_v = look_p(md10_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"MD_20")==0)      r_v = look_p(md20_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"MD_40")==0)      r_v = look_p(md40_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"OPT5")==0)      r_v = look_p(opt5_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"DNA")==0 || strcmp(ppst->pamfile,"+5/-4")==0)      r_v = look_p(nt54_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"+3/-2")==0)      r_v = look_p(nt32_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else if (strcmp(ppst->pamfile,"+1/-3")==0)      r_v = look_p(nt13_p,t_gdelval,t_ggapval,&K,&Lambda,&H);  else r_v = 0;  pu->r_u.ag.Lambda = Lambda;  pu->r_u.ag.K = K;  pu->r_u.ag.H = H;  if (r_v == 0) {    fprintf(stderr,"Parameters not available for: %s: %d/%d\n",	    ppst->pamfile,t_gdelval,t_ggapval);    find_zp = &find_zr;    return proc_hist_r(sptr, nstats, ppst, histp, do_trim, pu);  }  /*    fprintf(stderr," the parameters are: Lambda: %5.3f K: %5.3f H: %5.3f\n",	    Lambda, K, H);	    */    pu->r_u.ag.a_n0 = (double)ppst->n0;    pu->r_u.ag.a_n0f = log (K * pu->r_u.ag.a_n0)/H;    sprintf(f_string,"Altschul/Gish params: n0: %d Lambda: %5.3f K: %5.3f H: %5.3f",	    ppst->n0,Lambda, K, H);    return AG_STATS;}int ag_parm(char *pamfile, int gdelval, int ggapval, struct pstat_str *pu){  double Lambda, K, H;  int r_v;  if (strcmp(pamfile,"BL50")==0)    r_v = look_p(bl50_p,gdelval,ggapval,&K,&Lambda,&H);  else if (strcmp(pamfile,"BL62")==0)      r_v = look_p(bl62_p,gdelval,ggapval,&K,&Lambda,&H);  else if (strcmp(pamfile,"P250")==0)      r_v = look_p(p250_p,gdelval,ggapval,&K,&Lambda,&H);  else if (strcmp(pamfile,"P120")==0)      r_v = look_p(p120_p,gdelval,ggapval,&K,&Lambda,&H);  else if (strcmp(pamfile,"MD_10")==0)      r_v = look_p(md10_p,gdelval,ggapval,&K,&Lambda,&H);  else if (strcmp(pamfile,"MD_20")==0)      r_v = look_p(md20_p,gdelval,ggapval,&K,&Lambda,&H);  else if (strcmp(pamfile,"MD_40")==0)      r_v = look_p(md40_p,gdelval,ggapval,&K,&Lambda,&H);  else if (strcmp(pamfile,"DNA")==0 || strcmp(pamfile,"+5/-4")==0)      r_v = look_p(nt54_p,gdelval,ggapval, &K,&Lambda,&H);  else if (strcmp(pamfile,"+3/-2")==0)      r_v = look_p(nt32_p,gdelval,ggapval, &K,&Lambda,&H);  else if (strcmp(pamfile,"+1/-3")==0)      r_v = look_p(nt13_p,gdelval,ggapval, &K,&Lambda,&H);  else r_v = 0;  pu->r_u.ag.K = K;  pu->r_u.ag.Lambda = Lambda;  pu->r_u.ag.H = H;  /*  if (r_v == 0) {    fprintf(stderr,"Parameters not available for: %s: %d/%d\n",	    pamfile,gdelval,ggapval);  }  */  return r_v;}intlook_p(struct alt_p parm[], int gap, int ext,       double *K, double *Lambda, double *H){  int i;  gap = -gap;  ext = -ext;  if (gap > parm[1].gap) {    *K = parm[0].K;    *Lambda = parm[0].Lambda;    *H = parm[0].H;    return 1;  }  for (i=1; parm[i].gap > 0; i++) {    if (parm[i].gap > gap) continue;    else if (parm[i].gap == gap && parm[i].ext > ext ) continue;    else if (parm[i].gap == gap && parm[i].ext == ext) {      *K = parm[i].K;      *Lambda = parm[i].Lambda;      *H = parm[i].H;      return 1;    }    else break;  }  return 0;}/* uncensored and censored maximum likelihood estimates developed   by Aaron Mackey based on a preprint from Sean Eddy */int mle_cen  (struct stat_str *, int, int, double, double *, double *);intproc_hist_ml(struct stat_str *sptr, int nstats, 	     struct pstruct *ppst, struct hist_str *histp,	     int do_trim, struct pstat_str *pu){  double f_cen;  char s_string[128];  char *f_string;  f_string = histp->stat_info;  pu->r_u.ag.a_n0 = (double)ppst->n0;  if (ppst->zsflag < 10) s_string[0]='\0';  else if (ppst->zs_win > 0)    sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);  else     sprintf(s_string,"(shuffled [%d])",nstats);  if (!do_trim) {    if (mle_cen(sptr, nstats, ppst->n0, 0.0, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1)      goto bad_mle;    sprintf(f_string,"%s MLE statistics: Lambda= %6.4f;  K=%6.4g",	    s_string,pu->r_u.ag.Lambda,pu->r_u.ag.K);  }  else {    if (nstats/20 > 1000) f_cen = 1000.0/(double)nstats;    else f_cen = 0.05;    if (mle_cen(sptr, nstats, ppst->n0, f_cen, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1)      goto bad_mle;    sprintf(f_string, "MLE_cen statistics: Lambda= %6.4f;  K=%6.4g (cen=%d)",	    pu->r_u.ag.Lambda,pu->r_u.ag.K,(int)((double)nstats*f_cen));  }      return MLE_STATS; bad_mle:  find_zp = &find_zn;    return proc_hist_n(sptr, nstats, ppst, histp, do_trim, pu);}intmle_cen2  (struct stat_str *, int, int, double, double *, double *, double *, double *);intproc_hist_ml2(struct stat_str *sptr, int nstats, 	      struct pstruct *ppst, struct hist_str *histp,	      int do_trim, struct pstat_str *pu){  int i, ns=0, nneg=0;  double f_cen, ave_lambda;  char s_string[128], ex_string[64];  char *f_string;  f_string = histp->stat_info;  pu->r_u.m2.a_n0 = (double)ppst->n0;  if (ppst->zsflag < 10) s_string[0]='\0';  else if (ppst->zs_win > 0)    sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win);  else     sprintf(s_string,"(shuffled [%d])",nstats);  pu->r_u.m2.ave_comp = 0.0;  pu->r_u.m2.max_comp = -1.0;  ns = nneg = 0;  for (i=0; i<nstats; i++) {    if (sptr[i].comp > pu->r_u.m2.max_comp) pu->r_u.m2.max_comp = sptr[i].comp;    if (sptr[i].comp > 0.0) {      pu->r_u.m2.ave_comp += log(sptr[i].comp);      ns++;    }    else nneg++;  }  pu->r_u.m2.ave_comp /= (double)ns;  pu->r_u.m2.ave_comp = exp(pu->r_u.m2.ave_comp);  for (i=0; i<nstats; i++) if (sptr[i].comp < 0.0) {    sptr[i].comp = pu->r_u.m2.ave_comp;  }  if (nneg > 0)    sprintf(ex_string,"composition = -1 for %d sequences",nneg);  else ex_string[0]='\0';  if (!do_trim) {    if (mle_cen2(sptr, nstats, ppst->n0, 0.0,	     &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1,	     &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1) == -1) goto bad_mle2;    ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);    sprintf(f_string,"%s MLE-2 statistics: a0= %6.4f;  a1=%6.4f; a2=%6.4f; b1=%6.4f\n  ave Lamdba: %6.4f",	    s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1,ave_lambda);  }  else {    if (nstats/20 > 500) f_cen = 500.0/(double)nstats;    else f_cen = 0.05;    if (mle_cen2(sptr, nstats, ppst->n0, f_cen, &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1, &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1)== -1) goto bad_mle2;    ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1);    sprintf(f_string,"%s MLE-2-cen statistics: a0= %6.4f;  a1=%6.4f; a2=%6.4f; b1=%6.4f (cen=%d)\n  ave Lambda:%6.4f",	    s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1, (int)((double)nstats*f_cen),ave_lambda);  }      return MLE2_STATS; bad_mle2:  find_zp = &find_zn;  return proc_hist_n(sptr, nstats, ppst, histp, do_trim, pu);}double first_deriv_cen(double lambda, struct stat_str *sptr, 		       int start, int stop,		       double sumlenL, double cenL,		       double sumlenH, double cenH);double second_deriv_cen(double lambda, struct stat_str *sptr,			int start, int stop,			double sumlenL, double cenL,			double sumlenH, double cenH);static voidst_sort (struct stat_str *v, int n) {  struct stat_str tmp_str;  int gap, i, j;  int tmp;  double dtmp;  for (gap = 1; gap < n/3; gap = 3*gap +1) ;  for (; gap > 0; gap = (gap-1)/3) {    for (i = gap; i < n; i++) {      for (j = i - gap; j >= 0; j -= gap) {	if (v[j].score <= v[j + gap].score) break;	tmp = v[j].score;	v[j].score = v[j + gap].score;	v[j + gap].score = tmp;	tmp = v[j].n1;	v[j].n1 = v[j + gap].n1;	v[j + gap].n1 = tmp;	dtmp = v[j].comp;	v[j].comp = v[j + gap].comp;	v[j + gap].comp = tmp;	dtmp = v[j].H;	v[j].H = v[j + gap].H;	v[j + gap].H = tmp;      }    }  }}/* sptr[].score, sptr[].n1; sptr[] must be sorted   int n = total number of samples   int M = length of query   double fn = fraction of scores to be censored fn/2.0 from top, bottom   double *Lambda = Lambda estimate   double *K = K estimate*/#define MAX_NIT 100intmle_cen(struct stat_str *sptr, int n, int M, double fc,	double *Lambda, double *K) {  double sumlenL, sumlenH, cenL, cenH;  double sum_s, sum2_s, mean_s, var_s, dtmp;  int start, stop;  int i, nf;  int nit = 0;  double deriv, deriv2, lambda, old_lambda, sum = 0.0;  /*   int sumlenL, int sumlenghtsR = sum of low (Left), right (High) seqs.   int cenL, cenH = censoring score low, high   */  nf = (fc/2.0) * n;  start = nf;  stop = n - nf;  st_sort(sptr,n);  sum_s = sum2_s = 0.0;  for (i=start; i<stop; i++) {    sum_s += sptr[i].score;  }  dtmp = (double)(stop-start);  mean_s = sum_s/dtmp;  for (i=start; i<stop; i++) {    sum2_s += sptr[i].score * sptr[i].score;  }  var_s = sum2_s/(dtmp-1.0);  sumlenL = sumlenH = 0.0;  for (i=0; i<start; i++) sumlenL += (double)sptr[i].n1;  for (i=stop; i<n; i++) sumlenH += (double)sptr[i].n1;  if (nf > 0) {    cenL = (double)sptr[start].score;    cenH = (double)sptr[stop].score;  }  else {    cenL = (double)sptr[start].score/2.0;    cenH = (double)sptr[start].score*2.0;  }  if (cenL >= cenH) return -1;  /* initial guess for lambda is 0.2 - this does not work for matrices     with very different scales */  /*  lambda = 0.2; */  lambda = PI_SQRT6/sqrt(var_s);  if (lambda > 1.0) {    fprintf(stderr," Lambda initial estimate error: lambda: %6.4g; var_s: %6.4g\n",lambda,var_s);    lambda = 0.2;  }  do {    deriv = first_deriv_cen(lambda, sptr, start, stop,			    sumlenL, cenL, sumlenH, cenH);    /*   (uncensored version)	 first_deriv(lambda, &sptr[start], stop - start))    */    /*  (uncensored version)    deriv2 = second_deriv(lambda, &sptr[start], stop-start);    */    deriv2 = second_deriv_cen(lambda, sptr, start, stop,			     sumlenL, cenL, sumlenH, cenH);     old_lambda = lambda;    if (lambda - deriv/deriv2 > 0.0) lambda = lambda - deriv/deriv2;    else lambda = lambda/2.0;    nit++;  } while (fabs((lambda - old_lambda)/lambda) > TINY && nit < MAX_NIT);  /*  fprintf(stderr," mle_cen nit: %d\n",nit); */  if (nit >= MAX_NIT) return -1;    for(i = start; i < stop ; i++) {    sum += (double) sptr[i].n1 * exp(- lambda * (double)sptr[i].score);  }  *Lambda = lambda;  /*   *K = (double)(stop-start)/((double)M*sum);  */  *K = (double)n/((double)M*		  (sum+sumlenL*exp(-lambda*cenL)-sumlenH*exp(-lambda*cenH)));  return 0;}/*doublefirst_deriv(double lambda, struct stat_str *sptr, int n) {  int i;  double sum = 0.0, sum1 = 0.0, sum2 = 0.0;  double s, l, es;  for(i = 0 ; i < n ; i++) {    s = (double)sptr[i].score;    l = (double)sptr[i].n1;    es = exp(-lambda * s );    sum += s;    sum2 += l * es;    sum1 += s * l * es;  }  return (1.0/lambda) - (sum/(double)n) + (sum1/sum2);}*//*doublesecond_deriv(double lambda, struct stat_str *sptr, int n) {  double sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;  double s, l, es;  int i;  for(i = 0 ; i < n ; i++) {    l = (double)sptr[i].n1;    s = (double)sptr[i].score;    es = exp(-lambda * s);    sum2 += l * es;    sum1 += l * s * es;    sum3 += l * s * s * es;  }  return ((sum1*sum1)/(sum2*sum2)) - (sum3/sum2) -  (1.0/(lambda*lambda));}*/doublefirst_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,		double sumlenL, double cenL, double sumlenH, double cenH) {  int i;  double sum = 0.0, sum1 = 0.0, sum2 = 0.0;  double s, l, es;  for(i = start ; i < stop ; i++) {    s = (double)sptr[i].score;    l = (double)sptr[i].n1;    es = exp(-lambda * s );    sum += s;    sum2 += l * es;    sum1 += s * l * es;  }  sum1 += sumlenL*cenL*exp(-lambda*cenL) - sumlenH*cenH*exp(-lambda*cenH);  sum2 += sumlenL*exp(-lambda*cenL) - sumlenH*exp(-lambda*cenH);  return (1.0 / lambda) - (sum /(double)(stop-start)) + (sum1 / sum2);}doublesecond_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop,		 double sumlenL, double cenL, double sumlenH, double cenH) {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -