📄 scaleswn.c
字号:
pu->r_u.rg.rho*LN_FACT,sqrt(pu->r_u.rg.rho_e),pu->r_u.rg.mu,sqrt(pu->r_u.rg.mu_e), pu->r_u.rg.mean_var,sqrt(pu->r_u.rg.var_e),llen.zero_s,pu->r_u.rg.n_trimmed, nit, PI_SQRT6/sqrt(pu->r_u.rg.mean_var)); return REGI_STATS;}/* this procedure implements Altschul's pre-calculated values for lambda, K */#include "alt_parms.h"intlook_p(struct alt_p parm[], int gap, int ext, double *K, double *Lambda, double *H);intproc_hist_a(struct stat_str *sptr, int nstats, struct pstruct *ppst, struct hist_str *histp, int do_trim, struct pstat_str *pu){ double Lambda, K, H; char *f_string; int r_v; int t_gdelval, t_ggapval;#ifdef OLD_FASTA_GAP t_gdelval = ppst->gdelval; t_ggapval = ppst->ggapval;#else t_gdelval = ppst->gdelval+ppst->ggapval; t_ggapval = ppst->ggapval;#endif f_string = histp->stat_info; if (strcmp(ppst->pamfile,"BL50")==0 || strcmp(ppst->pamfile,"BLOSUM50")==0) r_v = look_p(bl50_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"BL62")==0 || strcmp(ppst->pamfile,"BLOSUM62")==0) r_v = look_p(bl62_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"BL80")==0 || strcmp(ppst->pamfile,"BLOSUM80")==0) r_v = look_p(bl80_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"P250")==0) r_v = look_p(p250_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"P120")==0) r_v = look_p(p120_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"MD_10")==0) r_v = look_p(md10_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"MD_20")==0) r_v = look_p(md20_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"MD_40")==0) r_v = look_p(md40_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"OPT5")==0) r_v = look_p(opt5_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"DNA")==0 || strcmp(ppst->pamfile,"+5/-4")==0) r_v = look_p(nt54_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"+3/-2")==0) r_v = look_p(nt32_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else if (strcmp(ppst->pamfile,"+1/-3")==0) r_v = look_p(nt13_p,t_gdelval,t_ggapval,&K,&Lambda,&H); else r_v = 0; pu->r_u.ag.Lambda = Lambda; pu->r_u.ag.K = K; pu->r_u.ag.H = H; if (r_v == 0) { fprintf(stderr,"Parameters not available for: %s: %d/%d\n", ppst->pamfile,t_gdelval,t_ggapval); find_zp = &find_zr; return proc_hist_r(sptr, nstats, ppst, histp, do_trim, pu); } /* fprintf(stderr," the parameters are: Lambda: %5.3f K: %5.3f H: %5.3f\n", Lambda, K, H); */ pu->r_u.ag.a_n0 = (double)ppst->n0; pu->r_u.ag.a_n0f = log (K * pu->r_u.ag.a_n0)/H; sprintf(f_string,"Altschul/Gish params: n0: %d Lambda: %5.3f K: %5.3f H: %5.3f", ppst->n0,Lambda, K, H); return AG_STATS;}int ag_parm(char *pamfile, int gdelval, int ggapval, struct pstat_str *pu){ double Lambda, K, H; int r_v; if (strcmp(pamfile,"BL50")==0) r_v = look_p(bl50_p,gdelval,ggapval,&K,&Lambda,&H); else if (strcmp(pamfile,"BL62")==0) r_v = look_p(bl62_p,gdelval,ggapval,&K,&Lambda,&H); else if (strcmp(pamfile,"P250")==0) r_v = look_p(p250_p,gdelval,ggapval,&K,&Lambda,&H); else if (strcmp(pamfile,"P120")==0) r_v = look_p(p120_p,gdelval,ggapval,&K,&Lambda,&H); else if (strcmp(pamfile,"MD_10")==0) r_v = look_p(md10_p,gdelval,ggapval,&K,&Lambda,&H); else if (strcmp(pamfile,"MD_20")==0) r_v = look_p(md20_p,gdelval,ggapval,&K,&Lambda,&H); else if (strcmp(pamfile,"MD_40")==0) r_v = look_p(md40_p,gdelval,ggapval,&K,&Lambda,&H); else if (strcmp(pamfile,"DNA")==0 || strcmp(pamfile,"+5/-4")==0) r_v = look_p(nt54_p,gdelval,ggapval, &K,&Lambda,&H); else if (strcmp(pamfile,"+3/-2")==0) r_v = look_p(nt32_p,gdelval,ggapval, &K,&Lambda,&H); else if (strcmp(pamfile,"+1/-3")==0) r_v = look_p(nt13_p,gdelval,ggapval, &K,&Lambda,&H); else r_v = 0; pu->r_u.ag.K = K; pu->r_u.ag.Lambda = Lambda; pu->r_u.ag.H = H; /* if (r_v == 0) { fprintf(stderr,"Parameters not available for: %s: %d/%d\n", pamfile,gdelval,ggapval); } */ return r_v;}intlook_p(struct alt_p parm[], int gap, int ext, double *K, double *Lambda, double *H){ int i; gap = -gap; ext = -ext; if (gap > parm[1].gap) { *K = parm[0].K; *Lambda = parm[0].Lambda; *H = parm[0].H; return 1; } for (i=1; parm[i].gap > 0; i++) { if (parm[i].gap > gap) continue; else if (parm[i].gap == gap && parm[i].ext > ext ) continue; else if (parm[i].gap == gap && parm[i].ext == ext) { *K = parm[i].K; *Lambda = parm[i].Lambda; *H = parm[i].H; return 1; } else break; } return 0;}/* uncensored and censored maximum likelihood estimates developed by Aaron Mackey based on a preprint from Sean Eddy */int mle_cen (struct stat_str *, int, int, double, double *, double *);intproc_hist_ml(struct stat_str *sptr, int nstats, struct pstruct *ppst, struct hist_str *histp, int do_trim, struct pstat_str *pu){ double f_cen; char s_string[128]; char *f_string; f_string = histp->stat_info; pu->r_u.ag.a_n0 = (double)ppst->n0; if (ppst->zsflag < 10) s_string[0]='\0'; else if (ppst->zs_win > 0) sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win); else sprintf(s_string,"(shuffled [%d])",nstats); if (!do_trim) { if (mle_cen(sptr, nstats, ppst->n0, 0.0, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1) goto bad_mle; sprintf(f_string,"%s MLE statistics: Lambda= %6.4f; K=%6.4g", s_string,pu->r_u.ag.Lambda,pu->r_u.ag.K); } else { if (nstats/20 > 1000) f_cen = 1000.0/(double)nstats; else f_cen = 0.05; if (mle_cen(sptr, nstats, ppst->n0, f_cen, &pu->r_u.ag.Lambda, &pu->r_u.ag.K) == -1) goto bad_mle; sprintf(f_string, "MLE_cen statistics: Lambda= %6.4f; K=%6.4g (cen=%d)", pu->r_u.ag.Lambda,pu->r_u.ag.K,(int)((double)nstats*f_cen)); } return MLE_STATS; bad_mle: find_zp = &find_zn; return proc_hist_n(sptr, nstats, ppst, histp, do_trim, pu);}intmle_cen2 (struct stat_str *, int, int, double, double *, double *, double *, double *);intproc_hist_ml2(struct stat_str *sptr, int nstats, struct pstruct *ppst, struct hist_str *histp, int do_trim, struct pstat_str *pu){ int i, ns=0, nneg=0; double f_cen, ave_lambda; char s_string[128], ex_string[64]; char *f_string; f_string = histp->stat_info; pu->r_u.m2.a_n0 = (double)ppst->n0; if (ppst->zsflag < 10) s_string[0]='\0'; else if (ppst->zs_win > 0) sprintf(s_string,"(shuffled [%d], win: %d)",nstats,ppst->zs_win); else sprintf(s_string,"(shuffled [%d])",nstats); pu->r_u.m2.ave_comp = 0.0; pu->r_u.m2.max_comp = -1.0; ns = nneg = 0; for (i=0; i<nstats; i++) { if (sptr[i].comp > pu->r_u.m2.max_comp) pu->r_u.m2.max_comp = sptr[i].comp; if (sptr[i].comp > 0.0) { pu->r_u.m2.ave_comp += log(sptr[i].comp); ns++; } else nneg++; } pu->r_u.m2.ave_comp /= (double)ns; pu->r_u.m2.ave_comp = exp(pu->r_u.m2.ave_comp); for (i=0; i<nstats; i++) if (sptr[i].comp < 0.0) { sptr[i].comp = pu->r_u.m2.ave_comp; } if (nneg > 0) sprintf(ex_string,"composition = -1 for %d sequences",nneg); else ex_string[0]='\0'; if (!do_trim) { if (mle_cen2(sptr, nstats, ppst->n0, 0.0, &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1, &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1) == -1) goto bad_mle2; ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1); sprintf(f_string,"%s MLE-2 statistics: a0= %6.4f; a1=%6.4f; a2=%6.4f; b1=%6.4f\n ave Lamdba: %6.4f", s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1,ave_lambda); } else { if (nstats/20 > 500) f_cen = 500.0/(double)nstats; else f_cen = 0.05; if (mle_cen2(sptr, nstats, ppst->n0, f_cen, &pu->r_u.m2.mle2_a0, &pu->r_u.m2.mle2_a1, &pu->r_u.m2.mle2_a2, &pu->r_u.m2.mle2_b1)== -1) goto bad_mle2; ave_lambda = 1.0/(pu->r_u.m2.ave_comp*pu->r_u.m2.mle2_b1); sprintf(f_string,"%s MLE-2-cen statistics: a0= %6.4f; a1=%6.4f; a2=%6.4f; b1=%6.4f (cen=%d)\n ave Lambda:%6.4f", s_string, pu->r_u.m2.mle2_a0, pu->r_u.m2.mle2_a1, pu->r_u.m2.mle2_a2, pu->r_u.m2.mle2_b1, (int)((double)nstats*f_cen),ave_lambda); } return MLE2_STATS; bad_mle2: find_zp = &find_zn; return proc_hist_n(sptr, nstats, ppst, histp, do_trim, pu);}double first_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop, double sumlenL, double cenL, double sumlenH, double cenH);double second_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop, double sumlenL, double cenL, double sumlenH, double cenH);static voidst_sort (struct stat_str *v, int n) { struct stat_str tmp_str; int gap, i, j; int tmp; double dtmp; for (gap = 1; gap < n/3; gap = 3*gap +1) ; for (; gap > 0; gap = (gap-1)/3) { for (i = gap; i < n; i++) { for (j = i - gap; j >= 0; j -= gap) { if (v[j].score <= v[j + gap].score) break; tmp = v[j].score; v[j].score = v[j + gap].score; v[j + gap].score = tmp; tmp = v[j].n1; v[j].n1 = v[j + gap].n1; v[j + gap].n1 = tmp; dtmp = v[j].comp; v[j].comp = v[j + gap].comp; v[j + gap].comp = tmp; dtmp = v[j].H; v[j].H = v[j + gap].H; v[j + gap].H = tmp; } } }}/* sptr[].score, sptr[].n1; sptr[] must be sorted int n = total number of samples int M = length of query double fn = fraction of scores to be censored fn/2.0 from top, bottom double *Lambda = Lambda estimate double *K = K estimate*/#define MAX_NIT 100intmle_cen(struct stat_str *sptr, int n, int M, double fc, double *Lambda, double *K) { double sumlenL, sumlenH, cenL, cenH; double sum_s, sum2_s, mean_s, var_s, dtmp; int start, stop; int i, nf; int nit = 0; double deriv, deriv2, lambda, old_lambda, sum = 0.0; /* int sumlenL, int sumlenghtsR = sum of low (Left), right (High) seqs. int cenL, cenH = censoring score low, high */ nf = (fc/2.0) * n; start = nf; stop = n - nf; st_sort(sptr,n); sum_s = sum2_s = 0.0; for (i=start; i<stop; i++) { sum_s += sptr[i].score; } dtmp = (double)(stop-start); mean_s = sum_s/dtmp; for (i=start; i<stop; i++) { sum2_s += sptr[i].score * sptr[i].score; } var_s = sum2_s/(dtmp-1.0); sumlenL = sumlenH = 0.0; for (i=0; i<start; i++) sumlenL += (double)sptr[i].n1; for (i=stop; i<n; i++) sumlenH += (double)sptr[i].n1; if (nf > 0) { cenL = (double)sptr[start].score; cenH = (double)sptr[stop].score; } else { cenL = (double)sptr[start].score/2.0; cenH = (double)sptr[start].score*2.0; } if (cenL >= cenH) return -1; /* initial guess for lambda is 0.2 - this does not work for matrices with very different scales */ /* lambda = 0.2; */ lambda = PI_SQRT6/sqrt(var_s); if (lambda > 1.0) { fprintf(stderr," Lambda initial estimate error: lambda: %6.4g; var_s: %6.4g\n",lambda,var_s); lambda = 0.2; } do { deriv = first_deriv_cen(lambda, sptr, start, stop, sumlenL, cenL, sumlenH, cenH); /* (uncensored version) first_deriv(lambda, &sptr[start], stop - start)) */ /* (uncensored version) deriv2 = second_deriv(lambda, &sptr[start], stop-start); */ deriv2 = second_deriv_cen(lambda, sptr, start, stop, sumlenL, cenL, sumlenH, cenH); old_lambda = lambda; if (lambda - deriv/deriv2 > 0.0) lambda = lambda - deriv/deriv2; else lambda = lambda/2.0; nit++; } while (fabs((lambda - old_lambda)/lambda) > TINY && nit < MAX_NIT); /* fprintf(stderr," mle_cen nit: %d\n",nit); */ if (nit >= MAX_NIT) return -1; for(i = start; i < stop ; i++) { sum += (double) sptr[i].n1 * exp(- lambda * (double)sptr[i].score); } *Lambda = lambda; /* *K = (double)(stop-start)/((double)M*sum); */ *K = (double)n/((double)M* (sum+sumlenL*exp(-lambda*cenL)-sumlenH*exp(-lambda*cenH))); return 0;}/*doublefirst_deriv(double lambda, struct stat_str *sptr, int n) { int i; double sum = 0.0, sum1 = 0.0, sum2 = 0.0; double s, l, es; for(i = 0 ; i < n ; i++) { s = (double)sptr[i].score; l = (double)sptr[i].n1; es = exp(-lambda * s ); sum += s; sum2 += l * es; sum1 += s * l * es; } return (1.0/lambda) - (sum/(double)n) + (sum1/sum2);}*//*doublesecond_deriv(double lambda, struct stat_str *sptr, int n) { double sum1 = 0.0, sum2 = 0.0, sum3 = 0.0; double s, l, es; int i; for(i = 0 ; i < n ; i++) { l = (double)sptr[i].n1; s = (double)sptr[i].score; es = exp(-lambda * s); sum2 += l * es; sum1 += l * s * es; sum3 += l * s * s * es; } return ((sum1*sum1)/(sum2*sum2)) - (sum3/sum2) - (1.0/(lambda*lambda));}*/doublefirst_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop, double sumlenL, double cenL, double sumlenH, double cenH) { int i; double sum = 0.0, sum1 = 0.0, sum2 = 0.0; double s, l, es; for(i = start ; i < stop ; i++) { s = (double)sptr[i].score; l = (double)sptr[i].n1; es = exp(-lambda * s ); sum += s; sum2 += l * es; sum1 += s * l * es; } sum1 += sumlenL*cenL*exp(-lambda*cenL) - sumlenH*cenH*exp(-lambda*cenH); sum2 += sumlenL*exp(-lambda*cenL) - sumlenH*exp(-lambda*cenH); return (1.0 / lambda) - (sum /(double)(stop-start)) + (sum1 / sum2);}doublesecond_deriv_cen(double lambda, struct stat_str *sptr, int start, int stop, double sumlenL, double cenL, double sumlenH, double cenH) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -