📄 dnaml.c
字号:
if (invarfrac){ printf("-i%3.2f given, so invarfrac=%3.2f\n",invarfrac,invarfrac); }else{ do { printf("Fraction of invariant sites?\n"); scanf("%lf%*[^\n]", &invarfrac); getchar(); countup (&loopcount, 10); } while ((invarfrac <= 0.0) || (invarfrac >= 1.0)); } initgammacat(rcategs-1, alpha, rrate, probcat); for (i = 0; i < rcategs-1; i++) probcat[i] = probcat[i]*(1.0-invarfrac); probcat[rcategs-1] = invarfrac; rrate[rcategs-1] = 0.0; } else { initcategs(rcategs, rrate); initprobcat(rcategs, &probsum, probcat); } } } if (!didchangercat){ rrate = (double *) Malloc(rcategs*sizeof(double)); probcat = (double *) Malloc(rcategs*sizeof(double)); rrate[0] = 1.0; probcat[0] = 1.0; } if (!didchangecat){ rate = (double *) Malloc(categs*sizeof(double)); rate[0] = 1.0; } } /* getoptions */void reallocsites(void){ long i; for (i=0; i < spp; i++) { free(y[i]); y[i] = (Char *) Malloc(sites*sizeof(Char)); } free(category); free(weight); free(alias); free(ally); free(location); free(aliasweight); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long));}void allocrest(){ long i; y = (Char **) Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *) Malloc(sites*sizeof(Char)); nayme = (naym *) Malloc(spp*sizeof(naym));; enterorder = (long *) Malloc(spp*sizeof(long)); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long));} /* allocrest */void doinit(){ /* initializes variables */ inputnumbers(&spp, &sites, &nonodes2, 2); getoptions(); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); alloctree(&curtree.nodep, nonodes2, usertree); allocrest(); if (usertree && !reconsider) return; alloctree(&bestree.nodep, nonodes2, 0); alloctree(&priortree.nodep, nonodes2, 0); if (njumble <= 1) return; alloctree(&bestree2.nodep, nonodes2, 0);} /* doinit */void inputoptions(){ long i; if (!firstset && !justwts) { samenumsp(&sites, ith); reallocsites(); } for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; if (justwts || weights) inputweights(sites, weight, &weights); weightsum = 0; for (i = 0; i < sites; i++) weightsum += weight[i]; if (ctgry && categs > 1) { inputcategs(0, sites, category, categs, "DnaML"); if (printdata) printcategs(outfile, sites, category, "Site categories"); } if (weights && printdata) printweights(outfile, 0, sites, weight, "Sites");} /* inputoptions */void makeweights(){ /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; ally[i - 1] = 0; aliasweight[i - 1] = weight[i - 1]; location[i - 1] = 0; } sitesort2 (sites, aliasweight); sitecombine2(sites, aliasweight); sitescrunch2(sites, 1, 2, aliasweight); for (i = 1; i <= sites; i++) { if (aliasweight[i - 1] > 0) endsite = i; } for (i = 1; i <= endsite; i++) { location[alias[i - 1] - 1] = i; ally[alias[i - 1] - 1] = alias[i - 1]; } term = (double **) Malloc( endsite * sizeof(double *)); for (i = 0; i < endsite; i++) term[i] = (double *) Malloc( rcategs * sizeof(double)); slopeterm = (double **) Malloc( endsite * sizeof(double *)); for (i = 0; i < endsite; i++) slopeterm[i] = (double *) Malloc( rcategs * sizeof(double)); curveterm = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) curveterm[i] = (double *) Malloc( rcategs * sizeof(double)); mp = (vall *) Malloc( sites*sizeof(vall)); contribution = (contribarr *) Malloc( endsite*sizeof(contribarr));} /* makeweights */void getinput(){ /* reads the input data */ inputoptions(); if (!freqsfrom) getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, true); if (!justwts || firstset) inputdata(sites); makeweights(); setuptree2(curtree); if (!usertree || reconsider) { setuptree2(bestree); setuptree2(priortree); if (njumble > 1) setuptree2(bestree2); } allocx(nonodes2, rcategs, curtree.nodep, usertree); if (!usertree || reconsider) { allocx(nonodes2, rcategs, bestree.nodep, 0); allocx(nonodes2, rcategs, priortree.nodep, 0); if (njumble > 1) allocx(nonodes2, rcategs, bestree2.nodep, 0); } makevalues2(rcategs, curtree.nodep, endsite, spp, y, alias); if (freqsfrom) { empiricalfreqs(&freqa, &freqc, &freqg, &freqt, aliasweight, curtree.nodep); getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, true); } if (!justwts || firstset) fprintf(outfile, "\nTransition/transversion ratio = %10.6f\n\n", ttratio);} /* getinput */void inittable_for_usertree(FILE *intree){ /* If there's a user tree, then the ww/zz/wwzz/vvzz elements need to be allocated appropriately. */ long num_comma; long i, j; /* First, figure out the largest possible furcation, i.e. the number of commas plus one */ countcomma(&intree, &num_comma); num_comma++; for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { /* Free the stuff allocated assuming bifurcations */ free (tbl[i][j]->ww); free (tbl[i][j]->zz); free (tbl[i][j]->wwzz); free (tbl[i][j]->vvzz); /* Then allocate for worst-case multifurcations */ tbl[i][j]->ww = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->zz = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->wwzz = (double *) Malloc( num_comma * sizeof (double)); tbl[i][j]->vvzz = (double *) Malloc( num_comma * sizeof (double)); } }} /* inittable_for_usertree */void inittable(){ /* Define a lookup table. Precompute values and print them out in tables */ long i, j; double sumrates; tbl = (valrec ***) Malloc(rcategs * sizeof(valrec **)); for (i = 0; i < rcategs; i++) { tbl[i] = (valrec **) Malloc(categs*sizeof(valrec *)); for (j = 0; j < categs; j++) tbl[i][j] = (valrec *) Malloc(sizeof(valrec)); } for (i = 0; i < rcategs; i++) { for (j = 0; j < categs; j++) { tbl[i][j]->rat = rrate[i]*rate[j]; tbl[i][j]->ratxi = tbl[i][j]->rat * xi; tbl[i][j]->ratxv = tbl[i][j]->rat * xv; /* Allocate assuming bifurcations, will be changed later if neccesarry (i.e. there's a user tree) */ tbl[i][j]->ww = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->zz = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->wwzz = (double *) Malloc( 2 * sizeof (double)); tbl[i][j]->vvzz = (double *) Malloc( 2 * sizeof (double)); } } if (!lngths) { /* restandardize rates */ sumrates = 0.0; for (i = 0; i < endsite; i++) { for (j = 0; j < rcategs; j++) sumrates += aliasweight[i] * probcat[j] * tbl[j][category[alias[i] - 1] - 1]->rat; } sumrates /= (double)sites; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->rat /= sumrates; tbl[i][j]->ratxi /= sumrates; tbl[i][j]->ratxv /= sumrates; } } if (rcategs > 1) { if (gama) { fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); fprintf(outfile, " Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha); } fprintf(outfile, "\nState in HMM Rate of change Probability\n\n"); for (i = 0; i < rcategs; i++) if (probcat[i] < 0.0001) fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.001) fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.01) fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); else fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); putc('\n', outfile); if (auto_) fprintf(outfile, "Expected length of a patch of sites having the same rate = %8.3f\n", 1/lambda); putc('\n', outfile); } if (categs > 1) { fprintf(outfile, "\nSite category Rate of change\n\n"); for (i = 0; i < categs; i++) fprintf(outfile, "%9ld%16.3f\n", i+1, rate[i]); } if ((rcategs > 1) || (categs >> 1)) fprintf(outfile, "\n\n");} /* inittable */double evaluate(node *p, boolean saveit){ contribarr tterm; double sum, sum2, sumc, y, lz, y1, z1zz, z1yy, prod12, prod1, prod2, prod3, sumterm, lterm; long i, j, k, lai; node *q; sitelike x1, x2; sum = 0.0; q = p->back; y = p->v; lz = -y; for (i = 0; i < rcategs; i++) for (j = 0; j < categs; j++) { tbl[i][j]->orig_zz = exp(tbl[i][j]->ratxi * lz); tbl[i][j]->z1 = exp(tbl[i][j]->ratxv * lz); tbl[i][j]->z1zz = tbl[i][j]->z1 * tbl[i][j]->orig_zz; tbl[i][j]->z1yy = tbl[i][j]->z1 - tbl[i][j]->z1zz; } for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { if (y > 0.0) { y1 = 1.0 - tbl[j][k]->z1; z1zz = tbl[j][k]->z1zz; z1yy = tbl[j][k]->z1yy; } else { y1 = 0.0; z1zz = 1.0; z1yy = 0.0; } memcpy(x1, p->x[i][j], sizeof(sitelike)); prod1 = freqa * x1[0] + freqc * x1[(long)C - (long)A] + freqg * x1[(long)G - (long)A] + freqt * x1[(long)T - (long)A]; memcpy(x2, q->x[i][j], sizeof(sitelike)); prod2 = freqa * x2[0] + freqc * x2[(long)C - (long)A] + freqg * x2[(long)G - (long)A] + freqt * x2[(long)T - (long)A]; prod3 = (x1[0] * freqa + x1[(long)G - (long)A] * freqg) * (x2[0] * freqar + x2[(long)G - (long)A] * freqgr) + (x1[(long)C - (long)A] * freqc + x1[(long)T - (long)A] * freqt) * (x2[(long)C - (long)A] * freqcy + x2[(long)T - (long)A] * freqty); prod12 = freqa * x1[0] * x2[0] + freqc * x1[(long)C - (long)A] * x2[(long)C - (long)A] + freqg * x1[(long)G - (long)A] * x2[(long)G - (long)A] + freqt * x1[(long)T - (long)A] * x2[(long)T - (long)A]; tterm[j] = z1zz * prod12 + z1yy * prod3 + y1 * prod1 * prod2; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * tterm[j]; lterm = log(sumterm); for (j = 0; j < rcategs; j++) clai[j] = tterm[j] / sumterm; memcpy(contribution[i], clai, rcategs*sizeof(double)); if (saveit && !auto_ && usertree) l0gf[which - 1][i] = lterm; sum += aliasweight[i] * lterm; } for (j = 0; j < rcategs; j++) like[j] = 1.0; for (i = 0; i < sites; i++) { sumc = 0.0; for (k = 0; k < rcategs; k++) sumc += probcat[k] * like[k]; sumc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, contribution[lai - 1], rcategs*sizeof(double)); for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; } else { for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc); } memcpy(like, nulike, rcategs*sizeof(double)); } sum2 = 0.0; for (i = 0; i < rcategs; i++) sum2 += probcat[i] * like[i]; sum += log(sum2); curtree.likelihood = sum; if (!saveit || auto_ || !usertree) return sum; l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; return sum; } if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } return sum;} /* evaluate */void alloc_nvd (long num_sibs, nuview_data *local_nvd){ /* Allocate blocks of memory appropriate for the number of siblings a given node has */ local_nvd->yy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->wwzz = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vvzz = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vzsumr = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->vzsumy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sum = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sumr = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->sumy = (double *) Malloc( num_sibs * sizeof (double)); local_nvd->xx = (sitelike *) Malloc( num_sibs * sizeof (sitelike));} /* alloc_nvd */void free_nvd (nuview_data *local_nvd){ /* The natural complement to the alloc version */ free (local_nvd->yy); free (local_nvd->wwzz); free (local_nvd->vvzz); free (local_nvd->vzsumr); free (local_nvd->vzsumy); free (local_nvd->sum); free (local_nvd->sumr); free (local_nvd->sumy); free (local_nvd->xx);} /* free_nvd */void nuview(node *p){ long i, j, k, num_sibs, sib_index; nuview_data *local_nvd; node *sib_ptr, *sib_back_ptr; sitelike p_xx; double lw; /* Figure out how many siblings the current node has */ num_sibs = count_sibs (p); /* Recursive calls, should be called for all children */ sib_ptr = p; for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (!sib_back_ptr->tip && !sib_back_ptr->initialized) nuview (sib_back_ptr); } /* Allocate the structure and blocks therein for variables used in this function */ local_nvd = (nuview_data *) Malloc( sizeof (nuview_data)); alloc_nvd (num_sibs, local_nvd); /* Loop 1: makes assignments to tbl based on some combination of what's already in tbl and the children's value of v */ sib_ptr = p; for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -