📄 wgts_categs.c
字号:
#include <stdlib.h>#include <stdio.h>#include <string.h>#include "fastDNAml_types.h"#include "fastDNAml_funcs.h"void makeboot (analdef *adef, rawdata *rdta, cruncheddata *cdta) { /* makeboot */ int i, j, nonzero; double randum(); nonzero = 0; for(i=1; i<=rdta->sites; i++) if(rdta->wgt[i]>0) nonzero++; for(j=1; j<=nonzero; j++) cdta->aliaswgt[j] = 0; for(j=1; j<=nonzero; j++) cdta->aliaswgt[(int)(nonzero*randum(&adef->boot))+1]++; j = 0; cdta->wgtsum = 0; for(i=1; i<=rdta->sites; i++) { if (rdta->wgt[i] > 0) cdta->wgtsum += (rdta->wgt2[i] = rdta->wgt[i] * cdta->aliaswgt[++j]); else rdta->wgt2[i] = 0; } } /* makeboot */void sitesort (rawdata *rdta, cruncheddata *cdta) /* Shell sort keeping sites with identical residues and weights in * the original order (i.e., a stable sort). * The index created in cdta->alias is 1 based. The * sitecombcrunch routine packs it to a 0 based index. */ { /* sitesort */ int gap, i, j, jj, jg, k, n, nsp; int *index, *category; boolean flip, tied; yType **data; index = cdta->alias; category = rdta->sitecat; data = rdta->y; n = rdta->sites; nsp = rdta->numsp; for (gap = n / 2; gap > 0; gap /= 2) { for (i = gap + 1; i <= n; i++) { j = i - gap; do { jj = index[j]; jg = index[j+gap]; flip = (category[jj] > category[jg]); tied = (category[jj] == category[jg]); for (k = 1; (k <= nsp) && tied; k++) { flip = (data[k][jj] > data[k][jg]); tied = (data[k][jj] == data[k][jg]); } if (flip) { index[j] = jg; index[j+gap] = jj; j -= gap; } } while (flip && (j > 0)); } /* for (i ... */ } /* for (gap ... */ } /* sitesort */void sitecombcrunch (rawdata *rdta, cruncheddata *cdta) /* combine sites that have identical patterns (and nonzero weight) */ { /* sitecombcrunch */ int i, sitei, j, sitej, k; boolean tied; i = 0; cdta->alias[0] = cdta->alias[1]; cdta->aliaswgt[0] = 0; for(j=1; j<=rdta->sites; j++) { sitei = cdta->alias[i]; sitej = cdta->alias[j]; tied = (rdta->sitecat[sitei] == rdta->sitecat[sitej]); for(k=1; tied && (k<=rdta->numsp); k++) tied = (rdta->y[k][sitei] == rdta->y[k][sitej]); if(tied) { cdta->aliaswgt[i] += rdta->wgt2[sitej]; } else { if(cdta->aliaswgt[i] > 0) i++; cdta->aliaswgt[i] = rdta->wgt2[sitej]; cdta->alias[i] = sitej; } } cdta->endsite = i; if(cdta->aliaswgt[i] > 0) cdta->endsite++; } /* sitecombcrunch */boolean makeweights (analdef *adef, rawdata *rdta, cruncheddata *cdta) /* make up weights vector to avoid duplicate computations */ { /* makeweights */ int i; if(adef->boot) makeboot(adef,rdta,cdta); else for(i=1; i<=rdta->sites; i++) rdta->wgt2[i] = rdta->wgt[i]; for(i=1; i<=rdta->sites; i++) cdta->alias[i] = i; sitesort(rdta,cdta); sitecombcrunch(rdta,cdta); printf("Total weight of positions in analysis = %d\n", cdta->wgtsum); printf("There are %d distinct data patterns (columns)\n\n", cdta->endsite); return TRUE; } /* makeweights */boolean makevalues (rawdata *rdta, cruncheddata *cdta) /* set up fractional likelihoods at tips */ { /* makevalues */ double temp, wtemp; int i, j; for (i = 1; i <= rdta->numsp; i++) { /* Pack and move tip data */ for (j = 0; j < cdta->endsite; j++) { rdta->y[i-1][j] = rdta->y[i][cdta->alias[j]]; } } for (j = 0; j < cdta->endsite; j++) { cdta->patcat[j] = i = rdta->sitecat[cdta->alias[j]]; cdta->patrat[j] = temp = rdta->catrat[i]; cdta->wr[j] = wtemp = temp * cdta->aliaswgt[j]; cdta->wr2[j] = temp * wtemp; } return TRUE; } /* makevalues */boolean empiricalfreqs (rawdata *rdta, cruncheddata *cdta) /* Get empirical base frequencies from the data */ { /* empiricalfreqs */ double sum, suma, sumc, sumg, sumt, wj, fa, fc, fg, ft; int i, j, k, code; yType *yptr; rdta->freqa = 0.25; rdta->freqc = 0.25; rdta->freqg = 0.25; rdta->freqt = 0.25; for (k = 1; k <= 8; k++) { suma = 0.0; sumc = 0.0; sumg = 0.0; sumt = 0.0; for (i = 0; i < rdta->numsp; i++) { yptr = rdta->y[i]; for (j = 0; j < cdta->endsite; j++) { code = *yptr++; fa = rdta->freqa * ( code & 1); fc = rdta->freqc * ((code >> 1) & 1); fg = rdta->freqg * ((code >> 2) & 1); ft = rdta->freqt * ((code >> 3) & 1); wj = cdta->aliaswgt[j] / (fa + fc + fg + ft); suma += wj * fa; sumc += wj * fc; sumg += wj * fg; sumt += wj * ft; } } sum = suma + sumc + sumg + sumt; rdta->freqa = suma / sum; rdta->freqc = sumc / sum; rdta->freqg = sumg / sum; rdta->freqt = sumt / sum; } return TRUE; } /* empiricalfreqs */void reportfreqs (analdef *adef, rawdata *rdta) { /* reportfreqs */ double suma, sumb; if (adef->empf) printf("Empirical "); printf("Base Frequencies:\n\n"); printf(" A %10.5f\n", rdta->freqa); printf(" C %10.5f\n", rdta->freqc); printf(" G %10.5f\n", rdta->freqg); printf(" T(U) %10.5f\n\n", rdta->freqt); rdta->freqr = rdta->freqa + rdta->freqg; rdta->invfreqr = 1.0/rdta->freqr; rdta->freqar = rdta->freqa * rdta->invfreqr; rdta->freqgr = rdta->freqg * rdta->invfreqr; rdta->freqy = rdta->freqc + rdta->freqt; rdta->invfreqy = 1.0/rdta->freqy; rdta->freqcy = rdta->freqc * rdta->invfreqy; rdta->freqty = rdta->freqt * rdta->invfreqy; printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio); suma = rdta->ttratio*rdta->freqr*rdta->freqy - (rdta->freqa*rdta->freqg + rdta->freqc*rdta->freqt); sumb = rdta->freqa*rdta->freqgr + rdta->freqc*rdta->freqty; rdta->xi = suma/(suma+sumb); rdta->xv = 1.0 - rdta->xi; if (rdta->xi <= 0.0) { printf("WARNING: This transition/transversion ratio\n"); printf(" is impossible with these base frequencies!\n"); printf("Transition/transversion parameter reset\n\n"); rdta->xi = 0.000001; rdta->xv = 1.0 - rdta->xi; rdta->ttratio = (sumb * rdta->xi / rdta->xv + rdta->freqa * rdta->freqg + rdta->freqc * rdta->freqt) / (rdta->freqr * rdta->freqy); printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio); } printf("(Transition/transversion parameter = %10.6f)\n\n", rdta->xi/rdta->xv); rdta->fracchange = 2.0 * rdta->xi * (rdta->freqa * rdta->freqgr + rdta->freqc * rdta->freqty) + rdta->xv * (1.0 - rdta->freqa * rdta->freqa - rdta->freqc * rdta->freqc - rdta->freqg * rdta->freqg - rdta->freqt * rdta->freqt); } /* reportfreqs */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -