⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wgts_categs.c

📁 fastDNAml is an attempt to solve the same problem as DNAML, but to do so faster and using less memo
💻 C
字号:
#include  <stdlib.h>#include  <stdio.h>#include  <string.h>#include  "fastDNAml_types.h"#include  "fastDNAml_funcs.h"void makeboot (analdef *adef, rawdata *rdta, cruncheddata *cdta)  { /* makeboot */    int  i, j, nonzero;    double  randum();    nonzero = 0;    for(i=1; i<=rdta->sites; i++)      if(rdta->wgt[i]>0)        nonzero++;    for(j=1; j<=nonzero; j++)      cdta->aliaswgt[j] = 0;    for(j=1; j<=nonzero; j++)      cdta->aliaswgt[(int)(nonzero*randum(&adef->boot))+1]++;    j = 0;    cdta->wgtsum = 0;    for(i=1; i<=rdta->sites; i++) {      if (rdta->wgt[i] > 0)        cdta->wgtsum += (rdta->wgt2[i] = rdta->wgt[i] * cdta->aliaswgt[++j]);      else        rdta->wgt2[i] = 0;      }  } /* makeboot */void sitesort (rawdata *rdta, cruncheddata *cdta)    /* Shell sort keeping sites with identical residues and weights in     * the original order (i.e., a stable sort).     * The index created in cdta->alias is 1 based.  The     * sitecombcrunch routine packs it to a 0 based index.     */  { /* sitesort */    int  gap, i, j, jj, jg, k, n, nsp;    int  *index, *category;    boolean  flip, tied;    yType  **data;    index    = cdta->alias;    category = rdta->sitecat;    data     = rdta->y;    n        = rdta->sites;    nsp      = rdta->numsp;    for (gap = n / 2; gap > 0; gap /= 2) {      for (i = gap + 1; i <= n; i++) {        j = i - gap;        do {          jj = index[j];          jg = index[j+gap];          flip = (category[jj] >  category[jg]);          tied = (category[jj] == category[jg]);          for (k = 1; (k <= nsp) && tied; k++) {            flip = (data[k][jj] >  data[k][jg]);            tied = (data[k][jj] == data[k][jg]);            }          if (flip) {            index[j]     = jg;            index[j+gap] = jj;            j -= gap;            }          } while (flip && (j > 0));        }  /* for (i ...   */      }    /* for (gap ... */  } /* sitesort */void sitecombcrunch (rawdata *rdta, cruncheddata *cdta)    /* combine sites that have identical patterns (and nonzero weight) */  { /* sitecombcrunch */    int  i, sitei, j, sitej, k;    boolean  tied;    i = 0;    cdta->alias[0] = cdta->alias[1];    cdta->aliaswgt[0] = 0;    for(j=1; j<=rdta->sites; j++) {      sitei = cdta->alias[i];      sitej = cdta->alias[j];      tied = (rdta->sitecat[sitei] == rdta->sitecat[sitej]);      for(k=1; tied && (k<=rdta->numsp); k++)        tied = (rdta->y[k][sitei] == rdta->y[k][sitej]);      if(tied) {        cdta->aliaswgt[i] += rdta->wgt2[sitej];      }      else {        if(cdta->aliaswgt[i] > 0) i++;        cdta->aliaswgt[i] = rdta->wgt2[sitej];        cdta->alias[i] = sitej;      }    }    cdta->endsite = i;    if(cdta->aliaswgt[i] > 0) cdta->endsite++;  } /* sitecombcrunch */boolean makeweights (analdef *adef, rawdata *rdta, cruncheddata *cdta)    /* make up weights vector to avoid duplicate computations */  { /* makeweights */    int  i;    if(adef->boot)      makeboot(adef,rdta,cdta);    else      for(i=1; i<=rdta->sites; i++)        rdta->wgt2[i] = rdta->wgt[i];    for(i=1; i<=rdta->sites; i++)      cdta->alias[i] = i;    sitesort(rdta,cdta);    sitecombcrunch(rdta,cdta);    printf("Total weight of positions in analysis = %d\n", cdta->wgtsum);    printf("There are %d distinct data patterns (columns)\n\n", cdta->endsite);    return TRUE;  } /* makeweights */boolean makevalues (rawdata *rdta, cruncheddata *cdta)    /* set up fractional likelihoods at tips */  { /* makevalues */    double  temp, wtemp;    int  i, j;    for (i = 1; i <= rdta->numsp; i++) {    /* Pack and move tip data */      for (j = 0; j < cdta->endsite; j++) {        rdta->y[i-1][j] = rdta->y[i][cdta->alias[j]];        }      }    for (j = 0; j < cdta->endsite; j++) {      cdta->patcat[j] = i = rdta->sitecat[cdta->alias[j]];      cdta->patrat[j] = temp = rdta->catrat[i];      cdta->wr[j]  = wtemp = temp * cdta->aliaswgt[j];      cdta->wr2[j] = temp * wtemp;      }    return TRUE;  } /* makevalues */boolean empiricalfreqs (rawdata *rdta, cruncheddata *cdta)    /* Get empirical base frequencies from the data */  { /* empiricalfreqs */    double  sum, suma, sumc, sumg, sumt, wj, fa, fc, fg, ft;    int     i, j, k, code;    yType  *yptr;    rdta->freqa = 0.25;    rdta->freqc = 0.25;    rdta->freqg = 0.25;    rdta->freqt = 0.25;    for (k = 1; k <= 8; k++) {      suma = 0.0;      sumc = 0.0;      sumg = 0.0;      sumt = 0.0;      for (i = 0; i < rdta->numsp; i++) {        yptr = rdta->y[i];        for (j = 0; j < cdta->endsite; j++) {          code = *yptr++;          fa = rdta->freqa * ( code       & 1);          fc = rdta->freqc * ((code >> 1) & 1);          fg = rdta->freqg * ((code >> 2) & 1);          ft = rdta->freqt * ((code >> 3) & 1);          wj = cdta->aliaswgt[j] / (fa + fc + fg + ft);          suma += wj * fa;          sumc += wj * fc;          sumg += wj * fg;          sumt += wj * ft;          }        }      sum = suma + sumc + sumg + sumt;      rdta->freqa = suma / sum;      rdta->freqc = sumc / sum;      rdta->freqg = sumg / sum;      rdta->freqt = sumt / sum;      }    return TRUE;  } /* empiricalfreqs */void reportfreqs (analdef *adef, rawdata *rdta)  { /* reportfreqs */    double  suma, sumb;    if (adef->empf) printf("Empirical ");    printf("Base Frequencies:\n\n");    printf("   A    %10.5f\n",   rdta->freqa);    printf("   C    %10.5f\n",   rdta->freqc);    printf("   G    %10.5f\n",   rdta->freqg);    printf("  T(U)  %10.5f\n\n", rdta->freqt);    rdta->freqr = rdta->freqa + rdta->freqg;    rdta->invfreqr = 1.0/rdta->freqr;    rdta->freqar = rdta->freqa * rdta->invfreqr;    rdta->freqgr = rdta->freqg * rdta->invfreqr;    rdta->freqy = rdta->freqc + rdta->freqt;    rdta->invfreqy = 1.0/rdta->freqy;    rdta->freqcy = rdta->freqc * rdta->invfreqy;    rdta->freqty = rdta->freqt * rdta->invfreqy;    printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);    suma = rdta->ttratio*rdta->freqr*rdta->freqy         - (rdta->freqa*rdta->freqg + rdta->freqc*rdta->freqt);    sumb = rdta->freqa*rdta->freqgr + rdta->freqc*rdta->freqty;    rdta->xi = suma/(suma+sumb);    rdta->xv = 1.0 - rdta->xi;    if (rdta->xi <= 0.0) {      printf("WARNING: This transition/transversion ratio\n");      printf("         is impossible with these base frequencies!\n");      printf("Transition/transversion parameter reset\n\n");      rdta->xi = 0.000001;      rdta->xv = 1.0 - rdta->xi;      rdta->ttratio = (sumb * rdta->xi / rdta->xv                        + rdta->freqa * rdta->freqg                       + rdta->freqc * rdta->freqt)                    / (rdta->freqr * rdta->freqy);      printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio);      }    printf("(Transition/transversion parameter = %10.6f)\n\n",            rdta->xi/rdta->xv);    rdta->fracchange = 2.0 * rdta->xi * (rdta->freqa * rdta->freqgr                                       + rdta->freqc * rdta->freqty)                     + rdta->xv * (1.0 - rdta->freqa * rdta->freqa                                       - rdta->freqc * rdta->freqc                                       - rdta->freqg * rdta->freqg                                       - rdta->freqt * rdta->freqt);  } /* reportfreqs */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -