📄 fastdnaml.c

📁 fastDNAml is an attempt to solve the same problem as DNAML, but to do so faster and using less memo
💻 C
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/*  fastDNAml, a program for estimation of phylogenetic trees from sequences. *  Copyright (C) 1998, 1999, 2000 by Gary J. Olsen * *  This program is free software; you may redistribute it and/or modify it *  under the terms of the GNU General Public License as published by the Free *  Software Foundation; either version 2 of the License, or (at your option) *  any later version. * *  This program is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License *  for more details. * *  You should have received a copy of the GNU General Public License along *  with this program; if not, write to the Free Software Foundation, Inc., *  59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. * *  For any other enquiries write to Gary J. Olsen, Department of Microbiology, *  University of Illinois, Urbana, IL  61801, USA * *  Or send E-mail to gary@phylo.life.uiuc.edu * * *  fastDNAml is based in part on the program dnaml by Joseph Felsenstein. * *  Copyright notice from dnaml: * *     version 3.3. (c) Copyright 1986, 1990 by the University of Washington *     and Joseph Felsenstein.  Written by Joseph Felsenstein.  Permission is *     granted to copy and use this program provided no fee is charged for it *     and provided that this copyright notice is not removed. * * *  When publishing work that is based on results from fastDNAml please cite: * *  Felsenstein, J.  1981.  Evolutionary trees from DNA sequences: *  A maximum likelihood approach.  J. Mol. Evol. 17: 368-376. * *  and * *  Olsen, G. J., Matsuda, H., Hagstrom, R., and Overbeek, R.  1994. *  fastDNAml:  A tool for construction of phylogenetic trees of DNA *  sequences using maximum likelihood.  Comput. Appl. Biosci. 10: 41-48. *//*  Conversion to C and changes in sequential code by Gary Olsen, 1991-1994 * *  p4 version by Hideo Matsuda and Ross Overbeek, 1991-1993 *//* *  1.0    March 14, 1992 *         Initial "release" version * *  1.0.1  March 18, 1992 *         Add ntaxa to tree comments *         Set minimum branch length on reading tree *         Add blanks around operators in treeString (for prolog parsing) *         Add program version to treeString comments * *  1.0.2  April 6, 1992 *         Improved option line diagnostics *         Improved auxiliary line diagnostics *         Removed some trailing blanks from output * *  1.0.3  April 6, 1992 *         Checkpoint trees that do not need any optimization *         Print restart tree likelihood before optimizing *         Fix treefile option so that it really toggles * *  1.0.4  July 13, 1992 *         Add support for tree fact (instead of true Newick tree) in *            processTreeCom, treeReadLen, str_processTreeCom and *            str_treeReadLen *         Use bit operations in randum *         Correct error in bootstrap mask used with weighting mask * *  1.0.5  August 22, 1992 *         Fix reading of underscore as first nonblank character in name *         Add strchr and strstr functions to source code *         Add output treefile name to message "Tree also written ..." * *  1.0.6  November 20, 1992 *         Change (! nsites) test in setupTopol to (nsites == 0) for MIPS R4000 *         Add vectorizing compiler directives for CRAY *         Include updates and corrections to parallel code from H. Matsuda * *  1.0.7  March 25, 1993 *         Remove translation of underlines in taxon names * *  1.0.8  April 30, 1993 *         Remove version number from fastDNAml.h file name * *  1.0.9  August 12, 1993 *         Version was never released. *         Redefine treefile formats and default: *             0  None *             1  Newick *             2  Prolog *             3  PHYLIP (Default) *         Remove quote marks and comment from PHYLIP treefile format. * *  1.1.0  September 3-5, 1993 *         Arrays of size maxpatterns moved from stack to heap (mallocs) in *            evaluateslope, makenewz, and cmpBestTrees. *         Correct [maxsites] to [maxpatterns] in temporary array definitions *            in Vectorize code of newview and evaluate.  (These should also *            get converted to use malloc() at some point.) *         Change randum to use 12 bit segments, not 6.  Change its seed *            parameter to long *. *         Remove the code that took the absolute value of random seeds. *         Correct integer divide artifact in setting default transition/ *            transversion parameter values. *         When transition/transversion ratio is "reset", change to small *            value, not the program default. *         Report the "reset" transition/transversion ratio in the output. *         Move global data into analdef, rawDNA, and crunchedDNA structures. *         Change names of routines white and digit to whitechar and digitchar. *         Convert y[] to yType, which is normally char, but is int if the *            Vectorize flag is set. *         Split option line reading out of getoptions routine. * *  1.1.1  September 30, 1994 *         Incorporate changes made in 1.0.A (Feb. 11, 1994): *            Remove processing of quotation marks within comments. *            Break label finding into copy to string and find tip. *            Generalize tree reading to read trees when names are and are not *               already known. *            Remove absolute value from randum seed reading. *         Include integer version number and program date. *         Remove maxsite, maxpatterns and maxsp limitations. *         Incorporate code for retaining multiple trees. *         Activate code for Hasegawa & Kishino test of tree differences. *         Make quick add the default, with Q turning it off. *         Make emperical frequencies the option with F turning it off. *         Allow a residue frequency option line anywhere in the options. *         Increase string length passed to treeString (should be length *               checked, but ...) *         Introduce (Sept.30) and fix (Oct. 26) bug in bootstrap sampling. *         Fix error when user frequencies are last line and start with F. * *  1.2    September 5, 1997 *         Move likelihood components into structure. *         Change rawDNA to rawdata. *         Change crunchedDNA to cruncheddata. *         Recast the likelihoods per site into an array of stuctures, *               where each stucture (likelivector) includes the likelihoods *               of each residue type at the site, and a magnitude scale *               factor (exp).  This requires changing the space allocation, *               newview, makenewz, evaluate, and sigma. *         Change code of newview to rescale likelihoods up by 2**256 when *               the largest value falls below 2**-256.  This should solve *               floating point underflow for all practical sized trees. *               No changes are necessary in makenewz or sigma, since only *               relative likelihoods are necessary. * *  1.2.1  March 9, 1998 *         Convert likelihood adjustment factor (2**256) to a constant. *         Fix vectorized calculation of likelihood (error introduced in 1.2) * *  1.2.2  December 23, 1998 *         General code clean-up. *         Convert to function definitions with parameter type lists * *  1.2.2  January 3, 2000 *         Add copyright and license information *         Make this the current release version */#ifdef Master#  undef  Master#  define Master     1#  define Slave      0#  define Sequential 0#else#  ifdef Slave#    undef Slave#    define Master     0#    define Slave      1#    define Sequential 0#  else#    ifdef Sequential#      undef Sequential#    endif#    define Master     0#    define Slave      0#    define Sequential 1#  endif#endif#ifdef  CRAY#  define  Vectorize#endif#ifdef  Vectorize#  define maxpatterns  10000  /* maximum number of different site patterns */#endif#include <stdio.h>#include <string.h>#include <math.h>#include "fastDNAml_types.h"  /*  Requires version 1.2  */#include "fastDNAml_funcs.h"#include "fastDNAml_globals.h"/*xarray       *usedxtip, *freextip;char         likelihood_key[] = "likelihood";char         ntaxa_key[]      = "ntaxa";char         opt_level_key[]  = "opt_level";char         smoothed_key[]   = "smoothed";*//*=======================================================================*//*                              PROGRAM                                  *//*=======================================================================*//*-----------------------------------------------------------------------*//*            Management of xarray data                                  *//*                                                                       *//*-----------------------------------------------------------------------*/boolean linkdata2tree (rawdata *rdta, cruncheddata *cdta, tree *tr)    /* Link data array to the tree tips */  { /* linkdata2tree */    int  i;    for (i = 1; i <= tr->mxtips; i++) {    /* Associate data with tips */      tr->nodep[i]->tip = &(rdta->y[i-1][0]);      }    tr->rdta       = rdta;    tr->cdta       = cdta;    return TRUE;  } /* linkdata2tree */xarray *setupxarray (int npat)  { /* setupxarray */    xarray        *x;    likelivector  *data;    x = (xarray *) Malloc(sizeof(xarray));    if (x) {      data = (likelivector *) Malloc(npat * sizeof(likelivector));      if (data) {        x->lv = data;        x->prev = x->next = x;        x->owner = (node *) NULL;        }      else {        Free(x);        return (xarray *) NULL;        }      }    return x;  } /* setupxarray */boolean linkxarray (int req, int min, int npat,                    xarray **freexptr, xarray **usedxptr)    /*  Link a set of xarrays */  { /* linkxarray */    xarray  *first, *prev, *x;    int  i;    first = prev = (xarray*)NULL;    i = 0;    do {      x = setupxarray(npat);      if(x) {        if(!first) {          first = x;        }        else {          prev->next = x;          x->prev = prev;        }        prev = x;        i++;      }      else {        printf("ERROR: Failure to get requested xarray memory\n");        if(i<min)  return  FALSE;      }    } while((i<req) && x);    if (first) {      first->prev = prev;      prev->next = first;    }    *freexptr = first;    *usedxptr = (xarray*)NULL;    return  TRUE;  } /* linkxarray */boolean setupnodex (tree *tr)  { /* setupnodex */    nodeptr  p;    int  i;    for (i = tr->mxtips + 1; (i <= 2*(tr->mxtips) - 2); i++) {      p = tr->nodep[i];      if (! (p->x = setupxarray(tr->cdta->endsite))) {        printf("ERROR: Failure to get internal node xarray memory\n");        return  FALSE;        }      }    return  TRUE;  } /* setupnodex */xarray *getxtip (nodeptr p)  { /* getxtip */    xarray  *new;    boolean  splice;    if (! p) return (xarray *) NULL;    splice = FALSE;    if (p->x) {                  /* array is there; move to tail of list */      new = p->x;      if (new == new->prev) ;             /* linked to self; leave it */      else if (new == usedxtip) usedxtip = usedxtip->next; /* at head */      else if (new == usedxtip->prev) ;   /* already at tail */      else {                              /* move to tail of list */        new->prev->next = new->next;        new->next->prev = new->prev;        splice = TRUE;      }    }    else if (freextip) {                 /* take from unused list */      p->x = new = freextip;      new->owner = p;      if (new->prev != new) {            /* not only member of freelist */        new->prev->next = new->next;        new->next->prev = new->prev;        freextip = new->next;      }      else        freextip = (xarray *) NULL;      splice = TRUE;    }    else if (usedxtip) {                 /* take from head of used list */      usedxtip->owner->x = (xarray *) NULL;      p->x = new = usedxtip;      new->owner = p;      usedxtip = usedxtip->next;    }    else {      printf("ERROR: Unable to locate memory for tip %d.\n", p->number);      return  (xarray *) NULL;    }    if (splice) {      if (usedxtip) {                  /* list is not empty */        usedxtip->prev->next = new;        new->prev = usedxtip->prev;        usedxtip->prev = new;        new->next = usedxtip;      }      else        usedxtip = new->prev = new->next = new;    }    return  new;  } /* getxtip */xarray *getxnode (nodeptr p)    /* Ensure that internal node p has memory */  { /* getxnode */    nodeptr  s;    if (! (p->x)) {  /*  Move likelihood array on this node to sector p */      if ((s = p->next)->x || (s = s->next)->x) {        p->x = s->x;        s->x = (xarray *) NULL;        }      else {        printf("ERROR: Unable to locate memory at node %d.\n", p->number);        exit(1);        }      }    return  p->x;  } /* getxnode *//*-----------------------------------------------------------------------*//*            Maximum likelihood calculations                            *//*                                                                       *//*-----------------------------------------------------------------------*/boolean newview (tree *tr, nodeptr p)        /*  Update likelihoods at node */  { /* newview */    double   zq, lzq, xvlzq, zr, lzr, xvlzr;    nodeptr  q, r;    likelivector *lp, *lq, *lr;    int  i;/* If node p is a tip, then make sure that it has x-data. */    if (p->tip) {      likelivector *l;      int           code;      yType        *yptr;      if (p->x) return TRUE;  /*  They are already there */      if (! getxtip(p)) return FALSE; /*  They are not, so get memory */      l = p->x->lv;           /*  Pointer to first likelihood vector value */      yptr = p->tip;          /*  Pointer to first nucleotide datum */
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -