⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readtree.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdarg.h>
#include <ctype.h>
#include "clustalw.h"

/*
 *   Prototypes
 */
static void create_tree(FILE *fd,INODEPTR ptree, INODEPTR parent, INODEPTR *lptr, INODEPTR *ptrs, IN_TREEPTR itree);
static void create_node(INODEPTR pptr, INODEPTR parent);
static INODEPTR insert_node(INODEPTR pptr);
static void skip_space(FILE *fd);
static INODEPTR avail(void);
static void set_info(INODEPTR p, INODEPTR parent, sint pleaf, char *pname, float pdist);
static INODEPTR reroot(INODEPTR ptree, sint nseqs,INODEPTR *leafptr, INODEPTR *ptrs);
static INODEPTR insert_root(INODEPTR p, float diff);
static float calc_root_mean(INODEPTR root, float *maxdist,INODEPTR *leafptr);
static float calc_mean(INODEPTR nptr, float *maxdist, sint nseqs,INODEPTR *leafptr);
static void order_nodes(INODEPTR *leafptr);
static void free_tree_nodes(INODEPTR p);

static sint status;
/*
   read a phylip format tree file into a structure IN_TREE
	treefile	- the name of the phylip file
	seqs		- the sequences, used to check that the tree sequence names
			  correspond to the alignment sequence names
			  set to NULL, if you don't want to compare with the alignment
	fseq		- the first sequence in 'seqs' to be used for the name check
	nseq		- the no. of sequences in 'seqs' to be used for the name check
	itree		- the structure that will contain the tree
   returns -1 for an error, 1 for a tree with branch lengths, 0 for a tree with no lengths
*/

sint read_tree(char *treefile, SEQ *seqs, sint fseq, sint nseq,IN_TREEPTR itree)
{

  FILE *fd;
  char c;
  char name1[MAXNAMES+1], name2[MAXNAMES+1];
  sint i, j, k;
  sint ret;
  Boolean found;
  INODEPTR *lptr;		/* contains a list of pointers to the tree leaves,
				   ie. the sequences */
  INODEPTR *ptrs;		/* contains a list of pointers to all tree nodes */
  INODEPTR seq_tree;		/* a pointer to the initial root of the tree */

#ifdef VMS
  if ((fd = fopen(treefile,"r","rat=cr","rfm=var")) == NULL)
#else
  if ((fd = fopen(treefile, "r")) == NULL)
#endif
    {
      error("cannot open %s", treefile);
      return((sint)-1);
    }

  skip_space(fd);
  c = (char)getc(fd);
  if (c != '(')
    {
      error("Wrong format in tree file %s", treefile);
      return((sint)-1);
    }
  rewind(fd);

  itree->rooted_tree = TRUE;
  itree->distance_tree = TRUE;
  itree->nnodes = 0;
  itree->nleaves = 0;

/*
  Allocate memory for tree
*/
  ptrs = (INODEPTR *)ckalloc(3*(nseq-fseq+1) * sizeof(INODEPTR));
  lptr = (INODEPTR *)ckalloc((nseq-fseq+1) * sizeof(INODEPTR));
  itree->leafptr = (INODEPTR *)ckalloc((nseq+1) * sizeof(INODEPTR));
  
  seq_tree = avail();
  set_info(seq_tree, NULL, 0, "", (float)0.0);

  status=1;
  create_tree(fd,seq_tree,NULL,lptr,ptrs,itree);
  fclose(fd);


  if (seqs!= NULL && itree->nleaves != nseq-fseq)
     {
         error("tree not compatible with alignment\n(%d sequences in alignment and %d in tree", (pint)nseq-fseq,(pint)itree->nleaves);
         return((sint)-1);
     }

  /*if(status==0) warning("negative distances in tree - cannot calculate sequence weights");*/

  lptr[itree->nleaves]=NULL;
  ptrs[itree->nnodes]=NULL;

/*
  If the tree is unrooted, reroot the tree - ie. minimise the difference
  between the mean root->leaf distances for the left and right branches of
  the tree.
*/

  if (itree->distance_tree == FALSE)
     {
  	if (itree->rooted_tree == FALSE)
          {
       	     error("input tree is unrooted and has no distances.\nCannot align sequences");
             return((sint)-1);
          }
     }

  if (itree->rooted_tree == FALSE)
     {
        itree->root = reroot(seq_tree, itree->nleaves,lptr,ptrs);
     }
  else
     {
        itree->root = seq_tree;
     }

/*
  calculate the 'order' of each node.
*/
  order_nodes(lptr);

  if (itree->nleaves >= 2)
     {
/*
  If there are more than three sequences....
*/
/*
  assign the sequence nodes (in the same order as in the alignment file)
*/
      if(seqs!=NULL)
      for (i=fseq; i<nseq; i++)
       {
         if (strlen(seqs[i].name) > MAXNAMES)
             warning("name %s is too long for PHYLIP tree format (max %d chars)", seqs[i].name,MAXNAMES);

         for (k=0; k< strlen(seqs[i].name) && k<MAXNAMES ; k++)
           {
             c = seqs[i].name[k];
             if ((c>0x40) && (c<0x5b)) c=c | 0x20;
             if (c == ' ') c = '_';
             name2[k] = c;
           }
         name2[k]='\0';
         found = FALSE;
         for (j=0; j<itree->nleaves; j++)
           {
            for (k=0; k< strlen(lptr[j]->name) && k<MAXNAMES ; k++)
              {
                c = lptr[j]->name[k];
                if ((c>0x40) && (c<0x5b)) c=c | 0x20;
                name1[k] = c;
              }
            name1[k]='\0';
            if (strcmp(name1, name2) == 0)
              {
                itree->leafptr[i] = lptr[j];
                found = TRUE;
              }
           }
         if (found == FALSE)
           {
             error("tree not compatible with alignment:\n%s not found", name2);
             ret=0;
           }
       }
      else
	for(i=0;i<itree->nleaves;i++) 
		itree->leafptr[i] = lptr[i];

     }

   if(status==0)
	ret=0;
   else if(itree->distance_tree)
   	ret=1;
   else
   	ret=(-1);

   ptrs=ckfree((void *)ptrs);
   lptr=ckfree((void *)lptr);
   return ret;
}

/* A bit dirty, but char ch is needed here for the recursive function create_tree() */
static char ch;
static void create_tree(FILE *fd,INODEPTR ptree, INODEPTR parent, INODEPTR *lptr, INODEPTR *ptrs, IN_TREEPTR itree)
{
   INODEPTR p;
   sint i, type;
   float dist;
   char name[MAXNAMES+1];

/*
  is this a node or a leaf ?
*/
  skip_space(fd);
  ch = (char)getc(fd);
  if (ch == '(')
    {  
/*
   this must be a node....
*/
      type = NODE;
      name[0] = '\0';
      ptrs[itree->nnodes++] = ptree;

      create_node(ptree, parent);

      p = ptree->left;
      create_tree(fd, p, ptree, lptr, ptrs,itree);
           
      if ( ch == ',')
       {
          p = ptree->right;
          create_tree(fd, p, ptree, lptr, ptrs,itree);
          if ( ch == ',')
            {
               ptree = insert_node(ptree);
               ptrs[itree->nnodes++] = ptree;
               p = ptree->right;
               create_tree(fd, p, ptree, lptr, ptrs,itree);
               itree->rooted_tree = FALSE;
            }
       }

      skip_space(fd);
      ch = (char)getc(fd);
    }
/*
   ...otherwise, this is a leaf
*/
  else
    {
      type = LEAF;
      ptrs[itree->nnodes++] = lptr[itree->nleaves++] = ptree;
/*
   get the sequence name
*/
      name[0] = ch;
      ch = (char)getc(fd);
      i = 1;
      while ((ch != ':') && (ch != ',') && (ch != ')'))
        {
          if (i < MAXNAMES) name[i++] = ch;
          ch = (char)getc(fd);
        }
      name[i] = '\0';
      if (ch != ':')
         {
           itree->distance_tree = FALSE;
           dist = 0.0;
         }
    }

/*
   get the distance information
*/
  dist = 0.0;
  if (ch == ':')
     {
       skip_space(fd);
       fscanf(fd,"%f",&dist);
       skip_space(fd);
       ch = (char)getc(fd);
     }
   set_info(ptree, parent, type, name, dist);

   if(dist<0.0) status=0;
}

static void create_node(INODEPTR pptr, INODEPTR parent)
{
  INODEPTR t;

  pptr->parent = parent;
  t = avail();
  pptr->left = t;
  t = avail();
  pptr->right = t;
    
}

static INODEPTR insert_node(INODEPTR pptr)
{

   INODEPTR newnode;

   newnode = avail();
   create_node(newnode, pptr->parent);

   newnode->left = pptr;
   pptr->parent = newnode;

   set_info(newnode, pptr->parent, NODE, "", (float)0.0);

   return(newnode);
}

static void skip_space(FILE *fd)
{
  int   c;
  
  do
     c = getc(fd);
  while(isspace(c));

  ungetc(c, fd);
}

static INODEPTR avail(void)
{
   INODEPTR p;
   p = ckalloc(sizeof(INODE));
   p->left = NULL;
   p->right = NULL;
   p->parent = NULL;
   p->dist = 0.0;
   p->leaf = 0;
   p->order = 0;
   p->name[0] = '\0';
   return(p);
}

void free_tree(IN_TREEPTR itree)
{
   free_tree_nodes(itree->root);
      
   itree->leafptr=ckfree((void *)itree->leafptr);
}

static void free_tree_nodes(INODEPTR p)
{
   if (p==NULL) return;
   if (p->left != NULL)
     {
       free_tree_nodes(p->left);
     }
   if (p->right != NULL)
     {
       free_tree_nodes(p->right);
     }
   p->left = NULL;
   p->right = NULL;
   p=ckfree((void *)p);   
}

static void set_info(INODEPTR p, INODEPTR parent, sint pleaf, char *pname, float pdist)
{
   p->parent = parent;
   p->leaf = pleaf;
   p->dist = pdist;
   p->order = 0;
   strcpy(p->name, pname);
   if (p->leaf == TRUE)
     {
        p->left = NULL;
        p->right = NULL;
     }
}

/*
Let nodedist[i] be the distance from leaf i to a node.
Calculate the sum of nodedist[i] for each leaf on the left side of the node
and the sum of nodedist[i] for each leaf on the right side of the new root.
Find a new root node such that the difference between the left and right sums
is a minimum.
*/
static INODEPTR reroot(INODEPTR ptree, sint nseqs,INODEPTR *lptr, INODEPTR *ptrs)
{

   INODEPTR p, rootnode, rootptr;
   float   diff, mindiff = 0.0, mindepth = 1.0, maxdist;
   sint   i;

/*
  find the difference between the means of leaf->node
  distances on the left and on the right of each node
*/
   rootptr = ptree;
   for (i=0; (p=ptrs[i])!=NULL; i++)
     {
        if (p->parent == NULL)
           diff = calc_root_mean(p, &maxdist,lptr);
        else
           diff = calc_mean(p, &maxdist, nseqs,lptr);

        if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist)))
          {
              if ((maxdist < mindepth) || (i==0))
                 {
                    rootptr = p;
                    mindepth = maxdist;
                    mindiff = diff;
                 }
           }

     }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -