⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 calctree.c

📁 clustalw1.83.DOS.ZIP,用于多序列比对的软件
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdarg.h>
#include <ctype.h>
#include "clustalw.h"

#define MAXERRS 10

/*
 *   Prototypes
 */
static void create_tree(treeptr ptree, treeptr parent);
static void create_node(treeptr pptr, treeptr parent);
static treeptr insert_node(treeptr pptr);
static void skip_space(FILE *fd);
static treeptr avail(void);
static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist);
static treeptr reroot(treeptr ptree, sint nseqs);
static treeptr insert_root(treeptr p, float diff);
static float calc_root_mean(treeptr root, float *maxdist);
static float calc_mean(treeptr nptr, float *maxdist, sint nseqs);
static void order_nodes(void);
static sint calc_weight(sint leaf);
static void group_seqs(treeptr p, sint *next_groups, sint nseqs);
static void mark_group1(treeptr p, sint *groups, sint n);
static void mark_group2(treeptr p, sint *groups, sint n);
static void save_set(sint n, sint *groups);
static void clear_tree_nodes(treeptr p);


/*
 *   Global variables
 */
extern Boolean interactive;
extern Boolean distance_tree;
extern Boolean usemenu;
extern sint debug;
extern double **tmat;
extern sint **sets;
extern sint nsets;
extern char **names;
extern sint *seq_weight;
extern Boolean no_weights;

char ch;
FILE *fd;
treeptr *lptr;
treeptr *olptr;
treeptr *nptr;
treeptr *ptrs;
sint nnodes = 0;
sint ntotal = 0;
Boolean rooted_tree = TRUE;
static treeptr seq_tree,root;
static sint *groups, numseq;

void calc_seq_weights(sint first_seq, sint last_seq, sint *sweight)
{
  sint   i, nseqs;
  sint   temp, sum, *weight;


/*
  If there are more than three sequences....
*/
  nseqs = last_seq-first_seq;
   if ((nseqs >= 2) && (distance_tree == TRUE) && (no_weights == FALSE))
     {
/*
  Calculate sequence weights based on Phylip tree.
*/
      weight = (sint *)ckalloc((last_seq+1) * sizeof(sint));

      for (i=first_seq; i<last_seq; i++)
           weight[i] = calc_weight(i);

/*
  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
*/

         sum = 0;
         for (i=first_seq; i<last_seq; i++)
            sum += weight[i];

         if (sum == 0)
          {
            for (i=first_seq; i<last_seq; i++)
               weight[i] = 1;
            sum = i;
          }

         for (i=first_seq; i<last_seq; i++)
           {
              sweight[i] = (weight[i] * INT_SCALE_FACTOR) / sum;
              if (sweight[i] < 1) sweight[i] = 1;
           }

       weight=ckfree((void *)weight);

     }

   else
     {
/*
  Otherwise, use identity weights.
*/
        temp = INT_SCALE_FACTOR / nseqs;
        for (i=first_seq; i<last_seq; i++)
           sweight[i] = temp;
     }

}

void create_sets(sint first_seq, sint last_seq)
{
  sint   i, j, nseqs;

  nsets = 0;
  nseqs = last_seq-first_seq;
  if (nseqs >= 2)
     {
/*
  If there are more than three sequences....
*/
       groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
       group_seqs(root, groups, nseqs);
       groups=ckfree((void *)groups);

     }

   else
     {
       groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
       for (i=0;i<nseqs-1;i++)
         {
           for (j=0;j<nseqs;j++)
              if (j<=i) groups[j] = 1;
              else if (j==i+1) groups[j] = 2;
              else groups[j] = 0;
           save_set(nseqs, groups);
         }
       groups=ckfree((void *)groups);
     }

}

sint read_tree(char *treefile, sint first_seq, sint last_seq)
{

  char c;
  char name1[MAXNAMES+1], name2[MAXNAMES+1];
  sint i, j, k;
  Boolean found;

  numseq = 0;
  nnodes = 0;
  ntotal = 0;
  rooted_tree = TRUE;

#ifdef VMS
  if ((fd = fopen(treefile,"r","rat=cr","rfm=var")) == NULL)
#else
  if ((fd = fopen(treefile, "r")) == NULL)
#endif
    {
      error("cannot open %s", treefile);
      return((sint)0);
    }

  skip_space(fd);
  ch = (char)getc(fd);
  if (ch != '(')
    {
      error("Wrong format in tree file %s", treefile);
      return((sint)0);
    }
  rewind(fd);

  distance_tree = TRUE;

/*
  Allocate memory for tree
*/
  nptr = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
  ptrs = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
  lptr = (treeptr *)ckalloc((last_seq-first_seq+1) * sizeof(treeptr));
  olptr = (treeptr *)ckalloc((last_seq+1) * sizeof(treeptr));
  
  seq_tree = avail();
  set_info(seq_tree, NULL, 0, "", 0.0);

  create_tree(seq_tree,NULL);
  fclose(fd);


  if (numseq != last_seq-first_seq)
     {
         error("tree not compatible with alignment\n(%d sequences in alignment and %d in tree", (pint)last_seq-first_seq,(pint)numseq);
         return((sint)0);
     }

/*
  If the tree is unrooted, reroot the tree - ie. minimise the difference
  between the mean root->leaf distances for the left and right branches of
  the tree.
*/

  if (distance_tree == FALSE)
     {
  	if (rooted_tree == FALSE)
          {
       	     error("input tree is unrooted and has no distances.\nCannot align sequences");
             return((sint)0);
          }
     }

  if (rooted_tree == FALSE)
     {
        root = reroot(seq_tree, last_seq-first_seq+1);
     }
  else
     {
        root = seq_tree;
     }

/*
  calculate the 'order' of each node.
*/
  order_nodes();

  if (numseq >= 2)
     {
/*
  If there are more than three sequences....
*/
/*
  assign the sequence nodes (in the same order as in the alignment file)
*/
      for (i=first_seq; i<last_seq; i++)
       {
         if (strlen(names[i+1]) > MAXNAMES)
             warning("name %s is too long for PHYLIP tree format (max %d chars)", names[i+1],MAXNAMES);

         for (k=0; k< strlen(names[i+1]) && k<MAXNAMES ; k++)
           {
             c = names[i+1][k];
             if ((c>0x40) && (c<0x5b)) c=c | 0x20;
             if (c == ' ') c = '_';
             name2[k] = c;
           }
         name2[k]='\0';
         found = FALSE;
         for (j=0; j<numseq; j++)
           {
            for (k=0; k< strlen(lptr[j]->name) && k<MAXNAMES ; k++)
              {
                c = lptr[j]->name[k];
                if ((c>0x40) && (c<0x5b)) c=c | 0x20;
                name1[k] = c;
              }
            name1[k]='\0';
            if (strcmp(name1, name2) == 0)
              {
                olptr[i] = lptr[j];
                found = TRUE;
              }
           }
         if (found == FALSE)
           {
             error("tree not compatible with alignment:\n%s not found", name2);
             return((sint)0);
           }
       }

     }
   return((sint)1);
}

static void create_tree(treeptr ptree, treeptr parent)
{
   treeptr p;

   sint i, type;
   float dist;
   char name[MAXNAMES+1];

/*
  is this a node or a leaf ?
*/
  skip_space(fd);
  ch = (char)getc(fd);
  if (ch == '(')
    {  
/*
   this must be a node....
*/
      type = NODE;
      name[0] = '\0';
      ptrs[ntotal] = nptr[nnodes] = ptree;
      nnodes++;
      ntotal++;

      create_node(ptree, parent);

      p = ptree->left;
      create_tree(p, ptree);
           
      if ( ch == ',')
       {
          p = ptree->right;
          create_tree(p, ptree);
          if ( ch == ',')
            {
               ptree = insert_node(ptree);
               ptrs[ntotal] = nptr[nnodes] = ptree;
               nnodes++;
               ntotal++;
               p = ptree->right;
               create_tree(p, ptree);
               rooted_tree = FALSE;
            }
       }

      skip_space(fd);
      ch = (char)getc(fd);
    }
/*
   ...otherwise, this is a leaf
*/
  else
    {
      type = LEAF;
      ptrs[ntotal++] = lptr[numseq++] = ptree;
/*
   get the sequence name
*/
      name[0] = ch;
      ch = (char)getc(fd);
      i = 1;
      while ((ch != ':') && (ch != ',') && (ch != ')'))
        {
          if (i < MAXNAMES) name[i++] = ch;
          ch = (char)getc(fd);
        }
      name[i] = '\0';
      if (ch != ':')
         {
           distance_tree = FALSE;
           dist = 0.0;
         }
    }

/*
   get the distance information
*/
  dist = 0.0;
  if (ch == ':')
     {
       skip_space(fd);
       fscanf(fd,"%f",&dist);
       skip_space(fd);
       ch = (char)getc(fd);
     }
   set_info(ptree, parent, type, name, dist);


}

static void create_node(treeptr pptr, treeptr parent)
{
  treeptr t;

  pptr->parent = parent;
  t = avail();
  pptr->left = t;
  t = avail();
  pptr->right = t;
    
}

static treeptr insert_node(treeptr pptr)
{

   treeptr newnode;

   newnode = avail();
   create_node(newnode, pptr->parent);

   newnode->left = pptr;
   pptr->parent = newnode;

   set_info(newnode, pptr->parent, NODE, "", 0.0);

   return(newnode);
}

static void skip_space(FILE *fd)
{
  int   c;
  
  do
     c = getc(fd);
  while(isspace(c));

  ungetc(c, fd);
}

static treeptr avail(void)
{
   treeptr p;
   p = ckalloc(sizeof(stree));
   p->left = NULL;
   p->right = NULL;
   p->parent = NULL;
   p->dist = 0.0;
   p->leaf = 0;
   p->order = 0;
   p->name[0] = '\0';
   return(p);
}

void clear_tree(treeptr p)
{
   clear_tree_nodes(p);
      
   nptr=ckfree((void *)nptr);
   ptrs=ckfree((void *)ptrs);
   lptr=ckfree((void *)lptr);
   olptr=ckfree((void *)olptr);
}

static void clear_tree_nodes(treeptr p)
{
   if (p==NULL) p = root;
   if (p->left != NULL)
     {
       clear_tree_nodes(p->left);
     }
   if (p->right != NULL)
     {
       clear_tree_nodes(p->right);
     }
   p->left = NULL;
   p->right = NULL;
   p=ckfree((void *)p);   
}

static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist)
{
   p->parent = parent;
   p->leaf = pleaf;
   p->dist = pdist;
   p->order = 0;
   strcpy(p->name, pname);
   if (p->leaf == TRUE)
     {
        p->left = NULL;
        p->right = NULL;
     }
}

static treeptr reroot(treeptr ptree, sint nseqs)
{

   treeptr p, rootnode, rootptr;
   float   diff, mindiff = 0.0, mindepth = 1.0, maxdist;
   sint   i;
   Boolean first = TRUE;

/*
  find the difference between the means of leaf->node
  distances on the left and on the right of each node
*/
   rootptr = ptree;
   for (i=0; i<ntotal; i++)
     {
        p = ptrs[i];
        if (p->parent == NULL)
           diff = calc_root_mean(p, &maxdist);
        else
           diff = calc_mean(p, &maxdist, nseqs);

        if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist)))
          {
              if ((maxdist < mindepth) || (first == TRUE))
                 {
                    first = FALSE;
                    rootptr = p;
                    mindepth = maxdist;
                    mindiff = diff;
                 }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -