⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 calctree.c

📁 生物序列比对程序clustw的源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <stdarg.h>#include <ctype.h>#include "clustalw.h"#define MAXERRS 10/* *   Prototypes */static void create_tree(treeptr ptree, treeptr parent);static void create_node(treeptr pptr, treeptr parent);static treeptr insert_node(treeptr pptr);static void skip_space(FILE *fd);static treeptr avail(void);static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist);static treeptr reroot(treeptr ptree, sint nseqs);static treeptr insert_root(treeptr p, float diff);static float calc_root_mean(treeptr root, float *maxdist);static float calc_mean(treeptr nptr, float *maxdist, sint nseqs);static void order_nodes(void);static sint calc_weight(sint leaf);static void group_seqs(treeptr p, sint *next_groups, sint nseqs);static void mark_group1(treeptr p, sint *groups, sint n);static void mark_group2(treeptr p, sint *groups, sint n);static void save_set(sint n, sint *groups);static void clear_tree_nodes(treeptr p);/* *   Global variables */extern Boolean interactive;extern Boolean distance_tree;extern Boolean usemenu;extern sint debug;extern double **tmat;extern sint **sets;extern sint nsets;extern char **names;extern sint *seq_weight;extern Boolean no_weights;char ch;FILE *fd;treeptr *lptr;treeptr *olptr;treeptr *nptr;treeptr *ptrs;sint nnodes = 0;sint ntotal = 0;Boolean rooted_tree = TRUE;static treeptr seq_tree,root;static sint *groups, numseq;void calc_seq_weights(sint first_seq, sint last_seq, sint *sweight){  sint   i, nseqs;  sint   temp, sum, *weight;/*  If there are more than three sequences....*/  nseqs = last_seq-first_seq;   if ((nseqs >= 2) && (distance_tree == TRUE) && (no_weights == FALSE))     {/*  Calculate sequence weights based on Phylip tree.*/      weight = (sint *)ckalloc((last_seq+1) * sizeof(sint));      for (i=first_seq; i<last_seq; i++)           weight[i] = calc_weight(i);/*  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR*/         sum = 0;         for (i=first_seq; i<last_seq; i++)            sum += weight[i];         if (sum == 0)          {            for (i=first_seq; i<last_seq; i++)               weight[i] = 1;            sum = i;          }         for (i=first_seq; i<last_seq; i++)           {              sweight[i] = (weight[i] * INT_SCALE_FACTOR) / sum;              if (sweight[i] < 1) sweight[i] = 1;           }       weight=ckfree((void *)weight);     }   else     {/*  Otherwise, use identity weights.*/        temp = INT_SCALE_FACTOR / nseqs;        for (i=first_seq; i<last_seq; i++)           sweight[i] = temp;     }}void create_sets(sint first_seq, sint last_seq){  sint   i, j, nseqs;  nsets = 0;  nseqs = last_seq-first_seq;  if (nseqs >= 2)     {/*  If there are more than three sequences....*/       groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));       group_seqs(root, groups, nseqs);       groups=ckfree((void *)groups);     }   else     {       groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));       for (i=0;i<nseqs-1;i++)         {           for (j=0;j<nseqs;j++)              if (j<=i) groups[j] = 1;              else if (j==i+1) groups[j] = 2;              else groups[j] = 0;           save_set(nseqs, groups);         }       groups=ckfree((void *)groups);     }}sint read_tree(char *treefile, sint first_seq, sint last_seq){  char c;  char name1[MAXNAMES+1], name2[MAXNAMES+1];  sint i, j, k;  Boolean found;  numseq = 0;  nnodes = 0;  ntotal = 0;  rooted_tree = TRUE;#ifdef VMS  if ((fd = fopen(treefile,"r","rat=cr","rfm=var")) == NULL)#else  if ((fd = fopen(treefile, "r")) == NULL)#endif    {      error("cannot open %s", treefile);      return((sint)0);    }  skip_space(fd);  ch = (char)getc(fd);  if (ch != '(')    {      error("Wrong format in tree file %s", treefile);      return((sint)0);    }  rewind(fd);  distance_tree = TRUE;/*  Allocate memory for tree*/  nptr = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));  ptrs = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));  lptr = (treeptr *)ckalloc((last_seq-first_seq+1) * sizeof(treeptr));  olptr = (treeptr *)ckalloc((last_seq+1) * sizeof(treeptr));    seq_tree = avail();  set_info(seq_tree, NULL, 0, "", 0.0);  create_tree(seq_tree,NULL);  fclose(fd);  if (numseq != last_seq-first_seq)     {         error("tree not compatible with alignment\n(%d sequences in alignment and %d in tree", (pint)last_seq-first_seq,(pint)numseq);         return((sint)0);     }/*  If the tree is unrooted, reroot the tree - ie. minimise the difference  between the mean root->leaf distances for the left and right branches of  the tree.*/  if (distance_tree == FALSE)     {  	if (rooted_tree == FALSE)          {       	     error("input tree is unrooted and has no distances.\nCannot align sequences");             return((sint)0);          }     }  if (rooted_tree == FALSE)     {        root = reroot(seq_tree, last_seq-first_seq+1);     }  else     {        root = seq_tree;     }/*  calculate the 'order' of each node.*/  order_nodes();  if (numseq >= 2)     {/*  If there are more than three sequences....*//*  assign the sequence nodes (in the same order as in the alignment file)*/      for (i=first_seq; i<last_seq; i++)       {         if (strlen(names[i+1]) > MAXNAMES)             warning("name %s is too long for PHYLIP tree format (max %d chars)", names[i+1],MAXNAMES);         for (k=0; k< strlen(names[i+1]) && k<MAXNAMES ; k++)           {             c = names[i+1][k];             if ((c>0x40) && (c<0x5b)) c=c | 0x20;             if (c == ' ') c = '_';             name2[k] = c;           }         name2[k]='\0';         found = FALSE;         for (j=0; j<numseq; j++)           {            for (k=0; k< strlen(lptr[j]->name) && k<MAXNAMES ; k++)              {                c = lptr[j]->name[k];                if ((c>0x40) && (c<0x5b)) c=c | 0x20;                name1[k] = c;              }            name1[k]='\0';            if (strcmp(name1, name2) == 0)              {                olptr[i] = lptr[j];                found = TRUE;              }           }         if (found == FALSE)           {             error("tree not compatible with alignment:\n%s not found", name2);             return((sint)0);           }       }     }   return((sint)1);}static void create_tree(treeptr ptree, treeptr parent){   treeptr p;   sint i, type;   float dist;   char name[MAXNAMES+1];/*  is this a node or a leaf ?*/  skip_space(fd);  ch = (char)getc(fd);  if (ch == '(')    {  /*   this must be a node....*/      type = NODE;      name[0] = '\0';      ptrs[ntotal] = nptr[nnodes] = ptree;      nnodes++;      ntotal++;      create_node(ptree, parent);      p = ptree->left;      create_tree(p, ptree);                 if ( ch == ',')       {          p = ptree->right;          create_tree(p, ptree);          if ( ch == ',')            {               ptree = insert_node(ptree);               ptrs[ntotal] = nptr[nnodes] = ptree;               nnodes++;               ntotal++;               p = ptree->right;               create_tree(p, ptree);               rooted_tree = FALSE;            }       }      skip_space(fd);      ch = (char)getc(fd);    }/*   ...otherwise, this is a leaf*/  else    {      type = LEAF;      ptrs[ntotal++] = lptr[numseq++] = ptree;/*   get the sequence name*/      name[0] = ch;      ch = (char)getc(fd);      i = 1;      while ((ch != ':') && (ch != ',') && (ch != ')'))        {          if (i < MAXNAMES) name[i++] = ch;          ch = (char)getc(fd);        }      name[i] = '\0';      if (ch != ':')         {           distance_tree = FALSE;           dist = 0.0;         }    }/*   get the distance information*/  dist = 0.0;  if (ch == ':')     {       skip_space(fd);       fscanf(fd,"%f",&dist);       skip_space(fd);       ch = (char)getc(fd);     }   set_info(ptree, parent, type, name, dist);}static void create_node(treeptr pptr, treeptr parent){  treeptr t;  pptr->parent = parent;  t = avail();  pptr->left = t;  t = avail();  pptr->right = t;    }static treeptr insert_node(treeptr pptr){   treeptr newnode;   newnode = avail();   create_node(newnode, pptr->parent);   newnode->left = pptr;   pptr->parent = newnode;   set_info(newnode, pptr->parent, NODE, "", 0.0);   return(newnode);}static void skip_space(FILE *fd){  int   c;    do     c = getc(fd);  while(isspace(c));  ungetc(c, fd);}static treeptr avail(void){   treeptr p;   p = ckalloc(sizeof(stree));   p->left = NULL;   p->right = NULL;   p->parent = NULL;   p->dist = 0.0;   p->leaf = 0;   p->order = 0;   p->name[0] = '\0';   return(p);}void clear_tree(treeptr p){   clear_tree_nodes(p);         nptr=ckfree((void *)nptr);   ptrs=ckfree((void *)ptrs);   lptr=ckfree((void *)lptr);   olptr=ckfree((void *)olptr);}static void clear_tree_nodes(treeptr p){   if (p==NULL) p = root;   if (p->left != NULL)     {       clear_tree_nodes(p->left);     }   if (p->right != NULL)     {       clear_tree_nodes(p->right);     }   p->left = NULL;   p->right = NULL;   p=ckfree((void *)p);   }static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist){   p->parent = parent;   p->leaf = pleaf;   p->dist = pdist;   p->order = 0;   strcpy(p->name, pname);   if (p->leaf == TRUE)     {        p->left = NULL;        p->right = NULL;     }}static treeptr reroot(treeptr ptree, sint nseqs){   treeptr p, rootnode, rootptr;   float   diff, mindiff = 0.0, mindepth = 1.0, maxdist;   sint   i;   Boolean first = TRUE;/*  find the difference between the means of leaf->node  distances on the left and on the right of each node*/   rootptr = ptree;   for (i=0; i<ntotal; i++)     {        p = ptrs[i];        if (p->parent == NULL)           diff = calc_root_mean(p, &maxdist);        else           diff = calc_mean(p, &maxdist, nseqs);        if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist)))          {              if ((maxdist < mindepth) || (first == TRUE))                 {                    first = FALSE;                    rootptr = p;                    mindepth = maxdist;                    mindiff = diff;                 }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -