📄 calctree.c
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <stdarg.h>#include <ctype.h>#include "clustalw.h"#define MAXERRS 10/* * Prototypes */static void create_tree(treeptr ptree, treeptr parent);static void create_node(treeptr pptr, treeptr parent);static treeptr insert_node(treeptr pptr);static void skip_space(FILE *fd);static treeptr avail(void);static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist);static treeptr reroot(treeptr ptree, sint nseqs);static treeptr insert_root(treeptr p, float diff);static float calc_root_mean(treeptr root, float *maxdist);static float calc_mean(treeptr nptr, float *maxdist, sint nseqs);static void order_nodes(void);static sint calc_weight(sint leaf);static void group_seqs(treeptr p, sint *next_groups, sint nseqs);static void mark_group1(treeptr p, sint *groups, sint n);static void mark_group2(treeptr p, sint *groups, sint n);static void save_set(sint n, sint *groups);static void clear_tree_nodes(treeptr p);/* * Global variables */extern Boolean interactive;extern Boolean distance_tree;extern Boolean usemenu;extern sint debug;extern double **tmat;extern sint **sets;extern sint nsets;extern char **names;extern sint *seq_weight;extern Boolean no_weights;char ch;FILE *fd;treeptr *lptr;treeptr *olptr;treeptr *nptr;treeptr *ptrs;sint nnodes = 0;sint ntotal = 0;Boolean rooted_tree = TRUE;static treeptr seq_tree,root;static sint *groups, numseq;void calc_seq_weights(sint first_seq, sint last_seq, sint *sweight){ sint i, nseqs; sint temp, sum, *weight;/* If there are more than three sequences....*/ nseqs = last_seq-first_seq; if ((nseqs >= 2) && (distance_tree == TRUE) && (no_weights == FALSE)) {/* Calculate sequence weights based on Phylip tree.*/ weight = (sint *)ckalloc((last_seq+1) * sizeof(sint)); for (i=first_seq; i<last_seq; i++) weight[i] = calc_weight(i);/* Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR*/ sum = 0; for (i=first_seq; i<last_seq; i++) sum += weight[i]; if (sum == 0) { for (i=first_seq; i<last_seq; i++) weight[i] = 1; sum = i; } for (i=first_seq; i<last_seq; i++) { sweight[i] = (weight[i] * INT_SCALE_FACTOR) / sum; if (sweight[i] < 1) sweight[i] = 1; } weight=ckfree((void *)weight); } else {/* Otherwise, use identity weights.*/ temp = INT_SCALE_FACTOR / nseqs; for (i=first_seq; i<last_seq; i++) sweight[i] = temp; }}void create_sets(sint first_seq, sint last_seq){ sint i, j, nseqs; nsets = 0; nseqs = last_seq-first_seq; if (nseqs >= 2) {/* If there are more than three sequences....*/ groups = (sint *)ckalloc((nseqs+1) * sizeof(sint)); group_seqs(root, groups, nseqs); groups=ckfree((void *)groups); } else { groups = (sint *)ckalloc((nseqs+1) * sizeof(sint)); for (i=0;i<nseqs-1;i++) { for (j=0;j<nseqs;j++) if (j<=i) groups[j] = 1; else if (j==i+1) groups[j] = 2; else groups[j] = 0; save_set(nseqs, groups); } groups=ckfree((void *)groups); }}sint read_tree(char *treefile, sint first_seq, sint last_seq){ char c; char name1[MAXNAMES+1], name2[MAXNAMES+1]; sint i, j, k; Boolean found; numseq = 0; nnodes = 0; ntotal = 0; rooted_tree = TRUE;#ifdef VMS if ((fd = fopen(treefile,"r","rat=cr","rfm=var")) == NULL)#else if ((fd = fopen(treefile, "r")) == NULL)#endif { error("cannot open %s", treefile); return((sint)0); } skip_space(fd); ch = (char)getc(fd); if (ch != '(') { error("Wrong format in tree file %s", treefile); return((sint)0); } rewind(fd); distance_tree = TRUE;/* Allocate memory for tree*/ nptr = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr)); ptrs = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr)); lptr = (treeptr *)ckalloc((last_seq-first_seq+1) * sizeof(treeptr)); olptr = (treeptr *)ckalloc((last_seq+1) * sizeof(treeptr)); seq_tree = avail(); set_info(seq_tree, NULL, 0, "", 0.0); create_tree(seq_tree,NULL); fclose(fd); if (numseq != last_seq-first_seq) { error("tree not compatible with alignment\n(%d sequences in alignment and %d in tree", (pint)last_seq-first_seq,(pint)numseq); return((sint)0); }/* If the tree is unrooted, reroot the tree - ie. minimise the difference between the mean root->leaf distances for the left and right branches of the tree.*/ if (distance_tree == FALSE) { if (rooted_tree == FALSE) { error("input tree is unrooted and has no distances.\nCannot align sequences"); return((sint)0); } } if (rooted_tree == FALSE) { root = reroot(seq_tree, last_seq-first_seq+1); } else { root = seq_tree; }/* calculate the 'order' of each node.*/ order_nodes(); if (numseq >= 2) {/* If there are more than three sequences....*//* assign the sequence nodes (in the same order as in the alignment file)*/ for (i=first_seq; i<last_seq; i++) { if (strlen(names[i+1]) > MAXNAMES) warning("name %s is too long for PHYLIP tree format (max %d chars)", names[i+1],MAXNAMES); for (k=0; k< strlen(names[i+1]) && k<MAXNAMES ; k++) { c = names[i+1][k]; if ((c>0x40) && (c<0x5b)) c=c | 0x20; if (c == ' ') c = '_'; name2[k] = c; } name2[k]='\0'; found = FALSE; for (j=0; j<numseq; j++) { for (k=0; k< strlen(lptr[j]->name) && k<MAXNAMES ; k++) { c = lptr[j]->name[k]; if ((c>0x40) && (c<0x5b)) c=c | 0x20; name1[k] = c; } name1[k]='\0'; if (strcmp(name1, name2) == 0) { olptr[i] = lptr[j]; found = TRUE; } } if (found == FALSE) { error("tree not compatible with alignment:\n%s not found", name2); return((sint)0); } } } return((sint)1);}static void create_tree(treeptr ptree, treeptr parent){ treeptr p; sint i, type; float dist; char name[MAXNAMES+1];/* is this a node or a leaf ?*/ skip_space(fd); ch = (char)getc(fd); if (ch == '(') { /* this must be a node....*/ type = NODE; name[0] = '\0'; ptrs[ntotal] = nptr[nnodes] = ptree; nnodes++; ntotal++; create_node(ptree, parent); p = ptree->left; create_tree(p, ptree); if ( ch == ',') { p = ptree->right; create_tree(p, ptree); if ( ch == ',') { ptree = insert_node(ptree); ptrs[ntotal] = nptr[nnodes] = ptree; nnodes++; ntotal++; p = ptree->right; create_tree(p, ptree); rooted_tree = FALSE; } } skip_space(fd); ch = (char)getc(fd); }/* ...otherwise, this is a leaf*/ else { type = LEAF; ptrs[ntotal++] = lptr[numseq++] = ptree;/* get the sequence name*/ name[0] = ch; ch = (char)getc(fd); i = 1; while ((ch != ':') && (ch != ',') && (ch != ')')) { if (i < MAXNAMES) name[i++] = ch; ch = (char)getc(fd); } name[i] = '\0'; if (ch != ':') { distance_tree = FALSE; dist = 0.0; } }/* get the distance information*/ dist = 0.0; if (ch == ':') { skip_space(fd); fscanf(fd,"%f",&dist); skip_space(fd); ch = (char)getc(fd); } set_info(ptree, parent, type, name, dist);}static void create_node(treeptr pptr, treeptr parent){ treeptr t; pptr->parent = parent; t = avail(); pptr->left = t; t = avail(); pptr->right = t; }static treeptr insert_node(treeptr pptr){ treeptr newnode; newnode = avail(); create_node(newnode, pptr->parent); newnode->left = pptr; pptr->parent = newnode; set_info(newnode, pptr->parent, NODE, "", 0.0); return(newnode);}static void skip_space(FILE *fd){ int c; do c = getc(fd); while(isspace(c)); ungetc(c, fd);}static treeptr avail(void){ treeptr p; p = ckalloc(sizeof(stree)); p->left = NULL; p->right = NULL; p->parent = NULL; p->dist = 0.0; p->leaf = 0; p->order = 0; p->name[0] = '\0'; return(p);}void clear_tree(treeptr p){ clear_tree_nodes(p); nptr=ckfree((void *)nptr); ptrs=ckfree((void *)ptrs); lptr=ckfree((void *)lptr); olptr=ckfree((void *)olptr);}static void clear_tree_nodes(treeptr p){ if (p==NULL) p = root; if (p->left != NULL) { clear_tree_nodes(p->left); } if (p->right != NULL) { clear_tree_nodes(p->right); } p->left = NULL; p->right = NULL; p=ckfree((void *)p); }static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist){ p->parent = parent; p->leaf = pleaf; p->dist = pdist; p->order = 0; strcpy(p->name, pname); if (p->leaf == TRUE) { p->left = NULL; p->right = NULL; }}static treeptr reroot(treeptr ptree, sint nseqs){ treeptr p, rootnode, rootptr; float diff, mindiff = 0.0, mindepth = 1.0, maxdist; sint i; Boolean first = TRUE;/* find the difference between the means of leaf->node distances on the left and on the right of each node*/ rootptr = ptree; for (i=0; i<ntotal; i++) { p = ptrs[i]; if (p->parent == NULL) diff = calc_root_mean(p, &maxdist); else diff = calc_mean(p, &maxdist, nseqs); if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist))) { if ((maxdist < mindepth) || (first == TRUE)) { first = FALSE; rootptr = p; mindepth = maxdist; mindiff = diff; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -