📄 calctree.c
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdarg.h>
#include <ctype.h>
#include "clustalw.h"
#define MAXERRS 10
/*
* Prototypes
*/
static void create_tree(treeptr ptree, treeptr parent);
static void create_node(treeptr pptr, treeptr parent);
static treeptr insert_node(treeptr pptr);
static void skip_space(FILE *fd);
static treeptr avail(void);
static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist);
static treeptr reroot(treeptr ptree, sint nseqs);
static treeptr insert_root(treeptr p, float diff);
static float calc_root_mean(treeptr root, float *maxdist);
static float calc_mean(treeptr nptr, float *maxdist, sint nseqs);
static void order_nodes(void);
static sint calc_weight(sint leaf);
static void group_seqs(treeptr p, sint *next_groups, sint nseqs);
static void mark_group1(treeptr p, sint *groups, sint n);
static void mark_group2(treeptr p, sint *groups, sint n);
static void save_set(sint n, sint *groups);
static void clear_tree_nodes(treeptr p);
/*
* Global variables
*/
extern Boolean interactive;
extern Boolean distance_tree;
extern Boolean usemenu;
extern sint debug;
extern double **tmat;
extern sint **sets;
extern sint nsets;
extern char **names;
extern sint *seq_weight;
extern Boolean no_weights;
char ch;
FILE *fd;
treeptr *lptr;
treeptr *olptr;
treeptr *nptr;
treeptr *ptrs;
sint nnodes = 0;
sint ntotal = 0;
Boolean rooted_tree = TRUE;
static treeptr seq_tree,root;
static sint *groups, numseq;
void calc_seq_weights(sint first_seq, sint last_seq, sint *sweight)
{
sint i, nseqs;
sint temp, sum, *weight;
/*
If there are more than three sequences....
*/
nseqs = last_seq-first_seq;
if ((nseqs >= 2) && (distance_tree == TRUE) && (no_weights == FALSE))
{
/*
Calculate sequence weights based on Phylip tree.
*/
weight = (sint *)ckalloc((last_seq+1) * sizeof(sint));
for (i=first_seq; i<last_seq; i++)
weight[i] = calc_weight(i);
/*
Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
*/
sum = 0;
for (i=first_seq; i<last_seq; i++)
sum += weight[i];
if (sum == 0)
{
for (i=first_seq; i<last_seq; i++)
weight[i] = 1;
sum = i;
}
for (i=first_seq; i<last_seq; i++)
{
sweight[i] = (weight[i] * INT_SCALE_FACTOR) / sum;
if (sweight[i] < 1) sweight[i] = 1;
}
weight=ckfree((void *)weight);
}
else
{
/*
Otherwise, use identity weights.
*/
temp = INT_SCALE_FACTOR / nseqs;
for (i=first_seq; i<last_seq; i++)
sweight[i] = temp;
}
}
void create_sets(sint first_seq, sint last_seq)
{
sint i, j, nseqs;
nsets = 0;
nseqs = last_seq-first_seq;
if (nseqs >= 2)
{
/*
If there are more than three sequences....
*/
groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
group_seqs(root, groups, nseqs);
groups=ckfree((void *)groups);
}
else
{
groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
for (i=0;i<nseqs-1;i++)
{
for (j=0;j<nseqs;j++)
if (j<=i) groups[j] = 1;
else if (j==i+1) groups[j] = 2;
else groups[j] = 0;
save_set(nseqs, groups);
}
groups=ckfree((void *)groups);
}
}
sint read_tree(char *treefile, sint first_seq, sint last_seq)
{
char c;
char name1[MAXNAMES+1], name2[MAXNAMES+1];
sint i, j, k;
Boolean found;
numseq = 0;
nnodes = 0;
ntotal = 0;
rooted_tree = TRUE;
#ifdef VMS
if ((fd = fopen(treefile,"r","rat=cr","rfm=var")) == NULL)
#else
if ((fd = fopen(treefile, "r")) == NULL)
#endif
{
error("cannot open %s", treefile);
return((sint)0);
}
skip_space(fd);
ch = (char)getc(fd);
if (ch != '(')
{
error("Wrong format in tree file %s", treefile);
return((sint)0);
}
rewind(fd);
distance_tree = TRUE;
/*
Allocate memory for tree
*/
nptr = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
ptrs = (treeptr *)ckalloc(3*(last_seq-first_seq+1) * sizeof(treeptr));
lptr = (treeptr *)ckalloc((last_seq-first_seq+1) * sizeof(treeptr));
olptr = (treeptr *)ckalloc((last_seq+1) * sizeof(treeptr));
seq_tree = avail();
set_info(seq_tree, NULL, 0, "", 0.0);
create_tree(seq_tree,NULL);
fclose(fd);
if (numseq != last_seq-first_seq)
{
error("tree not compatible with alignment\n(%d sequences in alignment and %d in tree", (pint)last_seq-first_seq,(pint)numseq);
return((sint)0);
}
/*
If the tree is unrooted, reroot the tree - ie. minimise the difference
between the mean root->leaf distances for the left and right branches of
the tree.
*/
if (distance_tree == FALSE)
{
if (rooted_tree == FALSE)
{
error("input tree is unrooted and has no distances.\nCannot align sequences");
return((sint)0);
}
}
if (rooted_tree == FALSE)
{
root = reroot(seq_tree, last_seq-first_seq+1);
}
else
{
root = seq_tree;
}
/*
calculate the 'order' of each node.
*/
order_nodes();
if (numseq >= 2)
{
/*
If there are more than three sequences....
*/
/*
assign the sequence nodes (in the same order as in the alignment file)
*/
for (i=first_seq; i<last_seq; i++)
{
if (strlen(names[i+1]) > MAXNAMES)
warning("name %s is too long for PHYLIP tree format (max %d chars)", names[i+1],MAXNAMES);
for (k=0; k< strlen(names[i+1]) && k<MAXNAMES ; k++)
{
c = names[i+1][k];
if ((c>0x40) && (c<0x5b)) c=c | 0x20;
if (c == ' ') c = '_';
name2[k] = c;
}
name2[k]='\0';
found = FALSE;
for (j=0; j<numseq; j++)
{
for (k=0; k< strlen(lptr[j]->name) && k<MAXNAMES ; k++)
{
c = lptr[j]->name[k];
if ((c>0x40) && (c<0x5b)) c=c | 0x20;
name1[k] = c;
}
name1[k]='\0';
if (strcmp(name1, name2) == 0)
{
olptr[i] = lptr[j];
found = TRUE;
}
}
if (found == FALSE)
{
error("tree not compatible with alignment:\n%s not found", name2);
return((sint)0);
}
}
}
return((sint)1);
}
static void create_tree(treeptr ptree, treeptr parent)
{
treeptr p;
sint i, type;
float dist;
char name[MAXNAMES+1];
/*
is this a node or a leaf ?
*/
skip_space(fd);
ch = (char)getc(fd);
if (ch == '(')
{
/*
this must be a node....
*/
type = NODE;
name[0] = '\0';
ptrs[ntotal] = nptr[nnodes] = ptree;
nnodes++;
ntotal++;
create_node(ptree, parent);
p = ptree->left;
create_tree(p, ptree);
if ( ch == ',')
{
p = ptree->right;
create_tree(p, ptree);
if ( ch == ',')
{
ptree = insert_node(ptree);
ptrs[ntotal] = nptr[nnodes] = ptree;
nnodes++;
ntotal++;
p = ptree->right;
create_tree(p, ptree);
rooted_tree = FALSE;
}
}
skip_space(fd);
ch = (char)getc(fd);
}
/*
...otherwise, this is a leaf
*/
else
{
type = LEAF;
ptrs[ntotal++] = lptr[numseq++] = ptree;
/*
get the sequence name
*/
name[0] = ch;
ch = (char)getc(fd);
i = 1;
while ((ch != ':') && (ch != ',') && (ch != ')'))
{
if (i < MAXNAMES) name[i++] = ch;
ch = (char)getc(fd);
}
name[i] = '\0';
if (ch != ':')
{
distance_tree = FALSE;
dist = 0.0;
}
}
/*
get the distance information
*/
dist = 0.0;
if (ch == ':')
{
skip_space(fd);
fscanf(fd,"%f",&dist);
skip_space(fd);
ch = (char)getc(fd);
}
set_info(ptree, parent, type, name, dist);
}
static void create_node(treeptr pptr, treeptr parent)
{
treeptr t;
pptr->parent = parent;
t = avail();
pptr->left = t;
t = avail();
pptr->right = t;
}
static treeptr insert_node(treeptr pptr)
{
treeptr newnode;
newnode = avail();
create_node(newnode, pptr->parent);
newnode->left = pptr;
pptr->parent = newnode;
set_info(newnode, pptr->parent, NODE, "", 0.0);
return(newnode);
}
static void skip_space(FILE *fd)
{
int c;
do
c = getc(fd);
while(isspace(c));
ungetc(c, fd);
}
static treeptr avail(void)
{
treeptr p;
p = ckalloc(sizeof(stree));
p->left = NULL;
p->right = NULL;
p->parent = NULL;
p->dist = 0.0;
p->leaf = 0;
p->order = 0;
p->name[0] = '\0';
return(p);
}
void clear_tree(treeptr p)
{
clear_tree_nodes(p);
nptr=ckfree((void *)nptr);
ptrs=ckfree((void *)ptrs);
lptr=ckfree((void *)lptr);
olptr=ckfree((void *)olptr);
}
static void clear_tree_nodes(treeptr p)
{
if (p==NULL) p = root;
if (p->left != NULL)
{
clear_tree_nodes(p->left);
}
if (p->right != NULL)
{
clear_tree_nodes(p->right);
}
p->left = NULL;
p->right = NULL;
p=ckfree((void *)p);
}
static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist)
{
p->parent = parent;
p->leaf = pleaf;
p->dist = pdist;
p->order = 0;
strcpy(p->name, pname);
if (p->leaf == TRUE)
{
p->left = NULL;
p->right = NULL;
}
}
static treeptr reroot(treeptr ptree, sint nseqs)
{
treeptr p, rootnode, rootptr;
float diff, mindiff = 0.0, mindepth = 1.0, maxdist;
sint i;
Boolean first = TRUE;
/*
find the difference between the means of leaf->node
distances on the left and on the right of each node
*/
rootptr = ptree;
for (i=0; i<ntotal; i++)
{
p = ptrs[i];
if (p->parent == NULL)
diff = calc_root_mean(p, &maxdist);
else
diff = calc_mean(p, &maxdist, nseqs);
if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist)))
{
if ((maxdist < mindepth) || (first == TRUE))
{
first = FALSE;
rootptr = p;
mindepth = maxdist;
mindiff = diff;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -