📄 readtree.c
字号:
/*
insert a new node as the ancestor of the node which produces the shallowest
tree.
*/
if (rootptr == ptree)
{
mindiff = rootptr->left->dist + rootptr->right->dist;
rootptr = rootptr->right;
}
rootnode = insert_root(rootptr, mindiff);
diff = calc_root_mean(rootnode, &maxdist,lptr);
return(rootnode);
}
static INODEPTR insert_root(INODEPTR p, float diff)
{
INODEPTR newp, prev, q, t;
float dist, prevdist,td;
newp = avail();
t = p->parent;
prevdist = t->dist;
p->parent = newp;
dist = p->dist;
p->dist = diff / 2;
if (p->dist < 0.0) p->dist = 0.0;
if (p->dist > dist) p->dist = dist;
t->dist = dist - p->dist;
newp->left = t;
newp->right = p;
newp->parent = NULL;
newp->dist = 0.0;
newp->leaf = NODE;
if (t->left == p) t->left = t->parent;
else t->right = t->parent;
prev = t;
q = t->parent;
t->parent = newp;
while (q != NULL)
{
if (q->left == prev)
{
q->left = q->parent;
q->parent = prev;
td = q->dist;
q->dist = prevdist;
prevdist = td;
prev = q;
q = q->left;
}
else
{
q->right = q->parent;
q->parent = prev;
td = q->dist;
q->dist = prevdist;
prevdist = td;
prev = q;
q = q->right;
}
}
/*
remove the old root node
*/
q = prev;
if (q->left == NULL)
{
dist = q->dist;
q = q->right;
q->dist += dist;
q->parent = prev->parent;
if (prev->parent->left == prev)
prev->parent->left = q;
else
prev->parent->right = q;
prev->right = NULL;
}
else
{
dist = q->dist;
q = q->left;
q->dist += dist;
q->parent = prev->parent;
if (prev->parent->left == prev)
prev->parent->left = q;
else
prev->parent->right = q;
prev->left = NULL;
}
return(newp);
}
static float calc_root_mean(INODEPTR root, float *maxdist,INODEPTR *lptr)
{
float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
INODEPTR p;
sint i;
sint nl, nr;
sint direction;
/*
for each leaf, determine whether the leaf is left or right of the root.
*/
dist = (*maxdist) = 0;
nl = nr = 0;
for (i=0; (p=lptr[i])!=NULL; i++)
{
dist = 0.0;
while (p->parent != root)
{
dist += p->dist;
p = p->parent;
}
if (p == root->left) direction = LEFT;
else direction = RIGHT;
dist += p->dist;
if (direction == LEFT)
{
lsum += dist;
nl++;
}
else
{
rsum += dist;
nr++;
}
if (dist > (*maxdist)) *maxdist = dist;
}
lmean = lsum / nl;
rmean = rsum / nr;
diff = lmean - rmean;
return(diff);
}
static float calc_mean(INODEPTR nptr, float *maxdist, sint nseqs,INODEPTR *lptr)
{
float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
INODEPTR p, *path2root;
float *dist2node;
sint depth = 0, i, n = 0;
sint nl , nr;
sint direction;
path2root = (INODEPTR *)ckalloc(nseqs * sizeof(INODEPTR));
dist2node = (float *)ckalloc(nseqs * sizeof(float));
nl = nr = 0;
(*maxdist) = 0;
/*
determine all nodes between the selected node and the root;
*/
get_path2root(nptr,path2root,dist2node,&depth);
/*
for each leaf, determine whether the leaf is left or right of the node.
(where RIGHT = descendant, LEFT = not descendant)
*/
for (i=0; (p=lptr[i])!=NULL; i++)
{
if (p == nptr)
{
direction = RIGHT;
dist = 0.0;
}
else
{
direction = LEFT;
/*
find the common ancestor.
*/
n=common_ancestor(p,path2root,depth,&dist);
if (path2root[n] == nptr) direction = RIGHT;
}
if (direction == LEFT)
{
lsum += dist;
lsum += dist2node[n-1];
nl++;
}
else
{
rsum += dist;
nr++;
}
if (dist > (*maxdist)) *maxdist = dist;
}
dist2node=ckfree((void *)dist2node);
path2root=ckfree((void *)path2root);
lmean = lsum / nl;
rmean = rsum / nr;
diff = lmean - rmean;
return(diff);
}
void get_path2root(INODEPTR nptr,INODEPTR *path2root,float *dist2node,sint *depth)
{
float dist;
INODEPTR p;
*depth = dist = 0;
p = nptr;
while (p != NULL) {
path2root[*depth] = p;
dist += p->dist;
dist2node[*depth] = dist;
p = p->parent;
(*depth)++;
}
}
/*
find the intersection of the path from node p to the root, and the path in path2root
depth = size of path2root
dist = dist from the top of path2root and the intersection
*/
sint common_ancestor(INODEPTR p,INODEPTR *path2root,sint depth,float *dist)
{
INODEPTR ancestor;
Boolean found;
sint n,j;
found = FALSE;
n = 0;
ancestor=p;
(*dist) = 0.0;
while ((found == FALSE) && (ancestor->parent != NULL)) {
for (j=0; j< depth; j++)
if (ancestor->parent == path2root[j]) {
found = TRUE;
n = j;
}
(*dist) += ancestor->dist;
ancestor = ancestor->parent;
}
return n;
}
static void order_nodes(INODEPTR *lptr)
{
sint i;
INODEPTR p;
for (i=0; (p=lptr[i])!=NULL; i++)
{
while (p != NULL)
{
p->order++;
p = p->parent;
}
}
}
/*
Calculates a matrix of pairwise sequence similarities from a phylogenetic tree
seqs the sequences
nseqs number of sequences
itree the phylogenetic tree
returns the similarity matrix
*/
double ** calc_similarities(SEQ *seqs,sint nseqs,IN_TREEPTR itree)
{
sint depth = 0, i,j, k, n;
sint found;
sint nerrs, *seq1,*seq2;
INODEPTR p, *path2root;
float dist;
float *dist2node, *bad_dist;
double **tmat;
char err_mess[1024],err1[MAXLINE],reply[MAXLINE];
tmat = (double **) ckalloc( (nseqs+1) * sizeof (double *) );
for(i=0;i<nseqs;i++)
tmat[i] = (double *)ckalloc( (nseqs+1) * sizeof (double) );
for(i=0;i<nseqs;i++)
for(j=0;j<nseqs;j++)
tmat[i][j]=0.0;
path2root = (INODEPTR *)ckalloc((nseqs) * sizeof(INODEPTR));
dist2node = (float *)ckalloc((nseqs) * sizeof(float));
seq1 = (sint *)ckalloc((nseqs) * sizeof(sint));
seq2 = (sint *)ckalloc((nseqs) * sizeof(sint));
bad_dist = (float *)ckalloc((nseqs) * sizeof(float));
if (nseqs < 2) return(NULL);
/*
for each leaf, determine all nodes between the leaf and the root;
*/
for (i = 0;i<nseqs; i++)
{
depth = dist = 0;
p = itree->leafptr[i];
while (p != NULL)
{
path2root[depth] = p;
dist += p->dist;
dist2node[depth] = dist;
p = p->parent;
depth++;
}
/*
for each pair....
*/
for (j=0; j < i; j++)
{
p = itree->leafptr[j];
dist = 0.0;
/*
find the common ancestor.
*/
found = FALSE;
n = 0;
while ((found == FALSE) && (p->parent != NULL))
{
for (k=0; k< depth; k++)
if (p->parent == path2root[k])
{
found = TRUE;
n = k;
}
dist += p->dist;
p = p->parent;
}
tmat[i][j] = dist + dist2node[n-1];
}
}
nerrs = 0;
for (i=0;i<nseqs;i++)
{
tmat[i][i] = 0.0;
for (j=0;j<i;j++)
{
if (tmat[i][j] < 0.01) tmat[i][j] = 0.01;
if (tmat[i][j] > 1.0) {
if (tmat[i][j] > 1.1) {
seq1[nerrs] = i;
seq2[nerrs] = j;
bad_dist[nerrs] = tmat[i][j];
nerrs++;
}
tmat[i][j] = 1.0;
}
}
}
if (nerrs>0)
{
strcpy(err_mess,"The following sequences are too divergent to be aligned:\n");
for (i=0;i<nerrs && i<5;i++)
{
sprintf(err1," %s and %s (distance %1.3f)\n",
seqs[seq1[i]+1].name,
seqs[seq2[i]+1].name,bad_dist[i]);
strcat(err_mess,err1);
}
strcat(err_mess,"(All distances should be between 0.0 and 1.0)\n");
strcat(err_mess,"This may not be fatal but you have been warned!\n");
strcat(err_mess,"SUGGESTION: Remove one or more problem sequences and try again");
(*reply)=prompt_for_yes_no(err_mess,"Continue ");
if ((*reply != 'y') && (*reply != 'Y'))
return(NULL);
}
path2root=ckfree((void *)path2root);
dist2node=ckfree((void *)dist2node);
seq1=ckfree((void *)seq1);
seq2=ckfree((void *)seq2);
bad_dist=ckfree((void *)bad_dist);
for (i=0;i<nseqs;i++)
{
tmat[i][i] = 0.0;
for (j=0;j<i;j++)
{
tmat[i][j] = 100.0 - (tmat[i][j]) * 100.0;
tmat[j][i] = tmat[i][j];
}
}
return(tmat);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -