📄 calctree.c
字号:
}
}
/*
insert a new node as the ancestor of the node which produces the shallowest
tree.
*/
if (rootptr == ptree)
{
mindiff = rootptr->left->dist + rootptr->right->dist;
rootptr = rootptr->right;
}
rootnode = insert_root(rootptr, mindiff);
diff = calc_root_mean(rootnode, &maxdist);
return(rootnode);
}
static treeptr insert_root(treeptr p, float diff)
{
treeptr newp, prev, q, t;
float dist, prevdist,td;
newp = avail();
t = p->parent;
prevdist = t->dist;
p->parent = newp;
dist = p->dist;
p->dist = diff / 2;
if (p->dist < 0.0) p->dist = 0.0;
if (p->dist > dist) p->dist = dist;
t->dist = dist - p->dist;
newp->left = t;
newp->right = p;
newp->parent = NULL;
newp->dist = 0.0;
newp->leaf = NODE;
if (t->left == p) t->left = t->parent;
else t->right = t->parent;
prev = t;
q = t->parent;
t->parent = newp;
while (q != NULL)
{
if (q->left == prev)
{
q->left = q->parent;
q->parent = prev;
td = q->dist;
q->dist = prevdist;
prevdist = td;
prev = q;
q = q->left;
}
else
{
q->right = q->parent;
q->parent = prev;
td = q->dist;
q->dist = prevdist;
prevdist = td;
prev = q;
q = q->right;
}
}
/*
remove the old root node
*/
q = prev;
if (q->left == NULL)
{
dist = q->dist;
q = q->right;
q->dist += dist;
q->parent = prev->parent;
if (prev->parent->left == prev)
prev->parent->left = q;
else
prev->parent->right = q;
prev->right = NULL;
}
else
{
dist = q->dist;
q = q->left;
q->dist += dist;
q->parent = prev->parent;
if (prev->parent->left == prev)
prev->parent->left = q;
else
prev->parent->right = q;
prev->left = NULL;
}
return(newp);
}
static float calc_root_mean(treeptr root, float *maxdist)
{
float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
treeptr p;
sint i;
sint nl, nr;
sint direction;
/*
for each leaf, determine whether the leaf is left or right of the root.
*/
dist = (*maxdist) = 0;
nl = nr = 0;
for (i=0; i< numseq; i++)
{
p = lptr[i];
dist = 0.0;
while (p->parent != root)
{
dist += p->dist;
p = p->parent;
}
if (p == root->left) direction = LEFT;
else direction = RIGHT;
dist += p->dist;
if (direction == LEFT)
{
lsum += dist;
nl++;
}
else
{
rsum += dist;
nr++;
}
if (dist > (*maxdist)) *maxdist = dist;
}
lmean = lsum / nl;
rmean = rsum / nr;
diff = lmean - rmean;
return(diff);
}
static float calc_mean(treeptr nptr, float *maxdist, sint nseqs)
{
float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
treeptr p, *path2root;
float *dist2node;
sint depth = 0, i,j , n = 0;
sint nl , nr;
sint direction, found;
path2root = (treeptr *)ckalloc(nseqs * sizeof(treeptr));
dist2node = (float *)ckalloc(nseqs * sizeof(float));
/*
determine all nodes between the selected node and the root;
*/
depth = (*maxdist) = dist = 0;
nl = nr = 0;
p = nptr;
while (p != NULL)
{
path2root[depth] = p;
dist += p->dist;
dist2node[depth] = dist;
p = p->parent;
depth++;
}
/*
*nl = *nr = 0;
for each leaf, determine whether the leaf is left or right of the node.
(RIGHT = descendant, LEFT = not descendant)
*/
for (i=0; i< numseq; i++)
{
p = lptr[i];
if (p == nptr)
{
direction = RIGHT;
dist = 0.0;
}
else
{
direction = LEFT;
dist = 0.0;
/*
find the common ancestor.
*/
found = FALSE;
n = 0;
while ((found == FALSE) && (p->parent != NULL))
{
for (j=0; j< depth; j++)
if (p->parent == path2root[j])
{
found = TRUE;
n = j;
}
dist += p->dist;
p = p->parent;
}
if (p == nptr) direction = RIGHT;
}
if (direction == LEFT)
{
lsum += dist;
lsum += dist2node[n-1];
nl++;
}
else
{
rsum += dist;
nr++;
}
if (dist > (*maxdist)) *maxdist = dist;
}
dist2node=ckfree((void *)dist2node);
path2root=ckfree((void *)path2root);
lmean = lsum / nl;
rmean = rsum / nr;
diff = lmean - rmean;
return(diff);
}
static void order_nodes(void)
{
sint i;
treeptr p;
for (i=0; i<numseq; i++)
{
p = lptr[i];
while (p != NULL)
{
p->order++;
p = p->parent;
}
}
}
static sint calc_weight(sint leaf)
{
treeptr p;
float weight = 0.0;
p = olptr[leaf];
while (p->parent != NULL)
{
weight += p->dist / p->order;
p = p->parent;
}
weight *= 100.0;
return((sint)weight);
}
static void group_seqs(treeptr p, sint *next_groups, sint nseqs)
{
sint i;
sint *tmp_groups;
tmp_groups = (sint *)ckalloc((nseqs+1) * sizeof(sint));
for (i=0;i<nseqs;i++)
tmp_groups[i] = 0;
if (p->left != NULL)
{
if (p->left->leaf == NODE)
{
group_seqs(p->left, next_groups, nseqs);
for (i=0;i<nseqs;i++)
if (next_groups[i] != 0) tmp_groups[i] = 1;
}
else
{
mark_group1(p->left, tmp_groups, nseqs);
}
}
if (p->right != NULL)
{
if (p->right->leaf == NODE)
{
group_seqs(p->right, next_groups, nseqs);
for (i=0;i<nseqs;i++)
if (next_groups[i] != 0) tmp_groups[i] = 2;
}
else
{
mark_group2(p->right, tmp_groups, nseqs);
}
save_set(nseqs, tmp_groups);
}
for (i=0;i<nseqs;i++)
next_groups[i] = tmp_groups[i];
tmp_groups=ckfree((void *)tmp_groups);
}
static void mark_group1(treeptr p, sint *groups, sint n)
{
sint i;
for (i=0;i<n;i++)
{
if (olptr[i] == p)
groups[i] = 1;
else
groups[i] = 0;
}
}
static void mark_group2(treeptr p, sint *groups, sint n)
{
sint i;
for (i=0;i<n;i++)
{
if (olptr[i] == p)
groups[i] = 2;
else if (groups[i] != 0)
groups[i] = 1;
}
}
static void save_set(sint n, sint *groups)
{
sint i;
for (i=0;i<n;i++)
sets[nsets+1][i+1] = groups[i];
nsets++;
}
sint calc_similarities(sint nseqs)
{
sint depth = 0, i,j, k, n;
sint found;
sint nerrs, seq1[MAXERRS],seq2[MAXERRS];
treeptr p, *path2root;
float dist;
float *dist2node, bad_dist[MAXERRS];
double **dmat;
char err_mess[1024],err1[MAXLINE],reply[MAXLINE];
path2root = (treeptr *)ckalloc((nseqs) * sizeof(treeptr));
dist2node = (float *)ckalloc((nseqs) * sizeof(float));
dmat = (double **)ckalloc((nseqs) * sizeof(double *));
for (i=0;i<nseqs;i++)
dmat[i] = (double *)ckalloc((nseqs) * sizeof(double));
if (nseqs >= 2)
{
/*
for each leaf, determine all nodes between the leaf and the root;
*/
for (i = 0;i<nseqs; i++)
{
depth = dist = 0;
p = olptr[i];
while (p != NULL)
{
path2root[depth] = p;
dist += p->dist;
dist2node[depth] = dist;
p = p->parent;
depth++;
}
/*
for each pair....
*/
for (j=0; j < i; j++)
{
p = olptr[j];
dist = 0.0;
/*
find the common ancestor.
*/
found = FALSE;
n = 0;
while ((found == FALSE) && (p->parent != NULL))
{
for (k=0; k< depth; k++)
if (p->parent == path2root[k])
{
found = TRUE;
n = k;
}
dist += p->dist;
p = p->parent;
}
dmat[i][j] = dist + dist2node[n-1];
}
}
nerrs = 0;
for (i=0;i<nseqs;i++)
{
dmat[i][i] = 0.0;
for (j=0;j<i;j++)
{
if (dmat[i][j] < 0.01) dmat[i][j] = 0.01;
if (dmat[i][j] > 1.0) {
if (dmat[i][j] > 1.1 && nerrs<MAXERRS) {
seq1[nerrs] = i;
seq2[nerrs] = j;
bad_dist[nerrs] = dmat[i][j];
nerrs++;
}
dmat[i][j] = 1.0;
}
}
}
if (nerrs>0)
{
strcpy(err_mess,"The following sequences are too divergent to be aligned:\n");
for (i=0;i<nerrs && i<5;i++)
{
sprintf(err1," %s and %s (distance %1.3f)\n",
names[seq1[i]+1],
names[seq2[i]+1],bad_dist[i]);
strcat(err_mess,err1);
}
strcat(err_mess,"(All distances should be between 0.0 and 1.0)\n");
strcat(err_mess,"This may not be fatal but you have been warned!\n");
strcat(err_mess,"SUGGESTION: Remove one or more problem sequences and try again");
if(interactive)
(*reply)=prompt_for_yes_no(err_mess,"Continue ");
else (*reply) = 'y';
if ((*reply != 'y') && (*reply != 'Y'))
return((sint)0);
}
}
else
{
for (i=0;i<nseqs;i++)
{
for (j=0;j<i;j++)
{
dmat[i][j] = tmat[i+1][j+1];
}
}
}
path2root=ckfree((void *)path2root);
dist2node=ckfree((void *)dist2node);
for (i=0;i<nseqs;i++)
{
tmat[i+1][i+1] = 0.0;
for (j=0;j<i;j++)
{
tmat[i+1][j+1] = 100.0 - (dmat[i][j]) * 100.0;
tmat[j+1][i+1] = tmat[i+1][j+1];
}
}
for (i=0;i<nseqs;i++) dmat[i]=ckfree((void *)dmat[i]);
dmat=ckfree((void *)dmat);
return((sint)1);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -