📄 trees.c
字号:
}
if(single_seq) {
tree_description[start_row][test_col] = 0;
fprintf(tree,"%d",test_col+first_seq-1);
fprintf(tree,":%7.5f)",right_branch[start_row]);
}
else {
for(col=1; col<=last_seq-first_seq+1; col++) {
if((tree_description[start_row][col]==1)&&
(tree_description[new_row][col]==1))
tree_description[start_row][col] = 0;
}
old_row=two_way_split_nexus(tree_description, tree, new_row, (sint)1, bootstrap);
fprintf(tree,":%7.5f",right_branch[start_row]);
if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,")");
}
if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))
fprintf(tree,"%d",(pint)boot_totals[start_row]);
return(start_row);
}
void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap)
{
sint old_row;
if(last_seq-first_seq+1==2) {
fprintf(tree,"(%s:%7.5f,%s:%7.5f);",names[first_seq],tmat[first_seq][first_seq+1],names[first_seq+1],tmat[first_seq][first_seq+1]);
return;
}
fprintf(tree,"(\n");
old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,1,bootstrap);
fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-2]);
if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,",\n");
old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,2,bootstrap);
fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-1]);
if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,",\n");
old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,3,bootstrap);
fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1]);
if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,")");
if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
fprintf(tree,";\n");
}
sint two_way_split
(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
{
sint row, new_row = 0, old_row, col, test_col = 0;
Boolean single_seq;
if(start_row != last_seq-first_seq+1-2) fprintf(tree,"(\n");
for(col=1; col<=last_seq-first_seq+1; col++) {
if(tree_description[start_row][col] == flag) {
test_col = col;
break;
}
}
single_seq = TRUE;
for(row=start_row-1; row>=1; row--)
if(tree_description[row][test_col] == 1) {
single_seq = FALSE;
new_row = row;
break;
}
if(single_seq) {
tree_description[start_row][test_col] = 0;
fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);
if(start_row == last_seq-first_seq+1-2) {
return(0);
}
fprintf(tree,":%7.5f,\n",left_branch[start_row]);
}
else {
for(col=1; col<=last_seq-first_seq+1; col++) {
if((tree_description[start_row][col]==1)&&
(tree_description[new_row][col]==1))
tree_description[start_row][col] = 0;
}
old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);
if(start_row == last_seq-first_seq+1-2) {
return(new_row);
}
fprintf(tree,":%7.5f",left_branch[start_row]);
if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,",\n");
}
for(col=1; col<=last_seq-first_seq+1; col++)
if(tree_description[start_row][col] == flag) {
test_col = col;
break;
}
single_seq = TRUE;
new_row = 0;
for(row=start_row-1; row>=1; row--)
if(tree_description[row][test_col] == 1) {
single_seq = FALSE;
new_row = row;
break;
}
if(single_seq) {
tree_description[start_row][test_col] = 0;
fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);
fprintf(tree,":%7.5f)\n",right_branch[start_row]);
}
else {
for(col=1; col<=last_seq-first_seq+1; col++) {
if((tree_description[start_row][col]==1)&&
(tree_description[new_row][col]==1))
tree_description[start_row][col] = 0;
}
old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);
fprintf(tree,":%7.5f",right_branch[start_row]);
if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,")\n");
}
if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))
fprintf(tree,"%d",(pint)boot_totals[start_row]);
return(start_row);
}
void print_tree(char **tree_description, FILE *tree, sint *totals)
{
sint row,col;
fprintf(tree,"\n");
for(row=1; row<=last_seq-first_seq+1-3; row++) {
fprintf(tree," \n");
for(col=1; col<=last_seq-first_seq+1; col++) {
if(tree_description[row][col] == 0)
fprintf(tree,"*");
else
fprintf(tree,".");
}
if(totals[row] > 0)
fprintf(tree,"%7d",(pint)totals[row]);
}
fprintf(tree," \n");
for(col=1; col<=last_seq-first_seq+1; col++)
fprintf(tree,"%1d",(pint)tree_description[last_seq-first_seq+1-2][col]);
fprintf(tree,"\n");
}
sint dna_distance_matrix(FILE *tree)
{
sint m,n;
sint j,i;
sint res1, res2;
sint overspill = 0;
double p,q,e,a,b,k;
tree_gap_delete(); /* flag positions with gaps (tree_gaps[i] = 1 ) */
if(verbose) {
fprintf(tree,"\n");
fprintf(tree,"\n DIST = percentage divergence (/100)");
fprintf(tree,"\n p = rate of transition (A <-> G; C <-> T)");
fprintf(tree,"\n q = rate of transversion");
fprintf(tree,"\n Length = number of sites used in comparison");
fprintf(tree,"\n");
if(tossgaps) {
fprintf(tree,"\n All sites with gaps (in any sequence) deleted!");
fprintf(tree,"\n");
}
if(kimura) {
fprintf(tree,"\n Distances corrected by Kimura's 2 parameter model:");
fprintf(tree,"\n\n Kimura, M. (1980)");
fprintf(tree," A simple method for estimating evolutionary ");
fprintf(tree,"rates of base");
fprintf(tree,"\n substitutions through comparative studies of ");
fprintf(tree,"nucleotide sequences.");
fprintf(tree,"\n J. Mol. Evol., 16, 111-120.");
fprintf(tree,"\n\n");
}
}
for(m=1; m<last_seq-first_seq+1; ++m) /* for every pair of sequence */
for(n=m+1; n<=last_seq-first_seq+1; ++n) {
p = q = e = 0.0;
tmat[m][n] = tmat[n][m] = 0.0;
for(i=1; i<=seqlen_array[first_seq]; ++i) {
j = boot_positions[i];
if(tossgaps && (tree_gaps[j] > 0) )
goto skip; /* gap position */
res1 = seq_array[m+first_seq-1][j];
res2 = seq_array[n+first_seq-1][j];
if( (res1 == gap_pos1) || (res1 == gap_pos2) ||
(res2 == gap_pos1) || (res2 == gap_pos2))
goto skip; /* gap in a seq*/
if(!use_ambiguities)
if( is_ambiguity(res1) || is_ambiguity(res2))
goto skip; /* ambiguity code in a seq*/
e = e + 1.0;
if(res1 != res2) {
if(transition(res1,res2))
p = p + 1.0;
else
q = q + 1.0;
}
skip:;
}
/* Kimura's 2 parameter correction for multiple substitutions */
if(!kimura) {
if (e == 0) {
fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);
k = 0.0;
p = 0.0;
q = 0.0;
}
else {
k = (p+q)/e;
if(p > 0.0)
p = p/e;
else
p = 0.0;
if(q > 0.0)
q = q/e;
else
q = 0.0;
}
tmat[m][n] = tmat[n][m] = k;
if(verbose) /* if screen output */
fprintf(tree,
"%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
,(pint)m,(pint)n,k,p,q,e);
}
else {
if (e == 0) {
fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);
p = 0.0;
q = 0.0;
}
else {
if(p > 0.0)
p = p/e;
else
p = 0.0;
if(q > 0.0)
q = q/e;
else
q = 0.0;
}
if( ((2.0*p)+q) == 1.0 )
a = 0.0;
else
a = 1.0/(1.0-(2.0*p)-q);
if( q == 0.5 )
b = 0.0;
else
b = 1.0/(1.0-(2.0*q));
/* watch for values going off the scale for the correction. */
if( (a<=0.0) || (b<=0.0) ) {
overspill++;
k = 3.5; /* arbitrary high score */
}
else
k = 0.5*log(a) + 0.25*log(b);
tmat[m][n] = tmat[n][m] = k;
if(verbose) /* if screen output */
fprintf(tree,
"%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
,(pint)m,(pint)n,k,p,q,e);
}
}
return overspill; /* return the number of off-scale values */
}
sint prot_distance_matrix(FILE *tree)
{
sint m,n;
sint j,i;
sint res1, res2;
sint overspill = 0;
double p,e,k, table_entry;
tree_gap_delete(); /* flag positions with gaps (tree_gaps[i] = 1 ) */
if(verbose) {
fprintf(tree,"\n");
fprintf(tree,"\n DIST = percentage divergence (/100)");
fprintf(tree,"\n Length = number of sites used in comparison");
fprintf(tree,"\n\n");
if(tossgaps) {
fprintf(tree,"\n All sites with gaps (in any sequence) deleted");
fprintf(tree,"\n");
}
if(kimura) {
fprintf(tree,"\n Distances up tp 0.75 corrected by Kimura's empirical method:");
fprintf(tree,"\n\n Kimura, M. (1983)");
fprintf(tree," The Neutral Theory of Molecular Evolution.");
fprintf(tree,"\n Page 75. Cambridge University Press, Cambridge, England.");
fprintf(tree,"\n\n");
}
}
for(m=1; m<nseqs; ++m) /* for every pair of sequence */
for(n=m+1; n<=nseqs; ++n) {
p = e = 0.0;
tmat[m][n] = tmat[n][m] = 0.0;
for(i=1; i<=seqlen_array[1]; ++i) {
j = boot_positions[i];
if(tossgaps && (tree_gaps[j] > 0) ) goto skip; /* gap position */
res1 = seq_array[m][j];
res2 = seq_array[n][j];
if( (res1 == gap_pos1) || (res1 == gap_pos2) ||
(res2 == gap_pos1) || (res2 == gap_pos2))
goto skip; /* gap in a seq*/
e = e + 1.0;
if(res1 != res2) p = p + 1.0;
skip:;
}
if(p <= 0.0)
k = 0.0;
else
k = p/e;
/* DES debug */
/* fprintf(stdout,"Seq1=%4d Seq2=%4d k =%7.4f \n",(pint)m,(pint)n,k); */
/* DES debug */
if(kimura) {
if(k < 0.75) { /* use Kimura's formula */
if(k > 0.0) k = - log(1.0 - k - (k * k/5.0) );
}
else {
if(k > 0.930) {
overspill++;
k = 10.0; /* arbitrarily set to 1000% */
}
else {
table_entry = (k*1000.0) - 750.0;
k = (double)dayhoff_pams[(int)table_entry];
k = k/100.0;
}
}
}
tmat[m][n] = tmat[n][m] = k;
if(verbose) /* if screen output */
fprintf(tree,
"%4d vs.%4d DIST = %6.4f; length = %6.0f\n",
(pint)m,(pint)n,k,e);
}
return overspill;
}
void guide_tree(FILE *tree,sint firstseq,sint numseqs)
/*
Routine for producing unrooted NJ trees from seperately aligned
pairwise distances. This produces the GUIDE DENDROGRAMS in
PHYLIP format.
*/
{
static char **standard_tree;
sint i;
float dist;
phylip_phy_tree_file=tree;
verbose = FALSE;
first_seq=firstseq;
last_seq=first_seq+numseqs-1;
if(numseqs==2) {
dist=tmat[firstseq][firstseq+1]/2.0;
fprintf(tree,"(%s:%0.5f,%s:%0.5f);\n",
names[firstseq],dist,names[firstseq+1],dist);
}
else {
standard_tree = (char **) ckalloc( (last_seq-first_seq+2) * sizeof (char *) );
for(i=0; i<last_seq-first_seq+2; i++)
standard_tree[i] = (char *) ckalloc( (last_seq-first_seq+2) * sizeof(char));
nj_tree(standard_tree,clustal_phy_tree_file);
print_phylip_tree(standard_tree,phylip_phy_tree_file,0);
if(left_branch != NULL) left_branch=ckfree((void *)left_branch);
if(right_branch != NULL) right_branch=ckfree((void *)right_branch);
if(tkill != NULL) tkill=ckfree((void *)tkill);
if(av != NULL) av=ckfree((void *)av);
for (i=1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -