📄 trees.c
字号:
/*
Here is where the -0.00005 branch lengths come from for 3 or more
identical seqs.
*/
/* if(dmin <= 0.0) dmin = 0.0001; */
if(dmin <= 0.0) dmin = 0.000001;
av[mini] = dmin * 0.5;
/*........................Re-initialisation................................*/
fnseqs = fnseqs - 1.0;
tkill[minj] = 1;
for(j=1; j<=last_seq-first_seq+1; ++j)
if( tkill[j] != 1 ) {
da = ( tmat[mini][j] + tmat[minj][j] ) * 0.5;
if( (mini - j) < 0 )
tmat[mini][j] = da;
if( (mini - j) > 0)
tmat[j][mini] = da;
}
for(j=1; j<=last_seq-first_seq+1; ++j)
tmat[minj][j] = tmat[j][minj] = 0.0;
/****/ } /**end main cycle**/
/******************************Last Cycle (3 Seqs. left)********************/
nude = 1;
for(i=1; i<=last_seq-first_seq+1; ++i)
if( tkill[i] != 1 ) {
l[nude] = i;
nude = nude + 1;
}
b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5;
b2 = tmat[l[1]][l[2]] - b1;
b3 = tmat[l[1]][l[3]] - b1;
branch[1] = b1 - av[l[1]];
branch[2] = b2 - av[l[2]];
branch[3] = b3 - av[l[3]];
/* Reset tiny negative and positive branch lengths to zero */
if( fabs(branch[1]) < 0.0001) branch[1] = 0.0;
if( fabs(branch[2]) < 0.0001) branch[2] = 0.0;
if( fabs(branch[3]) < 0.0001) branch[3] = 0.0;
left_branch[last_seq-first_seq+1-2] = branch[1];
left_branch[last_seq-first_seq+1-1] = branch[2];
left_branch[last_seq-first_seq+1] = branch[3];
for(i=1; i<=last_seq-first_seq+1; i++)
tree_description[last_seq-first_seq+1-2][i] = 0;
if(verbose)
fprintf(tree,"\n Cycle%4d (Last cycle, trichotomy):\n",(pint)nc);
for(i=1; i<=3; ++i) {
if( av[l[i]] > 0.0) {
if(verbose)
fprintf(tree,"\n\t\t Node:%4d (%9.5f) ",(pint)l[i],branch[i]);
for(k=last_seq-first_seq+1-3; k>=1; k--)
if(tree_description[k][l[i]] == 1) {
for(j=1; j<=last_seq-first_seq+1; j++)
if(tree_description[k][j] == 1)
tree_description[last_seq-first_seq+1-2][j] = i;
break;
}
}
else {
if(verbose)
fprintf(tree,"\n\t\t SEQ:%4d (%9.5f) ",(pint)l[i],branch[i]);
tree_description[last_seq-first_seq+1-2][l[i]] = i;
}
if(i < 3) {
if(verbose)
fprintf(tree,"joins");
}
}
if(verbose)
fprintf(tree,"\n");
}
void bootstrap_tree(char *phylip_name,char *clustal_name, char *nexus_name)
{
sint i,j;
int ranno;
char path[MAXLINE+1];
char dummy[10];
char err_mess[1024];
static char **sample_tree;
static char **standard_tree;
static char **save_tree;
sint total_dists, overspill = 0, total_overspill = 0;
sint nfails = 0;
if(empty) {
error("You must load an alignment first");
return;
}
if(nseqs<4) {
error("Alignment has only %d sequences",nseqs);
return;
}
if(!output_tree_clustal && !output_tree_phylip && !output_tree_nexus) {
error("You must select either clustal or phylip or nexus tree output format");
return;
}
get_path(seqname, path);
if (output_tree_clustal) {
if (clustal_name[0]!=EOS) {
if((clustal_phy_tree_file = open_explicit_file(
clustal_name))==NULL) return;
}
else {
if((clustal_phy_tree_file = open_output_file(
"\nEnter name for bootstrap output file ",path,
clustal_name,"njb")) == NULL) return;
}
}
first_seq=1;
last_seq=nseqs;
if (output_tree_phylip) {
if (phylip_name[0]!=EOS) {
if((phylip_phy_tree_file = open_explicit_file(
phylip_name))==NULL) return;
}
else {
if((phylip_phy_tree_file = open_output_file(
"\nEnter name for bootstrap output file ",path,
phylip_name,"phb")) == NULL) return;
}
}
if (output_tree_nexus) {
if (nexus_name[0]!=EOS) {
if((nexus_phy_tree_file = open_explicit_file(
nexus_name))==NULL) return;
}
else {
if((nexus_phy_tree_file = open_output_file(
"\nEnter name for bootstrap output file ",path,
nexus_name,"treb")) == NULL) return;
}
}
boot_totals = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
for(i=0;i<nseqs+1;i++)
boot_totals[i]=0;
boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );
for(j=1; j<=seqlen_array[first_seq]; ++j) /* First select all positions for */
boot_positions[j] = j; /* the "standard" tree */
if(output_tree_clustal) {
verbose = TRUE; /* Turn on file output */
if(dnaflag)
overspill = dna_distance_matrix(clustal_phy_tree_file);
else
overspill = prot_distance_matrix(clustal_phy_tree_file);
}
if(output_tree_phylip) {
verbose = FALSE; /* Turn off file output */
if(dnaflag)
overspill = dna_distance_matrix(phylip_phy_tree_file);
else
overspill = prot_distance_matrix(phylip_phy_tree_file);
}
if(output_tree_nexus) {
verbose = FALSE; /* Turn off file output */
if(dnaflag)
overspill = dna_distance_matrix(nexus_phy_tree_file);
else
overspill = prot_distance_matrix(nexus_phy_tree_file);
}
/* check if any distances overflowed the distance corrections */
if ( overspill > 0 ) {
total_dists = (nseqs*(nseqs-1))/2;
overspill_message(overspill,total_dists);
}
tree_gaps=ckfree((void *)tree_gaps);
if (output_tree_clustal) verbose = TRUE; /* Turn on screen output */
standard_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
for(i=0; i<nseqs+1; i++)
standard_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
/* compute the standard tree */
if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
nj_tree(standard_tree,clustal_phy_tree_file);
if (output_tree_clustal)
fprintf(clustal_phy_tree_file,"\n\n\t\t\tBootstrap Confidence Limits\n\n");
/* save the left_branch and right_branch for phylip output */
save_left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
save_right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
for (i=1;i<=nseqs;i++) {
save_left_branch[i] = left_branch[i];
save_right_branch[i] = right_branch[i];
}
/*
The next line is a fossil from the days of using the cc ran()
ran_factor = RAND_MAX / seqlen_array[first_seq];
*/
if(usemenu)
boot_ran_seed =
getint("\n\nEnter seed no. for random number generator ",1,1000,boot_ran_seed);
/* do not use the native cc ran()
srand(boot_ran_seed);
*/
addrandinit((unsigned long) boot_ran_seed);
if (output_tree_clustal)
fprintf(clustal_phy_tree_file,"\n Random number generator seed = %7u\n",
boot_ran_seed);
if(usemenu)
boot_ntrials =
getint("\n\nEnter number of bootstrap trials ",1,10000,boot_ntrials);
if (output_tree_clustal) {
fprintf(clustal_phy_tree_file,"\n Number of bootstrap trials = %7d\n",
(pint)boot_ntrials);
fprintf(clustal_phy_tree_file,
"\n\n Diagrammatic representation of the above tree: \n");
fprintf(clustal_phy_tree_file,"\n Each row represents 1 tree cycle;");
fprintf(clustal_phy_tree_file," defining 2 groups.\n");
fprintf(clustal_phy_tree_file,"\n Each column is 1 sequence; ");
fprintf(clustal_phy_tree_file,"the stars in each line show 1 group; ");
fprintf(clustal_phy_tree_file,"\n the dots show the other\n");
fprintf(clustal_phy_tree_file,"\n Numbers show occurences in bootstrap samples.");
}
/*
print_tree(standard_tree, clustal_phy_tree_file, boot_totals);
*/
verbose = FALSE; /* Turn OFF screen output */
left_branch=ckfree((void *)left_branch);
right_branch=ckfree((void *)right_branch);
tkill=ckfree((void *)tkill);
av=ckfree((void *)av);
sample_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
for(i=0; i<nseqs+1; i++)
sample_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
if (usemenu)
fprintf(stdout,"\n\nEach dot represents 10 trials\n\n");
total_overspill = 0;
nfails = 0;
for(i=1; i<=boot_ntrials; ++i) {
for(j=1; j<=seqlen_array[first_seq]; ++j) { /* select alignment */
/* positions for */
ranno = addrand( (unsigned long) seqlen_array[1]) + 1;
boot_positions[j] = ranno; /* bootstrap sample */
}
if(output_tree_clustal) {
if(dnaflag)
overspill = dna_distance_matrix(clustal_phy_tree_file);
else
overspill = prot_distance_matrix(clustal_phy_tree_file);
}
if(output_tree_phylip) {
if(dnaflag)
overspill = dna_distance_matrix(phylip_phy_tree_file);
else
overspill = prot_distance_matrix(phylip_phy_tree_file);
}
if(output_tree_nexus) {
if(dnaflag)
overspill = dna_distance_matrix(nexus_phy_tree_file);
else
overspill = prot_distance_matrix(nexus_phy_tree_file);
}
if( overspill > 0) {
total_overspill = total_overspill + overspill;
nfails++;
}
tree_gaps=ckfree((void *)tree_gaps);
if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
nj_tree(sample_tree,clustal_phy_tree_file);
left_branch=ckfree((void *)left_branch);
right_branch=ckfree((void *)right_branch);
tkill=ckfree((void *)tkill);
av=ckfree((void *)av);
compare_tree(standard_tree, sample_tree, boot_totals, last_seq-first_seq+1);
if (usemenu) {
if(i % 10 == 0) fprintf(stdout,".");
if(i % 100 == 0) fprintf(stdout,"\n");
}
}
/* check if any distances overflowed the distance corrections */
if ( nfails > 0 ) {
total_dists = (nseqs*(nseqs-1))/2;
fprintf(stdout,"\n");
fprintf(stdout,"\n WARNING: %ld of the distances out of a total of %ld times %ld",
(long)total_overspill,(long)total_dists,(long)boot_ntrials);
fprintf(stdout,"\n were out of range for the distance correction.");
fprintf(stdout,"\n This affected %d out of %d bootstrap trials.",
(pint)nfails,(pint)boot_ntrials);
fprintf(stdout,"\n This may not be fatal but you have been warned!");
fprintf(stdout,"\n");
fprintf(stdout,"\n SUGGESTIONS: 1) turn off the correction");
fprintf(stdout,"\n or 2) remove the most distant sequences");
fprintf(stdout,"\n or 3) use the PHYLIP package.");
fprintf(stdout,"\n\n");
if (usemenu)
getstr("Press [RETURN] to continue",dummy);
}
boot_positions=ckfree((void *)boot_positions);
for (i=1;i<nseqs+1;i++)
sample_tree[i]=ckfree((void *)sample_tree[i]);
sample_tree=ckfree((void *)sample_tree);
/*
fprintf(clustal_phy_tree_file,"\n\n Bootstrap totals for each group\n");
*/
if (output_tree_clustal)
print_tree(standard_tree, clustal_phy_tree_file, boot_totals);
save_tree = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
for(i=0; i<nseqs+1; i++)
save_tree[i] = (char *) ckalloc( (nseqs+1) * sizeof(char) );
for(i=1; i<nseqs+1; i++)
for(j=1; j<nseqs+1; j++)
save_tree[i][j] = standard_tree[i][j];
if(output_tree_phylip) {
left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
for (i=1;i<=nseqs;i++) {
left_branch[i] = save_left_branch[i];
right_branch[i] = save_right_branch[i];
}
print_phylip_tree(standard_tree,phylip_phy_tree_file,
bootstrap_format);
left_branch=ckfree((void *)left_branch);
right_branch=ckfree((void *)right_branch);
}
for(i=1; i<nseqs+1; i++)
for(j=1; j<nseqs+1; j++)
standard_tree[i][j] = save_tree[i][j];
if(output_tree_nexus) {
left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double) );
for (i=1;i<=nseqs;i++) {
left_branch[i] = save_left_branch[i];
right_branch[i] = save_right_branch[i];
}
print_nexus_tree(standard_tree,nexus_phy_tree_file,
bootstrap_format);
left_branch=ckfree((void *)left_branch);
right_branch=ckfree((void *)right_branch);
}
boot_totals=ckfree((void *)boot_totals);
save_left_branch=ckfree((void *)save_left_branch);
save_right_branch=ckfree((void *)save_right_branch);
for (i=1;i<nseqs+1;i++)
standard_tree[i]=ckfree((void *)standard_tree[i]);
standard_tree=ckfree((void *)standard_tree);
for (i=0;i<nseqs+1;i++)
save_tree[i]=ckfree((void *)save_tree[i]);
save_tree=ckfree((void *)save_tree);
if (output_tree_clustal)
fclose(clustal_phy_tree_file);
if (output_tree_phylip)
fclose(phylip_phy_tree_file);
if (output_tree_nexus)
fclose(nexus_phy_tree_file);
if (output_tree_clustal)
info("Bootstrap output file completed [%s]"
,clustal_name);
if (output_tree_phylip)
info("Bootstrap output file completed [%s]"
,phylip_name);
if (output_tree_nexus)
info("Bootstrap output file completed [%s]"
,nexus_name);
}
void compare_tree(char **tree1, char **tree2, sint *hits, sint n)
{
sint i,j,k;
sint nhits1, nhits2;
for(i=1; i<=n-3; i++) {
for(j=1; j<=n-3; j++) {
nhits1 = 0;
nhits2 = 0;
for(k=1; k<=n; k++) {
if(tree1[i][k] == tree2[j][k]) nhits1++;
if(tree1[i][k] != tree2[j][k]) nhits2++;
}
if((nhits1 == last_seq-first_seq+1) || (nhits2 == last_seq-first_seq+1)) hits[i]++;
}
}
}
void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap)
{
sint i;
sint old_row;
fprintf(tree,"#NEXUS\n\n");
fprintf(tree,"BEGIN TREES;\n\n");
fprintf(tree,"\tTRANSLATE\n");
for(i=1;i<nseqs;i++) {
fprintf(tree,"\t\t%d %s,\n",(pint)i,names[i]);
}
fprintf(tree,"\t\t%d %s\n",(pint)nseqs,names[nseqs]);
fprintf(tree,"\t\t;\n");
fprintf(tree,"\tUTREE PAUP_1= ");
if(last_seq-first_seq+1==2) {
fprintf(tree,"(%d:%7.5f,%d:%7.5f);",first_seq,tmat[first_seq][first_seq+1],first_seq+1,tmat[first_seq][first_seq+1]);
}
else {
fprintf(tree,"(");
old_row=two_way_split_nexus(tree_description, tree, last_seq-first_seq+1-2,1,bootstrap);
fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-2]);
if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,",");
old_row=two_way_split_nexus(tree_description, tree, last_seq-first_seq+1-2,2,bootstrap);
fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-1]);
if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,",");
old_row=two_way_split_nexus(tree_description, tree, last_seq-first_seq+1-2,3,bootstrap);
fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1]);
if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
fprintf(tree,")");
if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
fprintf(tree,";");
}
fprintf(tree,"\nENDBLOCK;\n");
}
sint two_way_split_nexus
(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
{
sint row, new_row = 0, old_row, col, test_col = 0;
Boolean single_seq;
if(start_row != last_seq-first_seq+1-2) fprintf(tree,"(");
for(col=1; col<=last_seq-first_seq+1; col++) {
if(tree_description[start_row][col] == flag) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -