📄 trees.c
字号:
if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0)) fprintf(tree,"%d",(pint)boot_totals[start_row]); return(start_row);}void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap){ sint old_row; if(last_seq-first_seq+1==2) { fprintf(tree,"(%s:%7.5f,%s:%7.5f);",names[first_seq],tmat[first_seq][first_seq+1],names[first_seq+1],tmat[first_seq][first_seq+1]); return; } fprintf(tree,"(\n"); old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,1,bootstrap); fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-2]); if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0)) fprintf(tree,"[%d]",(pint)boot_totals[old_row]); fprintf(tree,",\n"); old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,2,bootstrap); fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-1]); if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0)) fprintf(tree,"[%d]",(pint)boot_totals[old_row]); fprintf(tree,",\n"); old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,3,bootstrap); fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1]); if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0)) fprintf(tree,"[%d]",(pint)boot_totals[old_row]); fprintf(tree,")"); if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY"); fprintf(tree,";\n");}sint two_way_split(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap){ sint row, new_row = 0, old_row, col, test_col = 0; Boolean single_seq; if(start_row != last_seq-first_seq+1-2) fprintf(tree,"(\n"); for(col=1; col<=last_seq-first_seq+1; col++) { if(tree_description[start_row][col] == flag) { test_col = col; break; } } single_seq = TRUE; for(row=start_row-1; row>=1; row--) if(tree_description[row][test_col] == 1) { single_seq = FALSE; new_row = row; break; } if(single_seq) { tree_description[start_row][test_col] = 0; fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]); if(start_row == last_seq-first_seq+1-2) { return(0); } fprintf(tree,":%7.5f,\n",left_branch[start_row]); } else { for(col=1; col<=last_seq-first_seq+1; col++) { if((tree_description[start_row][col]==1)&& (tree_description[new_row][col]==1)) tree_description[start_row][col] = 0; } old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap); if(start_row == last_seq-first_seq+1-2) { return(new_row); } fprintf(tree,":%7.5f",left_branch[start_row]); if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0)) fprintf(tree,"[%d]",(pint)boot_totals[old_row]); fprintf(tree,",\n"); } for(col=1; col<=last_seq-first_seq+1; col++) if(tree_description[start_row][col] == flag) { test_col = col; break; } single_seq = TRUE; new_row = 0; for(row=start_row-1; row>=1; row--) if(tree_description[row][test_col] == 1) { single_seq = FALSE; new_row = row; break; } if(single_seq) { tree_description[start_row][test_col] = 0; fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]); fprintf(tree,":%7.5f)\n",right_branch[start_row]); } else { for(col=1; col<=last_seq-first_seq+1; col++) { if((tree_description[start_row][col]==1)&& (tree_description[new_row][col]==1)) tree_description[start_row][col] = 0; } old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap); fprintf(tree,":%7.5f",right_branch[start_row]); if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0)) fprintf(tree,"[%d]",(pint)boot_totals[old_row]); fprintf(tree,")\n"); } if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0)) fprintf(tree,"%d",(pint)boot_totals[start_row]); return(start_row);}void print_tree(char **tree_description, FILE *tree, sint *totals){ sint row,col; fprintf(tree,"\n"); for(row=1; row<=last_seq-first_seq+1-3; row++) { fprintf(tree," \n"); for(col=1; col<=last_seq-first_seq+1; col++) { if(tree_description[row][col] == 0) fprintf(tree,"*"); else fprintf(tree,"."); } if(totals[row] > 0) fprintf(tree,"%7d",(pint)totals[row]); } fprintf(tree," \n"); for(col=1; col<=last_seq-first_seq+1; col++) fprintf(tree,"%1d",(pint)tree_description[last_seq-first_seq+1-2][col]); fprintf(tree,"\n");}sint dna_distance_matrix(FILE *tree){ sint m,n; sint j,i; sint res1, res2; sint overspill = 0; double p,q,e,a,b,k; tree_gap_delete(); /* flag positions with gaps (tree_gaps[i] = 1 ) */ if(verbose) { fprintf(tree,"\n"); fprintf(tree,"\n DIST = percentage divergence (/100)"); fprintf(tree,"\n p = rate of transition (A <-> G; C <-> T)"); fprintf(tree,"\n q = rate of transversion"); fprintf(tree,"\n Length = number of sites used in comparison"); fprintf(tree,"\n"); if(tossgaps) { fprintf(tree,"\n All sites with gaps (in any sequence) deleted!"); fprintf(tree,"\n"); } if(kimura) { fprintf(tree,"\n Distances corrected by Kimura's 2 parameter model:"); fprintf(tree,"\n\n Kimura, M. (1980)"); fprintf(tree," A simple method for estimating evolutionary "); fprintf(tree,"rates of base"); fprintf(tree,"\n substitutions through comparative studies of "); fprintf(tree,"nucleotide sequences."); fprintf(tree,"\n J. Mol. Evol., 16, 111-120."); fprintf(tree,"\n\n"); } } for(m=1; m<last_seq-first_seq+1; ++m) /* for every pair of sequence */ for(n=m+1; n<=last_seq-first_seq+1; ++n) { p = q = e = 0.0; tmat[m][n] = tmat[n][m] = 0.0; for(i=1; i<=seqlen_array[first_seq]; ++i) { j = boot_positions[i]; if(tossgaps && (tree_gaps[j] > 0) ) goto skip; /* gap position */ res1 = seq_array[m+first_seq-1][j]; res2 = seq_array[n+first_seq-1][j]; if( (res1 == gap_pos1) || (res1 == gap_pos2) || (res2 == gap_pos1) || (res2 == gap_pos2)) goto skip; /* gap in a seq*/ if(!use_ambiguities) if( is_ambiguity(res1) || is_ambiguity(res2)) goto skip; /* ambiguity code in a seq*/ e = e + 1.0; if(res1 != res2) { if(transition(res1,res2)) p = p + 1.0; else q = q + 1.0; } skip:; } /* Kimura's 2 parameter correction for multiple substitutions */ if(!kimura) { if (e == 0) { fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n); k = 0.0; p = 0.0; q = 0.0; } else { k = (p+q)/e; if(p > 0.0) p = p/e; else p = 0.0; if(q > 0.0) q = q/e; else q = 0.0; } tmat[m][n] = tmat[n][m] = k; if(verbose) /* if screen output */ fprintf(tree, "%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n" ,(pint)m,(pint)n,k,p,q,e); } else { if (e == 0) { fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n); p = 0.0; q = 0.0; } else { if(p > 0.0) p = p/e; else p = 0.0; if(q > 0.0) q = q/e; else q = 0.0; } if( ((2.0*p)+q) == 1.0 ) a = 0.0; else a = 1.0/(1.0-(2.0*p)-q); if( q == 0.5 ) b = 0.0; else b = 1.0/(1.0-(2.0*q));/* watch for values going off the scale for the correction. */ if( (a<=0.0) || (b<=0.0) ) { overspill++; k = 3.5; /* arbitrary high score */ } else k = 0.5*log(a) + 0.25*log(b); tmat[m][n] = tmat[n][m] = k; if(verbose) /* if screen output */ fprintf(tree, "%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n" ,(pint)m,(pint)n,k,p,q,e); } } return overspill; /* return the number of off-scale values */}sint prot_distance_matrix(FILE *tree){ sint m,n; sint j,i; sint res1, res2; sint overspill = 0; double p,e,k, table_entry; tree_gap_delete(); /* flag positions with gaps (tree_gaps[i] = 1 ) */ if(verbose) { fprintf(tree,"\n"); fprintf(tree,"\n DIST = percentage divergence (/100)"); fprintf(tree,"\n Length = number of sites used in comparison"); fprintf(tree,"\n\n"); if(tossgaps) { fprintf(tree,"\n All sites with gaps (in any sequence) deleted"); fprintf(tree,"\n"); } if(kimura) { fprintf(tree,"\n Distances up tp 0.75 corrected by Kimura's empirical method:"); fprintf(tree,"\n\n Kimura, M. (1983)"); fprintf(tree," The Neutral Theory of Molecular Evolution."); fprintf(tree,"\n Page 75. Cambridge University Press, Cambridge, England."); fprintf(tree,"\n\n"); } } for(m=1; m<nseqs; ++m) /* for every pair of sequence */ for(n=m+1; n<=nseqs; ++n) { p = e = 0.0; tmat[m][n] = tmat[n][m] = 0.0; for(i=1; i<=seqlen_array[1]; ++i) { j = boot_positions[i]; if(tossgaps && (tree_gaps[j] > 0) ) goto skip; /* gap position */ res1 = seq_array[m][j]; res2 = seq_array[n][j]; if( (res1 == gap_pos1) || (res1 == gap_pos2) || (res2 == gap_pos1) || (res2 == gap_pos2)) goto skip; /* gap in a seq*/ e = e + 1.0; if(res1 != res2) p = p + 1.0; skip:; } if(p <= 0.0) k = 0.0; else k = p/e;/* DES debug *//* fprintf(stdout,"Seq1=%4d Seq2=%4d k =%7.4f \n",(pint)m,(pint)n,k); *//* DES debug */ if(kimura) { if(k < 0.75) { /* use Kimura's formula */ if(k > 0.0) k = - log(1.0 - k - (k * k/5.0) ); } else { if(k > 0.930) { overspill++; k = 10.0; /* arbitrarily set to 1000% */ } else { table_entry = (k*1000.0) - 750.0; k = (double)dayhoff_pams[(int)table_entry]; k = k/100.0; } } } tmat[m][n] = tmat[n][m] = k; if(verbose) /* if screen output */ fprintf(tree, "%4d vs.%4d DIST = %6.4f; length = %6.0f\n", (pint)m,(pint)n,k,e); } return overspill;}void guide_tree(FILE *tree,sint firstseq,sint numseqs)/* Routine for producing unrooted NJ trees from seperately aligned pairwise distances. This produces the GUIDE DENDROGRAMS in PHYLIP format.*/{ static char **standard_tree; sint i; float dist; phylip_phy_tree_file=tree; verbose = FALSE; first_seq=firstseq; last_seq=first_seq+numseqs-1; if(numseqs==2) { dist=tmat[firstseq][firstseq+1]/2.0; fprintf(tree,"(%s:%0.5f,%s:%0.5f);\n", names[firstseq],dist,names[firstseq+1],dist); } else { standard_tree = (char **) ckalloc( (last_seq-first_seq+2) * sizeof (char *) ); for(i=0; i<last_seq-first_seq+2; i++) standard_tree[i] = (char *) ckalloc( (last_seq-first_seq+2) * sizeof(char)); nj_tree(standard_tree,clustal_phy_tree_file); print_phylip_tree(standard_tree,phylip_phy_tree_file,0); if(left_branch != NULL) left_branch=ckfree((void *)left_branch); if(right_branch != NULL) right_branch=ckfree((void *)right_branch); if(tkill != NULL) tkill=ckfree((void *)tkill); if(av != NULL) av=ckfree((void *)av); for (i=1;i<last_seq-first_seq+2;i++) standard_tree[i]=ckfree((void *)standard_tree[i]); standard_tree=ckfree((void *)standard_tree); } fclose(phylip_phy_tree_file);}static Boolean is_ambiguity(char c){ int i; char codes[]="ACGTU"; if(use_ambiguities==TRUE) { return FALSE; } for(i=0;i<5;i++) if(amino_acid_codes[c]==codes[i]) return FALSE; return TRUE;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -