⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.c

📁 经典生物信息学多序列比对工具clustalw
💻 C
📖 第 1 页 / 共 5 页
字号:
/* Phyle of filogenetic tree calculating functions for CLUSTAL W *//* DES was here  FEB. 1994 */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include "clustalw.h"#include "dayhoff.h"    /* set correction for amino acid distances >= 75% *//* *   Prototypes */Boolean transition(sint base1, sint base2);void tree_gap_delete(void);void distance_matrix_output(FILE *ofile);void nj_tree(char **tree_description, FILE *tree);void compare_tree(char **tree1, char **tree2, sint *hits, sint n);void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap);void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap);sint two_way_split(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);sint two_way_split_nexus(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);void print_tree(char **tree_description, FILE *tree, sint *totals);static Boolean is_ambiguity(char c);static void overspill_message(sint overspill,sint total_dists);/* *   Global variables */extern sint max_names;extern double **tmat;     /* general nxn array of reals; allocated from main */                          /* this is used as a distance matrix */extern Boolean dnaflag;   /* TRUE for DNA seqs; FALSE for proteins */extern Boolean tossgaps;  /* Ignore places in align. where ANY seq. has a gap*/extern Boolean kimura;    /* Use correction for multiple substitutions */extern Boolean output_tree_clustal;   /* clustal text output for trees */extern Boolean output_tree_phylip;    /* phylip nested parentheses format */extern Boolean output_tree_distances; /* phylip distance matrix */extern Boolean output_tree_nexus;     /* nexus format tree */extern Boolean output_pim;     /* perc identity matrix output Ramu */extern sint    bootstrap_format;      /* bootstrap file format */extern Boolean empty;                 /* any sequences in memory? */extern Boolean usemenu;   /* interactive (TRUE) or command line (FALSE) */extern sint nseqs;extern sint max_aln_length;extern sint *seqlen_array; /* the lengths of the sequences */extern char **seq_array;   /* the sequences */extern char **names;       /* the seq. names */extern char seqname[];		/* name of input file */extern sint gap_pos1,gap_pos2;extern Boolean use_ambiguities;extern char *amino_acid_codes;static double 	*av;static double 	*left_branch, *right_branch;static double 	*save_left_branch, *save_right_branch;static sint	*boot_totals;static sint 	*tkill;/*    The next line is a fossil from the days of using the cc ran()static int 	ran_factor;*/static sint 	*boot_positions;static FILE 	*phylip_phy_tree_file;static FILE 	*clustal_phy_tree_file;static FILE 	*distances_phy_tree_file;static FILE 	*nexus_phy_tree_file;static FILE     *pim_file; /* Ramu */static Boolean 	verbose;static char 	*tree_gaps;static sint first_seq, last_seq;                     /* array of weights; 1 for use this posn.; 0 don't */extern sint boot_ntrials;		/* number of bootstrap trials */extern unsigned sint boot_ran_seed;	/* random number generator seed */void phylogenetic_tree(char *phylip_name,char *clustal_name,char *dist_name, char *nexus_name, char *pim_name)/*    Calculate a tree using the distances in the nseqs*nseqs array tmat.   This is the routine for getting the REAL trees after alignment.*/{	char path[FILENAMELEN+1];	sint i, j;	sint overspill = 0;	sint total_dists;	static char **standard_tree;	static char **save_tree;	char lin2[10];	if(empty) {		error("You must load an alignment first");		return;	}	if(nseqs<2) {		error("Alignment has only %d sequences",nseqs);		return;	}	first_seq=1;	last_seq=nseqs;	get_path(seqname,path);	if(output_tree_clustal) {        if (clustal_name[0]!=EOS) {                if((clustal_phy_tree_file = open_explicit_file(                clustal_name))==NULL) return;        }        else {		if((clustal_phy_tree_file = open_output_file(		"\nEnter name for CLUSTAL    tree output file  ",path,		clustal_name,"nj")) == NULL) return;        }}if(output_tree_phylip) {        if (phylip_name[0]!=EOS) {                if((phylip_phy_tree_file = open_explicit_file(                phylip_name))==NULL) return;        }        else {                 if((phylip_phy_tree_file = open_output_file(		"\nEnter name for PHYLIP     tree output file  ",path,                phylip_name,"ph")) == NULL) return;        }}if(output_tree_distances){        if (dist_name[0]!=EOS) {                if((distances_phy_tree_file = open_explicit_file(                dist_name))==NULL) return;        }        else {		if((distances_phy_tree_file = open_output_file(		"\nEnter name for distance matrix output file  ",path,		dist_name,"dst")) == NULL) return;        }}if(output_tree_nexus){        if (nexus_name[0]!=EOS) {                if((nexus_phy_tree_file = open_explicit_file(                nexus_name))==NULL) return;        }        else {		if((nexus_phy_tree_file = open_output_file(		"\nEnter name for NEXUS tree output file  ",path,		nexus_name,"tre")) == NULL) return;        }}if(output_pim){        if (pim_name[0]!=EOS) {        	if((pim_file = open_explicit_file(		pim_name))==NULL) return;      }      else {        	if((pim_file = open_output_file(		"\nEnter name for % Identity matrix output file  ",path,                pim_name,"pim")) == NULL) return;      }}	boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );	for(j=1; j<=seqlen_array[first_seq]; ++j) 		boot_positions[j] = j;			if(output_tree_clustal) {		verbose = TRUE;     /* Turn on file output */		if(dnaflag)			overspill = dna_distance_matrix(clustal_phy_tree_file);		else 			overspill = prot_distance_matrix(clustal_phy_tree_file);	}	if(output_tree_phylip) {		verbose = FALSE;     /* Turn off file output */		if(dnaflag)			overspill = dna_distance_matrix(phylip_phy_tree_file);		else 			overspill = prot_distance_matrix(phylip_phy_tree_file);	}	if(output_tree_nexus) {		verbose = FALSE;     /* Turn off file output */		if(dnaflag)			overspill = dna_distance_matrix(nexus_phy_tree_file);		else 			overspill = prot_distance_matrix(nexus_phy_tree_file);	}        if(output_pim) { /* Ramu  */          	verbose = FALSE;     /* Turn off file output */          	if(dnaflag)           		calc_percidentity(pim_file);          	else            		calc_percidentity(pim_file);        }	if(output_tree_distances) {		verbose = FALSE;     /* Turn off file output */		if(dnaflag)			overspill = dna_distance_matrix(distances_phy_tree_file);		else 			overspill = prot_distance_matrix(distances_phy_tree_file);      		distance_matrix_output(distances_phy_tree_file);	}/* check if any distances overflowed the distance corrections */	if ( overspill > 0 ) {		total_dists = (nseqs*(nseqs-1))/2;		overspill_message(overspill,total_dists);	}	if(output_tree_clustal) verbose = TRUE;     /* Turn on file output */	standard_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );	for(i=0; i<nseqs+1; i++) 		standard_tree[i]  = (char *) ckalloc( (nseqs+1) * sizeof(char) );	save_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );	for(i=0; i<nseqs+1; i++) 		save_tree[i]  = (char *) ckalloc( (nseqs+1) * sizeof(char) );	if(output_tree_clustal || output_tree_phylip || output_tree_nexus) 		nj_tree(standard_tree,clustal_phy_tree_file);	for(i=1; i<nseqs+1; i++) 		for(j=1; j<nseqs+1; j++) 			save_tree[i][j]  = standard_tree[i][j];	if(output_tree_phylip) 		print_phylip_tree(standard_tree,phylip_phy_tree_file,0);	for(i=1; i<nseqs+1; i++) 		for(j=1; j<nseqs+1; j++) 			standard_tree[i][j]  = save_tree[i][j];	if(output_tree_nexus) 		print_nexus_tree(standard_tree,nexus_phy_tree_file,0);/*	print_tree(standard_tree,phy_tree_file);*/	tree_gaps=ckfree((void *)tree_gaps);	boot_positions=ckfree((void *)boot_positions);	if (left_branch != NULL) left_branch=ckfree((void *)left_branch);	if (right_branch != NULL) right_branch=ckfree((void *)right_branch);	if (tkill != NULL) tkill=ckfree((void *)tkill);	if (av != NULL) av=ckfree((void *)av);	for (i=0;i<nseqs+1;i++)		standard_tree[i]=ckfree((void *)standard_tree[i]);	standard_tree=ckfree((void *)standard_tree);	for (i=0;i<nseqs+1;i++)		save_tree[i]=ckfree((void *)save_tree[i]);	save_tree=ckfree((void *)save_tree);if(output_tree_clustal) {	fclose(clustal_phy_tree_file);		info("Phylogenetic tree file created:   [%s]",clustal_name);}if(output_tree_phylip) {	fclose(phylip_phy_tree_file);		info("Phylogenetic tree file created:   [%s]",phylip_name);}if(output_tree_distances) {	fclose(distances_phy_tree_file);		info("Distance matrix  file  created:   [%s]",dist_name);}if(output_tree_nexus) {	fclose(nexus_phy_tree_file);		info("Nexus tree file  created:   [%s]",nexus_name);}if(output_pim) {	fclose(pim_file);	info(" perc identity matrix file  created:   [%s]",pim_name);}}static void overspill_message(sint overspill,sint total_dists){	char err_mess[1024]="";	sprintf(err_mess,"%d of the distances out of a total of %d",	(pint)overspill,(pint)total_dists);	strcat(err_mess,"\n were out of range for the distance correction.");	strcat(err_mess,"\n");	strcat(err_mess,"\n SUGGESTIONS: 1) remove the most distant sequences");	strcat(err_mess,"\n           or 2) use the PHYLIP package");	strcat(err_mess,"\n           or 3) turn off the correction.");	strcat(err_mess,"\n Note: Use option 3 with caution! With this degree");	strcat(err_mess,"\n of divergence you will have great difficulty");	strcat(err_mess,"\n getting robust and reliable trees.");	strcat(err_mess,"\n\n");	warning(err_mess);}Boolean transition(sint base1, sint base2) /* TRUE if transition; else FALSE *//*    assumes that the bases of DNA sequences have been translated as   a,A = 0;   c,C = 1;   g,G = 2;   t,T,u,U = 3;  N = 4;     a,A = 0;   c,C = 2;   g,G = 6;   t,T,u,U =17;     A <--> G  and  T <--> C  are transitions;  all others are transversions.*/{	if( ((base1 == 0) && (base2 == 6)) || ((base1 == 6) && (base2 == 0)) )		return TRUE;                                     /* A <--> G */	if( ((base1 ==17) && (base2 == 2)) || ((base1 == 2) && (base2 ==17)) )		return TRUE;                                     /* T <--> C */    return FALSE;}void tree_gap_delete(void)   /* flag all positions in alignment that have a gap */{			  /* in ANY sequence */	sint seqn;	sint posn;	tree_gaps = (char *)ckalloc( (max_aln_length+1) * sizeof (char) );        	for(posn=1; posn<=seqlen_array[first_seq]; ++posn) {		tree_gaps[posn] = 0;     	for(seqn=1; seqn<=last_seq-first_seq+1; ++seqn)  {			if((seq_array[seqn+first_seq-1][posn] == gap_pos1) ||			   (seq_array[seqn+first_seq-1][posn] == gap_pos2)) {			   tree_gaps[posn] = 1;				break;			}		}	}}void distance_matrix_output(FILE *ofile){	sint i,j;		fprintf(ofile,"%6d",(pint)last_seq-first_seq+1);	for(i=1;i<=last_seq-first_seq+1;i++) {		fprintf(ofile,"\n%-*s ",max_names,names[i]);		for(j=1;j<=last_seq-first_seq+1;j++) {			fprintf(ofile,"%6.3f ",tmat[i][j]);			if(j % 8 == 0) {				if(j!=last_seq-first_seq+1) fprintf(ofile,"\n"); 				if(j != last_seq-first_seq+1 ) fprintf(ofile,"          ");			}		}	}}#ifdef ORIGINAL_NJ_TREEvoid nj_tree(char **tree_description, FILE *tree){	register int i;	sint l[4],nude,k;	sint nc,mini,minj,j,ii,jj;	double fnseqs,fnseqs2=0,sumd;	double diq,djq,dij,d2r,dr,dio,djo,da;	double tmin,total,dmin;	double bi,bj,b1,b2,b3,branch[4];	sint typei,typej;             /* 0 = node; 1 = OTU */		fnseqs = (double)last_seq-first_seq+1;/*********************** First initialisation ***************************/		if(verbose)  {		fprintf(tree,"\n\n\t\t\tNeighbor-joining Method\n");		fprintf(tree,"\n Saitou, N. and Nei, M. (1987)");		fprintf(tree," The Neighbor-joining Method:");		fprintf(tree,"\n A New Method for Reconstructing Phylogenetic Trees.");		fprintf(tree,"\n Mol. Biol. Evol., 4(4), 406-425\n");		fprintf(tree,"\n\n This is an UNROOTED tree\n");		fprintf(tree,"\n Numbers in parentheses are branch lengths\n\n");	}		if (fnseqs == 2) {		if (verbose) fprintf(tree,"Cycle   1     =  SEQ:   1 (%9.5f) joins  SEQ:   2 (%9.5f)",tmat[first_seq][first_seq+1],tmat[first_seq][first_seq+1]);		return;	}	mini = minj = 0;	left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );	right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );	tkill 		= (sint *) ckalloc( (nseqs+1) * sizeof (sint) );	av   		= (double *) ckalloc( (nseqs+1) * sizeof (double)   );	for(i=1;i<=last_seq-first_seq+1;++i) 		{		tmat[i][i] = av[i] = 0.0;		tkill[i] = 0;		}/*********************** Enter The Main Cycle ***************************/ /*	for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) {  */            	/**start main cycle**/	for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) {		sumd = 0.0;		for(j=2; j<=last_seq-first_seq+1; ++j)			for(i=1; i<j; ++i) {				tmat[j][i] = tmat[i][j];				sumd = sumd + tmat[i][j];			}		tmin = 99999.0;/*.................compute SMATij values and find the smallest one ........*/		for(jj=2; jj<=last_seq-first_seq+1; ++jj) 			if(tkill[jj] != 1) 				for(ii=1; ii<jj; ++ii)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -