⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.c

📁 clustalw1.83.DOS.ZIP,用于多序列比对的软件
💻 C
📖 第 1 页 / 共 5 页
字号:
/* Phyle of filogenetic tree calculating functions for CLUSTAL W */
/* DES was here  FEB. 1994 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include "clustalw.h"
#include "dayhoff.h"    /* set correction for amino acid distances >= 75% */


/*
 *   Prototypes
 */
Boolean transition(sint base1, sint base2);
void tree_gap_delete(void);
void distance_matrix_output(FILE *ofile);
void nj_tree(char **tree_description, FILE *tree);
void compare_tree(char **tree1, char **tree2, sint *hits, sint n);
void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap);
void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap);
sint two_way_split(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);
sint two_way_split_nexus(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap);
void print_tree(char **tree_description, FILE *tree, sint *totals);
static Boolean is_ambiguity(char c);
static void overspill_message(sint overspill,sint total_dists);


/*
 *   Global variables
 */

extern sint max_names;

extern double **tmat;     /* general nxn array of reals; allocated from main */
                          /* this is used as a distance matrix */
extern Boolean dnaflag;   /* TRUE for DNA seqs; FALSE for proteins */
extern Boolean tossgaps;  /* Ignore places in align. where ANY seq. has a gap*/
extern Boolean kimura;    /* Use correction for multiple substitutions */
extern Boolean output_tree_clustal;   /* clustal text output for trees */
extern Boolean output_tree_phylip;    /* phylip nested parentheses format */
extern Boolean output_tree_distances; /* phylip distance matrix */
extern Boolean output_tree_nexus;     /* nexus format tree */
extern Boolean output_pim;     /* perc identity matrix output Ramu */

extern sint    bootstrap_format;      /* bootstrap file format */
extern Boolean empty;                 /* any sequences in memory? */
extern Boolean usemenu;   /* interactive (TRUE) or command line (FALSE) */
extern sint nseqs;
extern sint max_aln_length;
extern sint *seqlen_array; /* the lengths of the sequences */
extern char **seq_array;   /* the sequences */
extern char **names;       /* the seq. names */
extern char seqname[];		/* name of input file */
extern sint gap_pos1,gap_pos2;
extern Boolean use_ambiguities;
extern char *amino_acid_codes;

static double 	*av;
static double 	*left_branch, *right_branch;
static double 	*save_left_branch, *save_right_branch;
static sint	*boot_totals;
static sint 	*tkill;
/*  
  The next line is a fossil from the days of using the cc ran()
static int 	ran_factor;
*/
static sint 	*boot_positions;
static FILE 	*phylip_phy_tree_file;
static FILE 	*clustal_phy_tree_file;
static FILE 	*distances_phy_tree_file;
static FILE 	*nexus_phy_tree_file;
static FILE     *pim_file; /* Ramu */
static Boolean 	verbose;
static char 	*tree_gaps;
static sint first_seq, last_seq;
                     /* array of weights; 1 for use this posn.; 0 don't */

extern sint boot_ntrials;		/* number of bootstrap trials */
extern unsigned sint boot_ran_seed;	/* random number generator seed */

void phylogenetic_tree(char *phylip_name,char *clustal_name,char *dist_name, char *nexus_name, char *pim_name)
/* 
   Calculate a tree using the distances in the nseqs*nseqs array tmat.
   This is the routine for getting the REAL trees after alignment.
*/
{	char path[FILENAMELEN+1];
	sint i, j;
	sint overspill = 0;
	sint total_dists;
	static char **standard_tree;
	static char **save_tree;
	char lin2[10];

	if(empty) {
		error("You must load an alignment first");
		return;
	}

	if(nseqs<2) {
		error("Alignment has only %d sequences",nseqs);
		return;
	}
	first_seq=1;
	last_seq=nseqs;

	get_path(seqname,path);
	
if(output_tree_clustal) {
        if (clustal_name[0]!=EOS) {
                if((clustal_phy_tree_file = open_explicit_file(
                clustal_name))==NULL) return;
        }
        else {
		if((clustal_phy_tree_file = open_output_file(
		"\nEnter name for CLUSTAL    tree output file  ",path,
		clustal_name,"nj")) == NULL) return;
        }
}

if(output_tree_phylip) {
        if (phylip_name[0]!=EOS) {
                if((phylip_phy_tree_file = open_explicit_file(
                phylip_name))==NULL) return;
        }
        else {
                 if((phylip_phy_tree_file = open_output_file(
		"\nEnter name for PHYLIP     tree output file  ",path,
                phylip_name,"ph")) == NULL) return;
        }
}

if(output_tree_distances)
{
        if (dist_name[0]!=EOS) {
                if((distances_phy_tree_file = open_explicit_file(
                dist_name))==NULL) return;
        }
        else {
		if((distances_phy_tree_file = open_output_file(
		"\nEnter name for distance matrix output file  ",path,
		dist_name,"dst")) == NULL) return;
        }
}

if(output_tree_nexus)
{
        if (nexus_name[0]!=EOS) {
                if((nexus_phy_tree_file = open_explicit_file(
                nexus_name))==NULL) return;
        }
        else {
		if((nexus_phy_tree_file = open_output_file(
		"\nEnter name for NEXUS tree output file  ",path,
		nexus_name,"tre")) == NULL) return;
        }
}

if(output_pim)
{
        if (pim_name[0]!=EOS) {
        	if((pim_file = open_explicit_file(
		pim_name))==NULL) return;
      }
      else {
        	if((pim_file = open_output_file(
		"\nEnter name for % Identity matrix output file  ",path,
                pim_name,"pim")) == NULL) return;
      }
}

	boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );

	for(j=1; j<=seqlen_array[first_seq]; ++j) 
		boot_positions[j] = j;		

	if(output_tree_clustal) {
		verbose = TRUE;     /* Turn on file output */
		if(dnaflag)
			overspill = dna_distance_matrix(clustal_phy_tree_file);
		else 
			overspill = prot_distance_matrix(clustal_phy_tree_file);
	}

	if(output_tree_phylip) {
		verbose = FALSE;     /* Turn off file output */
		if(dnaflag)
			overspill = dna_distance_matrix(phylip_phy_tree_file);
		else 
			overspill = prot_distance_matrix(phylip_phy_tree_file);
	}

	if(output_tree_nexus) {
		verbose = FALSE;     /* Turn off file output */
		if(dnaflag)
			overspill = dna_distance_matrix(nexus_phy_tree_file);
		else 
			overspill = prot_distance_matrix(nexus_phy_tree_file);
	}

        if(output_pim) { /* Ramu  */
          	verbose = FALSE;     /* Turn off file output */
          	if(dnaflag)
           		calc_percidentity(pim_file);
          	else
            		calc_percidentity(pim_file);
        }


	if(output_tree_distances) {
		verbose = FALSE;     /* Turn off file output */
		if(dnaflag)
			overspill = dna_distance_matrix(distances_phy_tree_file);
		else 
			overspill = prot_distance_matrix(distances_phy_tree_file);
      		distance_matrix_output(distances_phy_tree_file);
	}

/* check if any distances overflowed the distance corrections */
	if ( overspill > 0 ) {
		total_dists = (nseqs*(nseqs-1))/2;
		overspill_message(overspill,total_dists);
	}

	if(output_tree_clustal) verbose = TRUE;     /* Turn on file output */

	standard_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
	for(i=0; i<nseqs+1; i++) 
		standard_tree[i]  = (char *) ckalloc( (nseqs+1) * sizeof(char) );
	save_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
	for(i=0; i<nseqs+1; i++) 
		save_tree[i]  = (char *) ckalloc( (nseqs+1) * sizeof(char) );

	if(output_tree_clustal || output_tree_phylip || output_tree_nexus) 
		nj_tree(standard_tree,clustal_phy_tree_file);

	for(i=1; i<nseqs+1; i++) 
		for(j=1; j<nseqs+1; j++) 
			save_tree[i][j]  = standard_tree[i][j];

	if(output_tree_phylip) 
		print_phylip_tree(standard_tree,phylip_phy_tree_file,0);

	for(i=1; i<nseqs+1; i++) 
		for(j=1; j<nseqs+1; j++) 
			standard_tree[i][j]  = save_tree[i][j];

	if(output_tree_nexus) 
		print_nexus_tree(standard_tree,nexus_phy_tree_file,0);

/*
	print_tree(standard_tree,phy_tree_file);
*/
	tree_gaps=ckfree((void *)tree_gaps);
	boot_positions=ckfree((void *)boot_positions);
	if (left_branch != NULL) left_branch=ckfree((void *)left_branch);
	if (right_branch != NULL) right_branch=ckfree((void *)right_branch);
	if (tkill != NULL) tkill=ckfree((void *)tkill);
	if (av != NULL) av=ckfree((void *)av);
	for (i=0;i<nseqs+1;i++)
		standard_tree[i]=ckfree((void *)standard_tree[i]);
	standard_tree=ckfree((void *)standard_tree);

	for (i=0;i<nseqs+1;i++)
		save_tree[i]=ckfree((void *)save_tree[i]);
	save_tree=ckfree((void *)save_tree);

if(output_tree_clustal) {
	fclose(clustal_phy_tree_file);	
	info("Phylogenetic tree file created:   [%s]",clustal_name);
}

if(output_tree_phylip) {
	fclose(phylip_phy_tree_file);	
	info("Phylogenetic tree file created:   [%s]",phylip_name);
}

if(output_tree_distances) {
	fclose(distances_phy_tree_file);	
	info("Distance matrix  file  created:   [%s]",dist_name);
}

if(output_tree_nexus) {
	fclose(nexus_phy_tree_file);	
	info("Nexus tree file  created:   [%s]",nexus_name);
}

if(output_pim) {
	fclose(pim_file);
	info(" perc identity matrix file  created:   [%s]",pim_name);
}

}

static void overspill_message(sint overspill,sint total_dists)
{
	char err_mess[1024]="";

	sprintf(err_mess,"%d of the distances out of a total of %d",
	(pint)overspill,(pint)total_dists);
	strcat(err_mess,"\n were out of range for the distance correction.");
	strcat(err_mess,"\n");
	strcat(err_mess,"\n SUGGESTIONS: 1) remove the most distant sequences");
	strcat(err_mess,"\n           or 2) use the PHYLIP package");
	strcat(err_mess,"\n           or 3) turn off the correction.");
	strcat(err_mess,"\n Note: Use option 3 with caution! With this degree");
	strcat(err_mess,"\n of divergence you will have great difficulty");
	strcat(err_mess,"\n getting robust and reliable trees.");
	strcat(err_mess,"\n\n");
	warning(err_mess);
}



Boolean transition(sint base1, sint base2) /* TRUE if transition; else FALSE */
/* 

   assumes that the bases of DNA sequences have been translated as
   a,A = 0;   c,C = 1;   g,G = 2;   t,T,u,U = 3;  N = 4;  
   a,A = 0;   c,C = 2;   g,G = 6;   t,T,u,U =17;  

   A <--> G  and  T <--> C  are transitions;  all others are transversions.

*/
{
	if( ((base1 == 0) && (base2 == 6)) || ((base1 == 6) && (base2 == 0)) )
		return TRUE;                                     /* A <--> G */
	if( ((base1 ==17) && (base2 == 2)) || ((base1 == 2) && (base2 ==17)) )
		return TRUE;                                     /* T <--> C */
    return FALSE;
}


void tree_gap_delete(void)   /* flag all positions in alignment that have a gap */
{			  /* in ANY sequence */
	sint seqn;
	sint posn;

	tree_gaps = (char *)ckalloc( (max_aln_length+1) * sizeof (char) );
        
	for(posn=1; posn<=seqlen_array[first_seq]; ++posn) {
		tree_gaps[posn] = 0;
     	for(seqn=1; seqn<=last_seq-first_seq+1; ++seqn)  {
			if((seq_array[seqn+first_seq-1][posn] == gap_pos1) ||
			   (seq_array[seqn+first_seq-1][posn] == gap_pos2)) {
			   tree_gaps[posn] = 1;
				break;
			}
		}
	}

}

void distance_matrix_output(FILE *ofile)
{
	sint i,j;
	
	fprintf(ofile,"%6d",(pint)last_seq-first_seq+1);
	for(i=1;i<=last_seq-first_seq+1;i++) {
		fprintf(ofile,"\n%-*s ",max_names,names[i]);
		for(j=1;j<=last_seq-first_seq+1;j++) {
			fprintf(ofile,"%6.3f ",tmat[i][j]);
			if(j % 8 == 0) {
				if(j!=last_seq-first_seq+1) fprintf(ofile,"\n"); 
				if(j != last_seq-first_seq+1 ) fprintf(ofile,"          ");
			}
		}
	}
}



#ifdef ORIGINAL_NJ_TREE
void nj_tree(char **tree_description, FILE *tree)
{
	register int i;
	sint l[4],nude,k;
	sint nc,mini,minj,j,ii,jj;
	double fnseqs,fnseqs2=0,sumd;
	double diq,djq,dij,d2r,dr,dio,djo,da;
	double tmin,total,dmin;
	double bi,bj,b1,b2,b3,branch[4];
	sint typei,typej;             /* 0 = node; 1 = OTU */
	
	fnseqs = (double)last_seq-first_seq+1;

/*********************** First initialisation ***************************/
	
	if(verbose)  {
		fprintf(tree,"\n\n\t\t\tNeighbor-joining Method\n");
		fprintf(tree,"\n Saitou, N. and Nei, M. (1987)");
		fprintf(tree," The Neighbor-joining Method:");
		fprintf(tree,"\n A New Method for Reconstructing Phylogenetic Trees.");
		fprintf(tree,"\n Mol. Biol. Evol., 4(4), 406-425\n");
		fprintf(tree,"\n\n This is an UNROOTED tree\n");
		fprintf(tree,"\n Numbers in parentheses are branch lengths\n\n");
	}	

	if (fnseqs == 2) {
		if (verbose) fprintf(tree,"Cycle   1     =  SEQ:   1 (%9.5f) joins  SEQ:   2 (%9.5f)",tmat[first_seq][first_seq+1],tmat[first_seq][first_seq+1]);
		return;
	}

	mini = minj = 0;

	left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );
	right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
	tkill 		= (sint *) ckalloc( (nseqs+1) * sizeof (sint) );
	av   		= (double *) ckalloc( (nseqs+1) * sizeof (double)   );

	for(i=1;i<=last_seq-first_seq+1;++i) 
		{
		tmat[i][i] = av[i] = 0.0;
		tkill[i] = 0;
		}

/*********************** Enter The Main Cycle ***************************/

 /*	for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) {  */            	/**start main cycle**/
	for(nc=1; nc<=(last_seq-first_seq+1-3); ++nc) {
		sumd = 0.0;
		for(j=2; j<=last_seq-first_seq+1; ++j)
			for(i=1; i<j; ++i) {
				tmat[j][i] = tmat[i][j];
				sumd = sumd + tmat[i][j];
			}

		tmin = 99999.0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -