⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 3 页
字号:
			

/* 
   Here is where the -0.00005 branch lengths come from for 3 or more
   identical seqs.
*/
/*		if(dmin <= 0.0) dmin = 0.0001; */
                if(dmin <= 0.0) dmin = 0.000001;
		av[mini] = dmin * 0.5;

/*........................Re-initialisation................................*/

		fnseqs = fnseqs - 1.0;
		tkill[minj] = 1;

		for(j=1; j<=last_seq-first_seq+1; ++j) 
			if( tkill[j] != 1 ) {
				da = ( tmat[mini][j] + tmat[minj][j] ) * 0.5;
				if( (mini - j) < 0 ) 
					tmat[mini][j] = da;
				if( (mini - j) > 0)
					tmat[j][mini] = da;
			}

		for(j=1; j<=last_seq-first_seq+1; ++j)
			tmat[minj][j] = tmat[j][minj] = 0.0;


/****/	}						/**end main cycle**/

/******************************Last Cycle (3 Seqs. left)********************/

	nude = 1;

	for(i=1; i<=last_seq-first_seq+1; ++i)
		if( tkill[i] != 1 ) {
			l[nude] = i;
			nude = nude + 1;
		}

	b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5;
	b2 =  tmat[l[1]][l[2]] - b1;
	b3 =  tmat[l[1]][l[3]] - b1;
 
	branch[1] = b1 - av[l[1]];
	branch[2] = b2 - av[l[2]];
	branch[3] = b3 - av[l[3]];

/* Reset tiny negative and positive branch lengths to zero */
	if( fabs(branch[1]) < 0.0001) branch[1] = 0.0;
	if( fabs(branch[2]) < 0.0001) branch[2] = 0.0;
	if( fabs(branch[3]) < 0.0001) branch[3] = 0.0;

	left_branch[last_seq-first_seq+1-2] = branch[1];
	left_branch[last_seq-first_seq+1-1] = branch[2];
	left_branch[last_seq-first_seq+1]   = branch[3];

	for(i=1; i<=last_seq-first_seq+1; i++)
		tree_description[last_seq-first_seq+1-2][i] = 0;

	if(verbose)
		fprintf(tree,"\n Cycle%4d (Last cycle, trichotomy):\n",(pint)nc);

	for(i=1; i<=3; ++i) {
	   if( av[l[i]] > 0.0) {
	      	if(verbose)
	      	    fprintf(tree,"\n\t\t Node:%4d (%9.5f) ",(pint)l[i],branch[i]);
		for(k=last_seq-first_seq+1-3; k>=1; k--)
			if(tree_description[k][l[i]] == 1) {
				for(j=1; j<=last_seq-first_seq+1; j++)
				 	if(tree_description[k][j] == 1)
					    tree_description[last_seq-first_seq+1-2][j] = i;
				break;
			}
	   }
	   else  {
	      	if(verbose)
	   	    fprintf(tree,"\n\t\t  SEQ:%4d (%9.5f) ",(pint)l[i],branch[i]);
		tree_description[last_seq-first_seq+1-2][l[i]] = i;
	   }
	   if(i < 3) {
	      	if(verbose)
	            fprintf(tree,"joins");
	   }
	}

	if(verbose)
		fprintf(tree,"\n");

}




void bootstrap_tree(char *phylip_name,char *clustal_name, char *nexus_name)
{
	sint i,j;
	int ranno;
	char path[MAXLINE+1];
    char dummy[10];
	char err_mess[1024];
	static char **sample_tree;
	static char **standard_tree;
	static char **save_tree;
	sint total_dists, overspill = 0, total_overspill = 0;
	sint nfails = 0;

	if(empty) {
		error("You must load an alignment first");
		return;
	}

        if(nseqs<4) {
                error("Alignment has only %d sequences",nseqs);
                return;
        }

	if(!output_tree_clustal && !output_tree_phylip && !output_tree_nexus) {
		error("You must select either clustal or phylip or nexus tree output format");
		return;
	}
	get_path(seqname, path);
	
	if (output_tree_clustal) {
        if (clustal_name[0]!=EOS) {
                if((clustal_phy_tree_file = open_explicit_file(
                clustal_name))==NULL) return;
        }
        else {
		if((clustal_phy_tree_file = open_output_file(
		"\nEnter name for bootstrap output file  ",path,
		clustal_name,"njb")) == NULL) return;
        }
	}

	first_seq=1;
	last_seq=nseqs;

	if (output_tree_phylip) {
        if (phylip_name[0]!=EOS) {
                if((phylip_phy_tree_file = open_explicit_file(
                phylip_name))==NULL) return;
        }
	else {
		if((phylip_phy_tree_file = open_output_file(
		"\nEnter name for bootstrap output file  ",path,
		phylip_name,"phb")) == NULL) return;
	}
	}

	if (output_tree_nexus) {
        if (nexus_name[0]!=EOS) {
                if((nexus_phy_tree_file = open_explicit_file(
                nexus_name))==NULL) return;
        }
	else {
		if((nexus_phy_tree_file = open_output_file(
		"\nEnter name for bootstrap output file  ",path,
		nexus_name,"treb")) == NULL) return;
	}
	}

	boot_totals    = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
	for(i=0;i<nseqs+1;i++)
		boot_totals[i]=0;
		
	boot_positions = (sint *)ckalloc( (seqlen_array[first_seq]+2) * sizeof (sint) );

	for(j=1; j<=seqlen_array[first_seq]; ++j)  /* First select all positions for */
		boot_positions[j] = j;	   /* the "standard" tree */

	if(output_tree_clustal) {
		verbose = TRUE;     /* Turn on file output */
		if(dnaflag)
			overspill = dna_distance_matrix(clustal_phy_tree_file);
		else 
			overspill = prot_distance_matrix(clustal_phy_tree_file);
	}

	if(output_tree_phylip) {
		verbose = FALSE;     /* Turn off file output */
		if(dnaflag)
			overspill = dna_distance_matrix(phylip_phy_tree_file);
		else 
			overspill = prot_distance_matrix(phylip_phy_tree_file);
	}

	if(output_tree_nexus) {
		verbose = FALSE;     /* Turn off file output */
		if(dnaflag)
			overspill = dna_distance_matrix(nexus_phy_tree_file);
		else 
			overspill = prot_distance_matrix(nexus_phy_tree_file);
	}

/* check if any distances overflowed the distance corrections */
	if ( overspill > 0 ) {
		total_dists = (nseqs*(nseqs-1))/2;
		overspill_message(overspill,total_dists);
	}

	tree_gaps=ckfree((void *)tree_gaps);

	if (output_tree_clustal) verbose = TRUE;   /* Turn on screen output */

	standard_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
	for(i=0; i<nseqs+1; i++) 
		standard_tree[i]   = (char *) ckalloc( (nseqs+1) * sizeof(char) );

/* compute the standard tree */

	if(output_tree_clustal || output_tree_phylip || output_tree_nexus)
		nj_tree(standard_tree,clustal_phy_tree_file);

	if (output_tree_clustal)
		fprintf(clustal_phy_tree_file,"\n\n\t\t\tBootstrap Confidence Limits\n\n");

/* save the left_branch and right_branch for phylip output */
	save_left_branch = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
	save_right_branch = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
	for (i=1;i<=nseqs;i++) {
		save_left_branch[i] = left_branch[i];
		save_right_branch[i] = right_branch[i];
	}
/*  
  The next line is a fossil from the days of using the cc ran()
	ran_factor = RAND_MAX / seqlen_array[first_seq]; 
*/

	if(usemenu) 
   		boot_ran_seed = 
getint("\n\nEnter seed no. for random number generator ",1,1000,boot_ran_seed);

/* do not use the native cc ran()
	srand(boot_ran_seed);
*/
       	addrandinit((unsigned long) boot_ran_seed);

	if (output_tree_clustal)
		fprintf(clustal_phy_tree_file,"\n Random number generator seed = %7u\n",
		boot_ran_seed);

	if(usemenu) 
  		boot_ntrials = 
getint("\n\nEnter number of bootstrap trials ",1,10000,boot_ntrials);

	if (output_tree_clustal) {
  		fprintf(clustal_phy_tree_file,"\n Number of bootstrap trials   = %7d\n",
	(pint)boot_ntrials);

		fprintf(clustal_phy_tree_file,
		"\n\n Diagrammatic representation of the above tree: \n");
		fprintf(clustal_phy_tree_file,"\n Each row represents 1 tree cycle;");
		fprintf(clustal_phy_tree_file," defining 2 groups.\n");
		fprintf(clustal_phy_tree_file,"\n Each column is 1 sequence; ");
		fprintf(clustal_phy_tree_file,"the stars in each line show 1 group; ");
		fprintf(clustal_phy_tree_file,"\n the dots show the other\n");
		fprintf(clustal_phy_tree_file,"\n Numbers show occurences in bootstrap samples.");
	}
/*
	print_tree(standard_tree, clustal_phy_tree_file, boot_totals);
*/
	verbose = FALSE;                   /* Turn OFF screen output */

	left_branch=ckfree((void *)left_branch);
	right_branch=ckfree((void *)right_branch);
	tkill=ckfree((void *)tkill);
	av=ckfree((void *)av);

	sample_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
	for(i=0; i<nseqs+1; i++) 
		sample_tree[i]   = (char *) ckalloc( (nseqs+1) * sizeof(char) );

	if (usemenu)
	fprintf(stdout,"\n\nEach dot represents 10 trials\n\n");
        total_overspill = 0;
	nfails = 0;
	for(i=1; i<=boot_ntrials; ++i) {
		for(j=1; j<=seqlen_array[first_seq]; ++j) { /* select alignment */
							    /* positions for */
			ranno = addrand( (unsigned long) seqlen_array[1]) + 1;
			boot_positions[j] = ranno; 	    /* bootstrap sample */
		}
		if(output_tree_clustal) {
			if(dnaflag)
				overspill = dna_distance_matrix(clustal_phy_tree_file);
			else 
				overspill = prot_distance_matrix(clustal_phy_tree_file);
		}
	
		if(output_tree_phylip) {
			if(dnaflag)
				overspill = dna_distance_matrix(phylip_phy_tree_file);
			else 
				overspill = prot_distance_matrix(phylip_phy_tree_file);
		}

		if(output_tree_nexus) {
			if(dnaflag)
				overspill = dna_distance_matrix(nexus_phy_tree_file);
			else 
				overspill = prot_distance_matrix(nexus_phy_tree_file);
		}

		if( overspill > 0) {
			total_overspill = total_overspill + overspill;
			nfails++;
		}			

		tree_gaps=ckfree((void *)tree_gaps);

		if(output_tree_clustal || output_tree_phylip || output_tree_nexus) 
			nj_tree(sample_tree,clustal_phy_tree_file);

	 	left_branch=ckfree((void *)left_branch);
		right_branch=ckfree((void *)right_branch);
		tkill=ckfree((void *)tkill);
		av=ckfree((void *)av);

		compare_tree(standard_tree, sample_tree, boot_totals, last_seq-first_seq+1);
		if (usemenu) {
			if(i % 10  == 0) fprintf(stdout,".");
			if(i % 100 == 0) fprintf(stdout,"\n");
		}
	}

/* check if any distances overflowed the distance corrections */
	if ( nfails > 0 ) {
		total_dists = (nseqs*(nseqs-1))/2;
		fprintf(stdout,"\n");
		fprintf(stdout,"\n WARNING: %ld of the distances out of a total of %ld times %ld",
		(long)total_overspill,(long)total_dists,(long)boot_ntrials);
		fprintf(stdout,"\n were out of range for the distance correction.");
		fprintf(stdout,"\n This affected %d out of %d bootstrap trials.",
		(pint)nfails,(pint)boot_ntrials);
		fprintf(stdout,"\n This may not be fatal but you have been warned!");
		fprintf(stdout,"\n");
		fprintf(stdout,"\n SUGGESTIONS: 1) turn off the correction");
		fprintf(stdout,"\n           or 2) remove the most distant sequences");
		fprintf(stdout,"\n           or 3) use the PHYLIP package.");
		fprintf(stdout,"\n\n");
		if (usemenu) 
			getstr("Press [RETURN] to continue",dummy);
	}


	boot_positions=ckfree((void *)boot_positions);

	for (i=1;i<nseqs+1;i++)
		sample_tree[i]=ckfree((void *)sample_tree[i]);
	sample_tree=ckfree((void *)sample_tree);
/*
	fprintf(clustal_phy_tree_file,"\n\n Bootstrap totals for each group\n");
*/
	if (output_tree_clustal)
		print_tree(standard_tree, clustal_phy_tree_file, boot_totals);

	save_tree   = (char **) ckalloc( (nseqs+1) * sizeof (char *) );
	for(i=0; i<nseqs+1; i++) 
		save_tree[i]   = (char *) ckalloc( (nseqs+1) * sizeof(char) );

	for(i=1; i<nseqs+1; i++) 
		for(j=1; j<nseqs+1; j++) 
			save_tree[i][j]  = standard_tree[i][j];

	if(output_tree_phylip) {
		left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );
		right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
		for (i=1;i<=nseqs;i++) {
			left_branch[i] = save_left_branch[i];
			right_branch[i] = save_right_branch[i];
		}
		print_phylip_tree(standard_tree,phylip_phy_tree_file,
						 bootstrap_format);
		left_branch=ckfree((void *)left_branch);
		right_branch=ckfree((void *)right_branch);
	}

	for(i=1; i<nseqs+1; i++) 
		for(j=1; j<nseqs+1; j++) 
			standard_tree[i][j]  = save_tree[i][j];

	if(output_tree_nexus) {
		left_branch 	= (double *) ckalloc( (nseqs+2) * sizeof (double)   );
		right_branch    = (double *) ckalloc( (nseqs+2) * sizeof (double)   );
		for (i=1;i<=nseqs;i++) {
			left_branch[i] = save_left_branch[i];
			right_branch[i] = save_right_branch[i];
		}
		print_nexus_tree(standard_tree,nexus_phy_tree_file,
						 bootstrap_format);
		left_branch=ckfree((void *)left_branch);
		right_branch=ckfree((void *)right_branch);
	}

	boot_totals=ckfree((void *)boot_totals);
	save_left_branch=ckfree((void *)save_left_branch);
	save_right_branch=ckfree((void *)save_right_branch);

	for (i=1;i<nseqs+1;i++)
		standard_tree[i]=ckfree((void *)standard_tree[i]);
	standard_tree=ckfree((void *)standard_tree);

	for (i=0;i<nseqs+1;i++)
		save_tree[i]=ckfree((void *)save_tree[i]);
	save_tree=ckfree((void *)save_tree);

	if (output_tree_clustal)
		fclose(clustal_phy_tree_file);

	if (output_tree_phylip)
		fclose(phylip_phy_tree_file);

	if (output_tree_nexus)
		fclose(nexus_phy_tree_file);

	if (output_tree_clustal)
		info("Bootstrap output file completed       [%s]"
		,clustal_name);
	if (output_tree_phylip)
		info("Bootstrap output file completed       [%s]"
		,phylip_name);
	if (output_tree_nexus)
		info("Bootstrap output file completed       [%s]"
		,nexus_name);
}


void compare_tree(char **tree1, char **tree2, sint *hits, sint n)
{	
	sint i,j,k;
	sint nhits1, nhits2;

	for(i=1; i<=n-3; i++)  {
		for(j=1; j<=n-3; j++)  {
			nhits1 = 0;
			nhits2 = 0;
			for(k=1; k<=n; k++) {
				if(tree1[i][k] == tree2[j][k]) nhits1++;
				if(tree1[i][k] != tree2[j][k]) nhits2++;
			}
			if((nhits1 == last_seq-first_seq+1) || (nhits2 == last_seq-first_seq+1)) hits[i]++;
		}
	}
}


void print_nexus_tree(char **tree_description, FILE *tree, sint bootstrap)
{
	sint i;
	sint old_row;
	
	fprintf(tree,"#NEXUS\n\n");

	fprintf(tree,"BEGIN TREES;\n\n");
	fprintf(tree,"\tTRANSLATE\n");
	for(i=1;i<nseqs;i++) {
		fprintf(tree,"\t\t%d	%s,\n",(pint)i,names[i]);
	}
	fprintf(tree,"\t\t%d	%s\n",(pint)nseqs,names[nseqs]);
	fprintf(tree,"\t\t;\n");

	fprintf(tree,"\tUTREE PAUP_1= ");

	if(last_seq-first_seq+1==2) {
		fprintf(tree,"(%d:%7.5f,%d:%7.5f);",first_seq,tmat[first_seq][first_seq+1],first_seq+1,tmat[first_seq][first_seq+1]);
	}
	else {

	fprintf(tree,"(");
 
	old_row=two_way_split_nexus(tree_description, tree, last_seq-first_seq+1-2,1,bootstrap);
	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-2]);
	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
	fprintf(tree,",");

	old_row=two_way_split_nexus(tree_description, tree, last_seq-first_seq+1-2,2,bootstrap);
	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-1]);
	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
	fprintf(tree,",");

	old_row=two_way_split_nexus(tree_description, tree, last_seq-first_seq+1-2,3,bootstrap);
	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1]);
	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
	fprintf(tree,")");
        if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
	fprintf(tree,";");
	}
	fprintf(tree,"\nENDBLOCK;\n");
}


sint two_way_split_nexus
(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
{
	sint row, new_row = 0, old_row, col, test_col = 0;
	Boolean single_seq;

	if(start_row != last_seq-first_seq+1-2) fprintf(tree,"("); 

	for(col=1; col<=last_seq-first_seq+1; col++) {
		if(tree_description[start_row][col] == flag) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -