⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.c

📁 在任务级并行平台P2HP上
💻 C
📖 第 1 页 / 共 5 页
字号:
		}

	if(single_seq) {
		tree_description[start_row][test_col] = 0;
		fprintf(tree,"%d",test_col+first_seq-1);
		fprintf(tree,":%7.5f)",right_branch[start_row]);
	}
	else {
		for(col=1; col<=last_seq-first_seq+1; col++) {
		    if((tree_description[start_row][col]==1)&&
		       (tree_description[new_row][col]==1))
				tree_description[start_row][col] = 0;
		}
		old_row=two_way_split_nexus(tree_description, tree, new_row, (sint)1, bootstrap);
		fprintf(tree,":%7.5f",right_branch[start_row]);
		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
			fprintf(tree,"[%d]",(pint)boot_totals[old_row]);

		fprintf(tree,")");
	}
	if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))
			fprintf(tree,"%d",(pint)boot_totals[start_row]);
	
	return(start_row);
}


void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap)
{
	sint old_row;
	
	if(last_seq-first_seq+1==2) {
		fprintf(tree,"(%s:%7.5f,%s:%7.5f);",names[first_seq],tmat[first_seq][first_seq+1],names[first_seq+1],tmat[first_seq][first_seq+1]);
		return;
	}

	fprintf(tree,"(\n");
 
	old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,1,bootstrap);
	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-2]);
	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
	fprintf(tree,",\n");

	old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,2,bootstrap);
	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-1]);
	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
	fprintf(tree,",\n");

	old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,3,bootstrap);
	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1]);
	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))
		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);
	fprintf(tree,")");
        if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");
	fprintf(tree,";\n");
}


sint two_way_split
(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap)
{
	sint row, new_row = 0, old_row, col, test_col = 0;
	Boolean single_seq;

	if(start_row != last_seq-first_seq+1-2) fprintf(tree,"(\n"); 

	for(col=1; col<=last_seq-first_seq+1; col++) {
		if(tree_description[start_row][col] == flag) {
			test_col = col;
			break;
		}
	}

	single_seq = TRUE;
	for(row=start_row-1; row>=1; row--) 
		if(tree_description[row][test_col] == 1) {
			single_seq = FALSE;
			new_row = row;
			break;
		}

	if(single_seq) {
		tree_description[start_row][test_col] = 0;
		fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);
		if(start_row == last_seq-first_seq+1-2) {
			return(0);
		}

		fprintf(tree,":%7.5f,\n",left_branch[start_row]);
	}
	else {
		for(col=1; col<=last_seq-first_seq+1; col++) {
		    if((tree_description[start_row][col]==1)&&
		       (tree_description[new_row][col]==1))
				tree_description[start_row][col] = 0;
		}
		old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);
		if(start_row == last_seq-first_seq+1-2) {
			return(new_row);
		}

		fprintf(tree,":%7.5f",left_branch[start_row]);
		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
			fprintf(tree,"[%d]",(pint)boot_totals[old_row]);

		fprintf(tree,",\n");
	}


	for(col=1; col<=last_seq-first_seq+1; col++) 
		if(tree_description[start_row][col] == flag) {
			test_col = col;
			break;
		}
	
	single_seq = TRUE;
	new_row = 0;
	for(row=start_row-1; row>=1; row--) 
		if(tree_description[row][test_col] == 1) {
			single_seq = FALSE;
			new_row = row;
			break;
		}

	if(single_seq) {
		tree_description[start_row][test_col] = 0;
		fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);
		fprintf(tree,":%7.5f)\n",right_branch[start_row]);
	}
	else {
		for(col=1; col<=last_seq-first_seq+1; col++) {
		    if((tree_description[start_row][col]==1)&&
		       (tree_description[new_row][col]==1))
				tree_description[start_row][col] = 0;
		}
		old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);
		fprintf(tree,":%7.5f",right_branch[start_row]);
		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))
			fprintf(tree,"[%d]",(pint)boot_totals[old_row]);

		fprintf(tree,")\n");
	}
	if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))
			fprintf(tree,"%d",(pint)boot_totals[start_row]);
	
	return(start_row);
}



void print_tree(char **tree_description, FILE *tree, sint *totals)
{
	sint row,col;

	fprintf(tree,"\n");

	for(row=1; row<=last_seq-first_seq+1-3; row++)  {
		fprintf(tree," \n");
		for(col=1; col<=last_seq-first_seq+1; col++) { 
			if(tree_description[row][col] == 0)
				fprintf(tree,"*");
			else
				fprintf(tree,".");
		}
		if(totals[row] > 0)
			fprintf(tree,"%7d",(pint)totals[row]);
	}
	fprintf(tree," \n");
	for(col=1; col<=last_seq-first_seq+1; col++) 
		fprintf(tree,"%1d",(pint)tree_description[last_seq-first_seq+1-2][col]);
	fprintf(tree,"\n");
}



sint dna_distance_matrix(FILE *tree)
{   
	sint m,n;
	sint j,i;
	sint res1, res2;
    sint overspill = 0;
	double p,q,e,a,b,k;	

	tree_gap_delete();  /* flag positions with gaps (tree_gaps[i] = 1 ) */
	
	if(verbose) {
		fprintf(tree,"\n");
		fprintf(tree,"\n DIST   = percentage divergence (/100)");
		fprintf(tree,"\n p      = rate of transition (A <-> G; C <-> T)");
		fprintf(tree,"\n q      = rate of transversion");
		fprintf(tree,"\n Length = number of sites used in comparison");
		fprintf(tree,"\n");
	    if(tossgaps) {
		fprintf(tree,"\n All sites with gaps (in any sequence) deleted!");
		fprintf(tree,"\n");
	    }
	    if(kimura) {
		fprintf(tree,"\n Distances corrected by Kimura's 2 parameter model:");
		fprintf(tree,"\n\n Kimura, M. (1980)");
		fprintf(tree," A simple method for estimating evolutionary ");
		fprintf(tree,"rates of base");
		fprintf(tree,"\n substitutions through comparative studies of ");
		fprintf(tree,"nucleotide sequences.");
		fprintf(tree,"\n J. Mol. Evol., 16, 111-120.");
		fprintf(tree,"\n\n");
	    }
	}

	for(m=1;   m<last_seq-first_seq+1;  ++m)     /* for every pair of sequence */
	for(n=m+1; n<=last_seq-first_seq+1; ++n) {
		p = q = e = 0.0;
		tmat[m][n] = tmat[n][m] = 0.0;
		for(i=1; i<=seqlen_array[first_seq]; ++i) {
			j = boot_positions[i];
                    	if(tossgaps && (tree_gaps[j] > 0) ) 
				goto skip;          /* gap position */
			res1 = seq_array[m+first_seq-1][j];
			res2 = seq_array[n+first_seq-1][j];
			if( (res1 == gap_pos1)     || (res1 == gap_pos2) ||
                            (res2 == gap_pos1) || (res2 == gap_pos2)) 
				goto skip;          /* gap in a seq*/
			if(!use_ambiguities)
			if( is_ambiguity(res1) || is_ambiguity(res2))
				goto skip;          /* ambiguity code in a seq*/
			e = e + 1.0;
                        if(res1 != res2) {
				if(transition(res1,res2))
					p = p + 1.0;
				else
					q = q + 1.0;
			}
		        skip:;
		}


	/* Kimura's 2 parameter correction for multiple substitutions */

		if(!kimura) {
			if (e == 0) {
				fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);
				k = 0.0;
				p = 0.0;
				q = 0.0;
			}
			else {
				k = (p+q)/e;
				if(p > 0.0)
					p = p/e;
				else
					p = 0.0;
				if(q > 0.0)
					q = q/e;
				else
					q = 0.0;
			}
			tmat[m][n] = tmat[n][m] = k;
			if(verbose)                    /* if screen output */
				fprintf(tree,        
 	     "%4d vs.%4d:  DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
        	                 ,(pint)m,(pint)n,k,p,q,e);
		}
		else {
			if (e == 0) {
				fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);
				p = 0.0;
				q = 0.0;
			}
			else {
				if(p > 0.0)
					p = p/e;
				else
					p = 0.0;
				if(q > 0.0)
					q = q/e;
				else
					q = 0.0;
			}

			if( ((2.0*p)+q) == 1.0 )
				a = 0.0;
			else
				a = 1.0/(1.0-(2.0*p)-q);

			if( q == 0.5 )
				b = 0.0;
			else
				b = 1.0/(1.0-(2.0*q));

/* watch for values going off the scale for the correction. */
			if( (a<=0.0) || (b<=0.0) ) {
				overspill++;
				k = 3.5;  /* arbitrary high score */ 
			}
			else 
				k = 0.5*log(a) + 0.25*log(b);
			tmat[m][n] = tmat[n][m] = k;
			if(verbose)                      /* if screen output */
	   			fprintf(tree,
             "%4d vs.%4d:  DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"
        	                ,(pint)m,(pint)n,k,p,q,e);

		}
	}
	return overspill;	/* return the number of off-scale values */
}


sint prot_distance_matrix(FILE *tree)
{
	sint m,n;
	sint j,i;
	sint res1, res2;
    sint overspill = 0;
	double p,e,k, table_entry;	


	tree_gap_delete();  /* flag positions with gaps (tree_gaps[i] = 1 ) */
	
	if(verbose) {
		fprintf(tree,"\n");
		fprintf(tree,"\n DIST   = percentage divergence (/100)");
		fprintf(tree,"\n Length = number of sites used in comparison");
		fprintf(tree,"\n\n");
	        if(tossgaps) {
			fprintf(tree,"\n All sites with gaps (in any sequence) deleted");
			fprintf(tree,"\n");
		}
	    	if(kimura) {
			fprintf(tree,"\n Distances up tp 0.75 corrected by Kimura's empirical method:");
			fprintf(tree,"\n\n Kimura, M. (1983)");
			fprintf(tree," The Neutral Theory of Molecular Evolution.");
			fprintf(tree,"\n Page 75. Cambridge University Press, Cambridge, England.");
			fprintf(tree,"\n\n");
	    	}
	}

	for(m=1;   m<nseqs;  ++m)     /* for every pair of sequence */
	for(n=m+1; n<=nseqs; ++n) {
		p = e = 0.0;
		tmat[m][n] = tmat[n][m] = 0.0;
		for(i=1; i<=seqlen_array[1]; ++i) {
			j = boot_positions[i];
	            	if(tossgaps && (tree_gaps[j] > 0) ) goto skip; /* gap position */
			res1 = seq_array[m][j];
			res2 = seq_array[n][j];
			if( (res1 == gap_pos1)     || (res1 == gap_pos2) ||
                            (res2 == gap_pos1) || (res2 == gap_pos2)) 
                                    goto skip;   /* gap in a seq*/
			e = e + 1.0;
                        if(res1 != res2) p = p + 1.0;
		        skip:;
		}

		if(p <= 0.0) 
			k = 0.0;
		else
			k = p/e;

/* DES debug */
/* fprintf(stdout,"Seq1=%4d Seq2=%4d  k =%7.4f \n",(pint)m,(pint)n,k); */
/* DES debug */

		if(kimura) {
			if(k < 0.75) { /* use Kimura's formula */
				if(k > 0.0) k = - log(1.0 - k - (k * k/5.0) );
			}
			else {
				if(k > 0.930) {
				   overspill++;
				   k = 10.0; /* arbitrarily set to 1000% */
				}
				else {
				   table_entry = (k*1000.0) - 750.0;
                                   k = (double)dayhoff_pams[(int)table_entry];
                                   k = k/100.0;
				}
			}
		}

		tmat[m][n] = tmat[n][m] = k;
		    if(verbose)                    /* if screen output */
			fprintf(tree,        
 	                 "%4d vs.%4d  DIST = %6.4f;  length = %6.0f\n",
 	                 (pint)m,(pint)n,k,e);
	}
	return overspill;
}


void guide_tree(FILE *tree,sint firstseq,sint numseqs)
/* 
   Routine for producing unrooted NJ trees from seperately aligned
   pairwise distances.  This produces the GUIDE DENDROGRAMS in
   PHYLIP format.
*/
{
        static char **standard_tree;
        sint i;
	float dist;

	phylip_phy_tree_file=tree;
        verbose = FALSE;
	first_seq=firstseq;
	last_seq=first_seq+numseqs-1;
  
	if(numseqs==2) {
		dist=tmat[firstseq][firstseq+1]/2.0;
		fprintf(tree,"(%s:%0.5f,%s:%0.5f);\n",
			names[firstseq],dist,names[firstseq+1],dist);
	}
	else {
        standard_tree   = (char **) ckalloc( (last_seq-first_seq+2) * sizeof (char *) );
        for(i=0; i<last_seq-first_seq+2; i++)
                standard_tree[i]  = (char *) ckalloc( (last_seq-first_seq+2) * sizeof(char));

        nj_tree(standard_tree,clustal_phy_tree_file);

        print_phylip_tree(standard_tree,phylip_phy_tree_file,0);

        if(left_branch != NULL) left_branch=ckfree((void *)left_branch);
        if(right_branch != NULL) right_branch=ckfree((void *)right_branch);
        if(tkill != NULL) tkill=ckfree((void *)tkill);
        if(av != NULL) av=ckfree((void *)av);
        for (i=1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -