⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.c

📁 经典生物信息学多序列比对工具clustalw
💻 C
📖 第 1 页 / 共 5 页
字号:
	if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))			fprintf(tree,"%d",(pint)boot_totals[start_row]);		return(start_row);}void print_phylip_tree(char **tree_description, FILE *tree, sint bootstrap){	sint old_row;		if(last_seq-first_seq+1==2) {		fprintf(tree,"(%s:%7.5f,%s:%7.5f);",names[first_seq],tmat[first_seq][first_seq+1],names[first_seq+1],tmat[first_seq][first_seq+1]);		return;	}	fprintf(tree,"(\n"); 	old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,1,bootstrap);	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-2]);	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);	fprintf(tree,",\n");	old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,2,bootstrap);	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1-1]);	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);	fprintf(tree,",\n");	old_row=two_way_split(tree_description, tree, last_seq-first_seq+1-2,3,bootstrap);	fprintf(tree,":%7.5f",left_branch[last_seq-first_seq+1]);	if ((bootstrap==BS_BRANCH_LABELS) && (old_row>0) && (boot_totals[old_row]>0))		fprintf(tree,"[%d]",(pint)boot_totals[old_row]);	fprintf(tree,")");        if (bootstrap==BS_NODE_LABELS) fprintf(tree,"TRICHOTOMY");	fprintf(tree,";\n");}sint two_way_split(char **tree_description, FILE *tree, sint start_row, sint flag, sint bootstrap){	sint row, new_row = 0, old_row, col, test_col = 0;	Boolean single_seq;	if(start_row != last_seq-first_seq+1-2) fprintf(tree,"(\n"); 	for(col=1; col<=last_seq-first_seq+1; col++) {		if(tree_description[start_row][col] == flag) {			test_col = col;			break;		}	}	single_seq = TRUE;	for(row=start_row-1; row>=1; row--) 		if(tree_description[row][test_col] == 1) {			single_seq = FALSE;			new_row = row;			break;		}	if(single_seq) {		tree_description[start_row][test_col] = 0;		fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);		if(start_row == last_seq-first_seq+1-2) {			return(0);		}		fprintf(tree,":%7.5f,\n",left_branch[start_row]);	}	else {		for(col=1; col<=last_seq-first_seq+1; col++) {		    if((tree_description[start_row][col]==1)&&		       (tree_description[new_row][col]==1))				tree_description[start_row][col] = 0;		}		old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);		if(start_row == last_seq-first_seq+1-2) {			return(new_row);		}		fprintf(tree,":%7.5f",left_branch[start_row]);		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))			fprintf(tree,"[%d]",(pint)boot_totals[old_row]);		fprintf(tree,",\n");	}	for(col=1; col<=last_seq-first_seq+1; col++) 		if(tree_description[start_row][col] == flag) {			test_col = col;			break;		}		single_seq = TRUE;	new_row = 0;	for(row=start_row-1; row>=1; row--) 		if(tree_description[row][test_col] == 1) {			single_seq = FALSE;			new_row = row;			break;		}	if(single_seq) {		tree_description[start_row][test_col] = 0;		fprintf(tree,"%.*s",max_names,names[test_col+first_seq-1]);		fprintf(tree,":%7.5f)\n",right_branch[start_row]);	}	else {		for(col=1; col<=last_seq-first_seq+1; col++) {		    if((tree_description[start_row][col]==1)&&		       (tree_description[new_row][col]==1))				tree_description[start_row][col] = 0;		}		old_row=two_way_split(tree_description, tree, new_row, (sint)1, bootstrap);		fprintf(tree,":%7.5f",right_branch[start_row]);		if ((bootstrap==BS_BRANCH_LABELS) && (boot_totals[old_row]>0))			fprintf(tree,"[%d]",(pint)boot_totals[old_row]);		fprintf(tree,")\n");	}	if ((bootstrap==BS_NODE_LABELS) && (boot_totals[start_row]>0))			fprintf(tree,"%d",(pint)boot_totals[start_row]);		return(start_row);}void print_tree(char **tree_description, FILE *tree, sint *totals){	sint row,col;	fprintf(tree,"\n");	for(row=1; row<=last_seq-first_seq+1-3; row++)  {		fprintf(tree," \n");		for(col=1; col<=last_seq-first_seq+1; col++) { 			if(tree_description[row][col] == 0)				fprintf(tree,"*");			else				fprintf(tree,".");		}		if(totals[row] > 0)			fprintf(tree,"%7d",(pint)totals[row]);	}	fprintf(tree," \n");	for(col=1; col<=last_seq-first_seq+1; col++) 		fprintf(tree,"%1d",(pint)tree_description[last_seq-first_seq+1-2][col]);	fprintf(tree,"\n");}sint dna_distance_matrix(FILE *tree){   	sint m,n;	sint j,i;	sint res1, res2;    sint overspill = 0;	double p,q,e,a,b,k;		tree_gap_delete();  /* flag positions with gaps (tree_gaps[i] = 1 ) */		if(verbose) {		fprintf(tree,"\n");		fprintf(tree,"\n DIST   = percentage divergence (/100)");		fprintf(tree,"\n p      = rate of transition (A <-> G; C <-> T)");		fprintf(tree,"\n q      = rate of transversion");		fprintf(tree,"\n Length = number of sites used in comparison");		fprintf(tree,"\n");	    if(tossgaps) {		fprintf(tree,"\n All sites with gaps (in any sequence) deleted!");		fprintf(tree,"\n");	    }	    if(kimura) {		fprintf(tree,"\n Distances corrected by Kimura's 2 parameter model:");		fprintf(tree,"\n\n Kimura, M. (1980)");		fprintf(tree," A simple method for estimating evolutionary ");		fprintf(tree,"rates of base");		fprintf(tree,"\n substitutions through comparative studies of ");		fprintf(tree,"nucleotide sequences.");		fprintf(tree,"\n J. Mol. Evol., 16, 111-120.");		fprintf(tree,"\n\n");	    }	}	for(m=1;   m<last_seq-first_seq+1;  ++m)     /* for every pair of sequence */	for(n=m+1; n<=last_seq-first_seq+1; ++n) {		p = q = e = 0.0;		tmat[m][n] = tmat[n][m] = 0.0;		for(i=1; i<=seqlen_array[first_seq]; ++i) {			j = boot_positions[i];                    	if(tossgaps && (tree_gaps[j] > 0) ) 				goto skip;          /* gap position */			res1 = seq_array[m+first_seq-1][j];			res2 = seq_array[n+first_seq-1][j];			if( (res1 == gap_pos1)     || (res1 == gap_pos2) ||                            (res2 == gap_pos1) || (res2 == gap_pos2)) 				goto skip;          /* gap in a seq*/			if(!use_ambiguities)			if( is_ambiguity(res1) || is_ambiguity(res2))				goto skip;          /* ambiguity code in a seq*/			e = e + 1.0;                        if(res1 != res2) {				if(transition(res1,res2))					p = p + 1.0;				else					q = q + 1.0;			}		        skip:;		}	/* Kimura's 2 parameter correction for multiple substitutions */		if(!kimura) {			if (e == 0) {				fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);				k = 0.0;				p = 0.0;				q = 0.0;			}			else {				k = (p+q)/e;				if(p > 0.0)					p = p/e;				else					p = 0.0;				if(q > 0.0)					q = q/e;				else					q = 0.0;			}			tmat[m][n] = tmat[n][m] = k;			if(verbose)                    /* if screen output */				fprintf(tree,         	     "%4d vs.%4d:  DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"        	                 ,(pint)m,(pint)n,k,p,q,e);		}		else {			if (e == 0) {				fprintf(stdout,"\n WARNING: sequences %d and %d are non-overlapping\n",m,n);				p = 0.0;				q = 0.0;			}			else {				if(p > 0.0)					p = p/e;				else					p = 0.0;				if(q > 0.0)					q = q/e;				else					q = 0.0;			}			if( ((2.0*p)+q) == 1.0 )				a = 0.0;			else				a = 1.0/(1.0-(2.0*p)-q);			if( q == 0.5 )				b = 0.0;			else				b = 1.0/(1.0-(2.0*q));/* watch for values going off the scale for the correction. */			if( (a<=0.0) || (b<=0.0) ) {				overspill++;				k = 3.5;  /* arbitrary high score */ 			}			else 				k = 0.5*log(a) + 0.25*log(b);			tmat[m][n] = tmat[n][m] = k;			if(verbose)                      /* if screen output */	   			fprintf(tree,             "%4d vs.%4d:  DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n"        	                ,(pint)m,(pint)n,k,p,q,e);		}	}	return overspill;	/* return the number of off-scale values */}sint prot_distance_matrix(FILE *tree){	sint m,n;	sint j,i;	sint res1, res2;    sint overspill = 0;	double p,e,k, table_entry;		tree_gap_delete();  /* flag positions with gaps (tree_gaps[i] = 1 ) */		if(verbose) {		fprintf(tree,"\n");		fprintf(tree,"\n DIST   = percentage divergence (/100)");		fprintf(tree,"\n Length = number of sites used in comparison");		fprintf(tree,"\n\n");	        if(tossgaps) {			fprintf(tree,"\n All sites with gaps (in any sequence) deleted");			fprintf(tree,"\n");		}	    	if(kimura) {			fprintf(tree,"\n Distances up tp 0.75 corrected by Kimura's empirical method:");			fprintf(tree,"\n\n Kimura, M. (1983)");			fprintf(tree," The Neutral Theory of Molecular Evolution.");			fprintf(tree,"\n Page 75. Cambridge University Press, Cambridge, England.");			fprintf(tree,"\n\n");	    	}	}	for(m=1;   m<nseqs;  ++m)     /* for every pair of sequence */	for(n=m+1; n<=nseqs; ++n) {		p = e = 0.0;		tmat[m][n] = tmat[n][m] = 0.0;		for(i=1; i<=seqlen_array[1]; ++i) {			j = boot_positions[i];	            	if(tossgaps && (tree_gaps[j] > 0) ) goto skip; /* gap position */			res1 = seq_array[m][j];			res2 = seq_array[n][j];			if( (res1 == gap_pos1)     || (res1 == gap_pos2) ||                            (res2 == gap_pos1) || (res2 == gap_pos2))                                     goto skip;   /* gap in a seq*/			e = e + 1.0;                        if(res1 != res2) p = p + 1.0;		        skip:;		}		if(p <= 0.0) 			k = 0.0;		else			k = p/e;/* DES debug *//* fprintf(stdout,"Seq1=%4d Seq2=%4d  k =%7.4f \n",(pint)m,(pint)n,k); *//* DES debug */		if(kimura) {			if(k < 0.75) { /* use Kimura's formula */				if(k > 0.0) k = - log(1.0 - k - (k * k/5.0) );			}			else {				if(k > 0.930) {				   overspill++;				   k = 10.0; /* arbitrarily set to 1000% */				}				else {				   table_entry = (k*1000.0) - 750.0;                                   k = (double)dayhoff_pams[(int)table_entry];                                   k = k/100.0;				}			}		}		tmat[m][n] = tmat[n][m] = k;		    if(verbose)                    /* if screen output */			fprintf(tree,         	                 "%4d vs.%4d  DIST = %6.4f;  length = %6.0f\n", 	                 (pint)m,(pint)n,k,e);	}	return overspill;}void guide_tree(FILE *tree,sint firstseq,sint numseqs)/*    Routine for producing unrooted NJ trees from seperately aligned   pairwise distances.  This produces the GUIDE DENDROGRAMS in   PHYLIP format.*/{        static char **standard_tree;        sint i;	float dist;	phylip_phy_tree_file=tree;        verbose = FALSE;	first_seq=firstseq;	last_seq=first_seq+numseqs-1;  	if(numseqs==2) {		dist=tmat[firstseq][firstseq+1]/2.0;		fprintf(tree,"(%s:%0.5f,%s:%0.5f);\n",			names[firstseq],dist,names[firstseq+1],dist);	}	else {        standard_tree   = (char **) ckalloc( (last_seq-first_seq+2) * sizeof (char *) );        for(i=0; i<last_seq-first_seq+2; i++)                standard_tree[i]  = (char *) ckalloc( (last_seq-first_seq+2) * sizeof(char));        nj_tree(standard_tree,clustal_phy_tree_file);        print_phylip_tree(standard_tree,phylip_phy_tree_file,0);        if(left_branch != NULL) left_branch=ckfree((void *)left_branch);        if(right_branch != NULL) right_branch=ckfree((void *)right_branch);        if(tkill != NULL) tkill=ckfree((void *)tkill);        if(av != NULL) av=ckfree((void *)av);        for (i=1;i<last_seq-first_seq+2;i++)                standard_tree[i]=ckfree((void *)standard_tree[i]);        standard_tree=ckfree((void *)standard_tree);	}        fclose(phylip_phy_tree_file);}static Boolean is_ambiguity(char c){        int i;	char codes[]="ACGTU";        if(use_ambiguities==TRUE)        {         return FALSE;        }	for(i=0;i<5;i++)        	if(amino_acid_codes[c]==codes[i])             	   return FALSE;        return TRUE;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -