⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prfalign.c

📁 clustalx用来做基因序列分析
💻 C
📖 第 1 页 / 共 3 页
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "clustalw.h"#define ENDALN 127#define MAX(a,b) ((a)>(b)?(a):(b))#define MIN(a,b) ((a)<(b)?(a):(b))/* *   Prototypes */static lint 	pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2);static lint 	prfscore(sint n, sint m);static sint 	gap_penalty1(sint i, sint j,sint k);static sint 	open_penalty1(sint i, sint j);static sint 	ext_penalty1(sint i, sint j);static sint 	gap_penalty2(sint i, sint j,sint k);static sint 	open_penalty2(sint i, sint j);static sint 	ext_penalty2(sint i, sint j);static void 	padd(sint k);static void 	pdel(sint k);static void 	palign(void);static void 	ptracepath(sint *alen);static void 	add_ggaps(void);static char *     add_ggaps_mask(char *mask, int len, char *path1, char *path2);/* *   Global variables */extern double 	**tmat;extern float 	gap_open, gap_extend;extern float    transition_weight;extern sint 	gap_pos1, gap_pos2;extern sint 	max_aa;extern sint 	nseqs;extern sint 	*seqlen_array;extern sint 	*seq_weight;extern sint    	debug;extern Boolean 	neg_matrix;extern sint 	mat_avscore;extern short  	blosum30mt[], blosum40mt[], blosum45mt[];extern short  	blosum62mt2[], blosum80mt[];extern short  	pam20mt[], pam60mt[];extern short  	pam120mt[], pam160mt[], pam350mt[];extern short  	gon40mt[], gon80mt[];extern short    gon120mt[], gon160mt[], gon250mt[], gon350mt[];extern short  	clustalvdnamt[],swgapdnamt[];extern short  	idmat[];extern short    usermat[];extern short    userdnamat[];extern Boolean user_series;extern UserMatSeries matseries;extern short	def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];extern sint		max_aln_length;extern Boolean	distance_tree;extern Boolean	dnaflag;extern char 	mtrxname[];extern char 	dnamtrxname[];extern char 	**seq_array;extern char 	*amino_acid_codes;extern char     *gap_penalty_mask1,*gap_penalty_mask2;extern char     *sec_struct_mask1,*sec_struct_mask2;extern sint     struct_penalties1, struct_penalties2;extern Boolean  use_ss1, use_ss2;extern Boolean endgappenalties;static sint 	print_ptr,last_print;static sint 		*displ;static char   	**alignment;static sint    	*aln_len;static sint    *aln_weight;static char   	*aln_path1, *aln_path2;static sint    	alignment_len;static sint    	**profile1, **profile2;static lint 	    *HH, *DD, *RR, *SS;static lint 	    *gS;static sint    matrix[NUMRES][NUMRES];static sint    nseqs1, nseqs2;static sint    	prf_length1, prf_length2;static sint    *gaps;static sint    gapcoef1,gapcoef2;static sint    lencoef1,lencoef2;static Boolean switch_profiles;lint prfalign(sint *group, sint *aligned){  static Boolean found;  static Boolean negative;  static Boolean error_given=FALSE;  static sint    i, j, count = 0;  static sint  NumSeq;  static sint    len, len1, len2, is, minlen;  static sint   se1, se2, sb1, sb2;  static sint  maxres;  static sint int_scale;  static short  *matptr;  static short	*mat_xref;  static char   c;  static lint    score;  static float  scale;  static double logmin,logdiff;  static double pcid;  alignment = (char **) ckalloc( nseqs * sizeof (char *) );  aln_len = (sint *) ckalloc( nseqs * sizeof (sint) );  aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) );  for (i=0;i<nseqs;i++)     if (aligned[i+1] == 0) group[i+1] = 0;  nseqs1 = nseqs2 = 0;  for (i=0;i<nseqs;i++)    {        if (group[i+1] == 1) nseqs1++;        else if (group[i+1] == 2) nseqs2++;    }  if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0);  if (nseqs2 > nseqs1)    {     switch_profiles = TRUE;     for (i=0;i<nseqs;i++)       {          if (group[i+1] == 1) group[i+1] = 2;          else if (group[i+1] == 2) group[i+1] = 1;       }    }  else  	switch_profiles = FALSE;  int_scale = 100;/*   calculate the mean of the sequence pc identities between the two groups*/        count = 0;        pcid = 0.0;	negative=neg_matrix;        for (i=0;i<nseqs;i++)          {             if (group[i+1] == 1)             for (j=0;j<nseqs;j++)               if (group[j+1] == 2)                    {                       count++;                       pcid += tmat[i+1][j+1];                    }          }  pcid = pcid/(float)count;if (debug > 0) fprintf(stdout,"mean tmat %3.1f\n", pcid);/*  Make the first profile.*/  prf_length1 = 0;  for (i=0;i<nseqs;i++)       if (group[i+1] == 1)		if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1];  nseqs1 = 0;if (debug>0) fprintf(stdout,"sequences profile 1:\n");  for (i=0;i<nseqs;i++)    {       if (group[i+1] == 1)          {if (debug>0) {extern char **names;fprintf(stdout,"%s\n",names[i+1]);}             len = seqlen_array[i+1];             alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) );             for (j=0;j<len;j++)               alignment[nseqs1][j] = seq_array[i+1][j+1];		for(j=len;j<prf_length1;j++)			alignment[nseqs1][j]=gap_pos1;             alignment[nseqs1][j] = ENDALN;             aln_len[nseqs1] = prf_length1;             aln_weight[nseqs1] = seq_weight[i];             nseqs1++;          }    }/*  Make the second profile.*/  prf_length2 = 0;  for (i=0;i<nseqs;i++)       if (group[i+1] == 2)		if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1];  nseqs2 = 0;if (debug>0) fprintf(stdout,"sequences profile 2:\n");  for (i=0;i<nseqs;i++)    {       if (group[i+1] == 2)          {if (debug>0) {extern char **names;fprintf(stdout,"%s\n",names[i+1]);}             len = seqlen_array[i+1];             alignment[nseqs1+nseqs2] =                   (char *) ckalloc( (prf_length2+2) * sizeof (char) );             for (j=0;j<len;j++)               alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1];		for(j=len;j<prf_length2;j++)			alignment[nseqs1+nseqs2][j]=gap_pos1;             alignment[nseqs1+nseqs2][j] = ENDALN;             aln_len[nseqs1+nseqs2] = prf_length2;             aln_weight[nseqs1+nseqs2] = seq_weight[i];             nseqs2++;          }    }  max_aln_length = prf_length1 + prf_length2+2;  /*   calculate real length of profiles - removing gaps!*/  len1=0;  for (i=0;i<nseqs1;i++)    {       is=0;       for (j=0; j<MIN(aln_len[i],prf_length1); j++)	  {            c = alignment[i][j];      	    if ((c !=gap_pos1) && (c != gap_pos2)) is++;          }       len1+=is;    }  len1/=(float)nseqs1;     len2=0;  for (i=nseqs1;i<nseqs2+nseqs1;i++)    {       is=0;       for (j=0; j<MIN(aln_len[i],prf_length2); j++)	  {            c = alignment[i][j];      	    if ((c !=gap_pos1) && (c != gap_pos2)) is++;          }       len2+=is;    }  len2/=(float)nseqs2;  if (dnaflag)     {       scale=1.0;       if (strcmp(dnamtrxname, "iub") == 0)	{            matptr = swgapdnamt;            mat_xref = def_dna_xref;	}       else if (strcmp(dnamtrxname, "clustalw") == 0)	{            matptr = clustalvdnamt;            mat_xref = def_dna_xref;            scale=0.66;	}       else         {           matptr = userdnamat;           mat_xref = dna_xref;        }            maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale);            if (maxres == 0) return((sint)-1);/*            matrix[0][4]=transition_weight*matrix[0][0];            matrix[4][0]=transition_weight*matrix[0][0];            matrix[2][11]=transition_weight*matrix[0][0];            matrix[11][2]=transition_weight*matrix[0][0];            matrix[2][12]=transition_weight*matrix[0][0];            matrix[12][2]=transition_weight*matrix[0][0];*//* fix suggested by Chanan Rubin at Compugen */           matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0];            matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0];            matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0];            matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0];            matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0];            matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0];           gapcoef1 = gapcoef2 = 100.0 * gap_open *scale;          lencoef1 = lencoef2 = 100.0 * gap_extend *scale;    }  else    {  	if(len1==0 || len2==0) {  		logmin=1.0;  		logdiff=1.0;  	}    	else {  		minlen = MIN(len1,len2); 		logmin = 1.0/log10((double)minlen); 		if (len2<len1)    	 		logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1));  		else if (len1<len2)  	   		logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2));  		else logdiff=1.0;		if(logdiff<0.9) logdiff=0.9;  	}if(debug>0) fprintf(stdout,"%d %d logmin %f   logdiff %f\n",(pint)len1,(pint)len2, logmin,logdiff);       scale=0.75;       if (strcmp(mtrxname, "blosum") == 0)        {           scale=0.75;           if (negative || distance_tree == FALSE) matptr = blosum40mt;           else if (pcid > 80.0)             {                matptr = blosum80mt;             }           else if (pcid > 60.0)             {                matptr = blosum62mt2;             }           else if (pcid > 40.0)             {                matptr = blosum45mt;             }           else if (pcid > 30.0)             {                scale=0.5;                matptr = blosum45mt;             }           else if (pcid > 20.0)             {                scale=0.6;                matptr = blosum45mt;             }           else              {                scale=0.6;                matptr = blosum30mt;             }           mat_xref = def_aa_xref;        }       else if (strcmp(mtrxname, "pam") == 0)        {           scale=0.75;           if (negative || distance_tree == FALSE) matptr = pam120mt;           else if (pcid > 80.0) matptr = pam20mt;           else if (pcid > 60.0) matptr = pam60mt;           else if (pcid > 40.0) matptr = pam120mt;           else matptr = pam350mt;           mat_xref = def_aa_xref;        }       else if (strcmp(mtrxname, "gonnet") == 0)        {	   scale/=2.0;           if (negative || distance_tree == FALSE) matptr = gon250mt;           else if (pcid > 35.0)             {                matptr = gon80mt;		scale/=2.0;             }           else if (pcid > 25.0)             {                if(minlen<100) matptr = gon250mt;                else matptr = gon120mt;             }           else             {                if(minlen<100) matptr = gon350mt;		else matptr = gon160mt;             }           mat_xref = def_aa_xref;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -