📄 tools.c
字号:
/* tools.c
*/
#include "paml.h"
/************************
sequences
*************************/
char BASEs[]="TCAGUYRMKSWHBVDN?-";
char nBASEs[]={1,1,1,1, 1,2,2,2,2,2,2, 3,3,3,3, 4,4,4};
char *EquateNUC[]={"T","C","A","G", "T", "TC","AG","CA","TG","CG","TA",
"TCA","TCG","CAG","TAG", "TCAG","TCAG","TCAG"};
char AAs[] = "ARNDCQEGHILKMFPSTWYV*-?";
char AA3Str[]=
{"AlaArgAsnAspCysGlnGluGlyHisIleLeuLysMetPheProSerThrTrpTyrVal***"};
char BINs[] ="TC";
char Nsensecodon[]={61, 60, 61, 62, 62, 63, 62, 62, 61, 62, 62};
int GeneticCode[][64] =
{{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,-1,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9, 9,12,16,16,16,16, 2, 2,11,11,15,15, 1, 1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 0:universal */
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,17,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9,12,12,16,16,16,16, 2, 2,11,11,15,15,-1,-1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 1:vertebrate mt.*/
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,-1,17,
16,16,16,16,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9,12,12,16,16,16,16, 2, 2,11,11,15,15, 1, 1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 2:yeast mt. */
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,17,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9, 9,12,16,16,16,16, 2, 2,11,11,15,15, 1, 1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 3:mold mt. */
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,17,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9,12,12,16,16,16,16, 2, 2,11,11,15,15,15,15,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 4:invertebrate mt. */
{13,13,10,10,15,15,15,15,18,18, 5, 5, 4, 4,-1,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9, 9,12,16,16,16,16, 2, 2,11,11,15,15, 1, 1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 5:ciliate nuclear*/
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,17,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9, 9,12,16,16,16,16, 2, 2, 2,11,15,15,15,15,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 6:echinoderm mt.*/
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4, 4,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9, 9,12,16,16,16,16, 2, 2,11,11,15,15, 1, 1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 7:euplotid mt. */
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,-1,17,
10,10,10,15,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9, 9,12,16,16,16,16, 2, 2,11,11,15,15, 1, 1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7},
/* 8:alternative yeast nu.*/
{13,13,10,10,15,15,15,15,18,18,-1,-1, 4, 4,17,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9,12,12,16,16,16,16, 2, 2,11,11,15,15, 7, 7,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7}, /* 9:ascidian mt. */
{13,13,10,10,15,15,15,15,18,18,-1, 5, 4, 4,-1,17,
10,10,10,10,14,14,14,14, 8, 8, 5, 5, 1, 1, 1, 1,
9, 9, 9,12,16,16,16,16, 2, 2,11,11,15,15, 1, 1,
19,19,19,19, 0, 0, 0, 0, 3, 3, 6, 6, 7, 7, 7, 7} /* 10:blepharisma nu.*/
} ; /* GeneticCode[icode][#codon] */
int noisy=0, Iround=0, NFunCall=0, NEigenQ, NPMatUVRoot;
double SIZEp=0;
int picksite (char *z, int l, int begin, int gap, char *result)
{
/* pick every gap-th site, e.g., the third codon position for example.
*/
int il=begin;
for (il=0, z+=begin; il<l; il+=gap,z+=gap) *result++ = *z;
return(0);
}
int CodeChara (char b, int seqtype)
{
/* This codes nucleotides or amino acids into 0, 1, 2, ...
*/
int i, n=(seqtype<=1?4:(seqtype==2?20:2));
char *pch=(seqtype<=1?BASEs:(seqtype==2?AAs:BINs));
if (seqtype<=1)
switch (b) {
case 'T': case 'U': return(0);
case 'C': return(1);
case 'A': return(2);
case 'G': return(3);
}
else
FOR(i,n) if (b==pch[i]) return (i);
if(noisy>=9) printf ("\nwarning: strange character '%c' ", b);
return (-1);
}
int dnamaker (char z[], int ls, double pi[])
{
/* sequences z[] are coded 0,1,2,3
*/
int i, j;
double p[4], r;
xtoy(pi, p, 4);
for (i=1; i<4; i++) p[i]+=p[i-1];
if (fabs(p[3]-1) > 1e-5) error2("sum pi != 1..");
for (i=0; i<ls; i++) {
for(j=0,r=rndu();j<4;j++) if(r<p[j]) break;
z[i]=(char)j;
}
return (0);
}
int transform (char *z, int ls, int direction, int seqtype)
{
/* direction==1 from TCAG to 0123, ==0 from 0123 to TCGA.
*/
int il, status=0;
char *p, *pch=((seqtype==0||seqtype==1)?BASEs:(seqtype==2?AAs:BINs));
if (direction)
for (il=0,p=z; il<ls; il++,p++)
{ if ((*p=(char)CodeChara(*p, seqtype))==(char)(-1)) status=-1; }
else
for (il=0,p=z; il<ls; il++,p++) *p=pch[*p];
return (status);
}
int f_mono_di (FILE *fout, char *z, int ls, int iring,
double fb1[], double fb2[], double CondP[])
{
/* get mono- di- nucleitide frequencies.
*/
int i,j, il;
char *s;
double t1, t2;
t1 = 1./(double) ls;
t2=1./(double) (ls-1+iring);
for (i=0; i<4; fb1[i++]=0.0) for (j=0; j<4; fb2[i*4+j++]=0.0) ;
for (il=0, s=z; il<ls-1; il++, s++) {
fb1[*s-1] += t1;
fb2[(*s-1)* 4 + *(s+1)-1 ] += t2;
}
fb1[*s-1] += t1;
if (iring) fb2[(*s-1)*4 + z[0]-1] += t2;
for (i=0; i<4; i++) for (j=0; j<4; j++) CondP[i*4+j] = fb2[i*4+j]/fb1[i];
fprintf(fout, "\nmono-\n") ;
FOR (i,4) fprintf(fout, "%12.4f", fb1[i]) ;
fprintf(fout, "\n\ndi- & conditional P\n") ;
FOR (i,4) {
FOR (j,4) fprintf(fout, "%9.4f%7.4f ", fb2[i*4+j], CondP[i*4+j]) ;
FPN(fout);
}
FPN(fout);
return (0);
}
int PickExtreme (FILE *fout, char *z, int ls, int iring, int lfrag, int *ffrag)
{
/* picking up (lfrag)-tuples with extreme frequencies.
*/
char *pz=z;
int i, j, isf, n=(1<<2*lfrag), lvirt=ls-(lfrag-1)*(1-iring);
double fb1[4], fb2[4*4], p_2[4*4];
double prob1, prob2, ne1, ne2, u1, u2, ualpha=2.0;
int ib[10];
f_mono_di(fout, z, ls, iring, fb1, fb2, p_2 );
if (iring) {
error2("change PickExtreme()");
FOR (i, lfrag-1) z[ls+i]=z[i]; /* dangerous */
z[ls+i]=(char) 0;
}
printf ("\ncounting %d tuple frequencies", lfrag);
FOR (i, n) ffrag[i]=0;
for (i=0; i<lvirt; i++, pz++) {
for (j=0,isf=0; j<lfrag; j++) isf=isf*4+(int)pz[j]-1;
ffrag[isf] ++;
}
/* analyze */
for (i=0; i<n; i++) {
for (j=0,isf=i; j<lfrag; ib[lfrag-1-j]=isf%4,isf=isf/4,j++) ;
for (j=0,prob1=1.0; j<lfrag; prob1 *= fb1[ ib[j++] ] ) ;
for (j=0,prob2=fb1[ib[0]]; j<lfrag-1; j++)
prob2 *= p_2[ib[j]*4+ib[j+1]];
ne1 = (double) lvirt * prob1;
ne2 = (double) lvirt * prob2;
if (ne1<=0.0) ne1=0.5;
if (ne2<=0.0) ne2=0.5;
u1=((double) ffrag[i]-ne1) / sqrt (ne1);
u2=((double) ffrag[i]-ne2) / sqrt (ne2);
if ( fabs(u1)>ualpha /* && fabs(u2)>ualpha */ ) {
fprintf (fout,"\n");
FOR (j, lfrag) fprintf (fout,"%1c", BASEs[ib[j]]);
fprintf (fout,"%6d %8.1f%7.2f %8.1f%7.2f ",ffrag[i],ne1,u1,ne2,u2);
if (u1<-ualpha && u2<-ualpha) fprintf (fout, " %c", '-');
else if (u1>ualpha && u2>ualpha) fprintf (fout, " %c", '+');
else if (u1*u2<0 && fabs(u1) > ualpha && fabs(u2) > ualpha)
fprintf (fout, " %c", '?');
else
fprintf (fout, " %c", ' ');
}
}
return (0);
}
int zztox ( int n31, int l, char *z1, char *z2, double *x )
{
/* x[n31][4][4] */
double t = 1./(double) (l / n31);
int i, ib[2];
int il;
zero (x, n31*16);
for (i=0; i<n31; i++) {
for (il=0; il<l; il += n31) {
ib[0] = z1[il+i] - 1;
ib[1] = z2[il+i] - 1;
x [ i*16+ib[0]*4+ib[1] ] += t;
}
/*
fprintf (f1, "\nThe difference matrix X %6d\tin %6d\n", i+1,n31);
for (j=0; j<4; j++) {
for (k=0; k<4; k++) fprintf(f1, "%10.2f", x[i][j][k]);
fputc ('\n', f1);
}
*/
}
return (0);
}
int testXMat (double x[])
{
/* test whether X matrix is acceptable (0) or not (-1) */
int it=0, i,j;
double t;
for (i=0,t=0; i<4; i++) FOR (j,4) {
if (x[i*4+j]<0 || x[i*4+j]>1) it=-1;
t += x[i*4+j];
}
if (fabs(t-1) > 1e-4) it =-1;
return(it);
}
int difcodonNG (char codon1[], char codon2[], double *SynSite,double *AsynSite,
double *SynDif, double *AsynDif, int transfed, int icode)
{
/* # of synonymous and non-synonymous sites and differences.
Nei, M. and T. Gojobori (1986)
returns the number of differences between two codons.
The two codons (codon1 & codon2) do not contain ambiguity characters.
dmark[i] (=0,1,2) is the i_th different codon position, with i=0,1,ndiff
step[j] (=0,1,2) is the codon position to be changed at step j (j=0,1,ndiff)
b[i][j] (=0,1,2,3) is the nucleotide at position j (0,1,2) in codon i (0,1)
I made some arbitrary decisions when the two codons have ambiguity characters
20 September 2002.
*/
int i,j,k, i1,i2, iy[2], iaa[2],ic[2];
int ndiff,npath,nstop,sdpath,ndpath,dmark[3],step[3],b[2][3],bt1[3],bt2[3];
int by[3] = {16, 4, 1};
char str[4]="";
for (i=0,*SynSite=0,nstop=0; i<2; i++) {
for (j=0,iy[0]=iy[1]=0; j<3; j++) {
if (transfed) b[i][j]=(i?codon1[j]:codon2[j]);
else b[i][j]=(int)CodeChara((char)(i?codon1[j]:codon2[j]),0);
iy[i]+=by[j]*b[i][j];
if(b[i][j]<0||b[i][j]>3) {
if(noisy>=9)
printf("\nwarning ambiguity in difcodonNG: %s %s", codon1,codon2);
*SynSite = 0.5; *AsynSite = 2.5;
*SynDif = (codon1[2]!=codon2[2])/2;
*AsynDif = *SynDif + (codon1[0]!=codon2[0])+(codon1[1]!=codon2[1]);
return((int)(*SynDif + *AsynDif));
}
}
iaa[i]=GeneticCode[icode][iy[i]];
if(iaa[i]==-1) printf("\nNG86: stop codon %s.\n",getcodon(str,iy[i]));
for(j=0; j<3; j++)
FOR (k,4) {
if (k==b[i][j]) continue;
i1=GeneticCode[icode][iy[i] + (k-b[i][j])*by[j]];
if (i1==-1) nstop++;
else if (i1==iaa[i]) (*SynSite)++;
}
}
*SynSite*=3/18.; /* 2 codons, 2*9 possibilities. */
*AsynSite=3*(1-nstop/18.) - *SynSite;
#if 0 /* MEGA 1.1 error */
*AsynSite=3 - *SynSite;
#endif
ndiff=0; *SynDif=*AsynDif=0;
FOR (k,3) dmark[k]=-1;
FOR (k,3) if (b[0][k]-b[1][k]) dmark[ndiff++]=k;
if (ndiff==0) return(0);
npath=1; nstop=0; if(ndiff>1) npath=(ndiff==2)?2:6 ;
if (ndiff==1) {
if (iaa[0]==iaa[1]) (*SynDif)++;
else (*AsynDif)++;
}
else { /* ndiff=2 or 3 */
FOR (k, npath) {
FOR (i1,3) step[i1]=-1;
if (ndiff==2)
{ step[0]=dmark[k]; step[1]=dmark[1-k]; }
else {
step[0]=k/2; step[1]=k%2;
if (step[0]<=step[1]) step[1]++;
step[2]=3-step[0]-step[1];
}
FOR (i1,3) bt1[i1]=bt2[i1]=b[0][i1];
sdpath=ndpath=0; /* mutations for each path */
FOR (i1, ndiff) { /* mutation steps for each path */
bt2[step[i1]] = b[1][step[i1]];
for (i2=0,ic[0]=ic[1]=0; i2<3; i2++) {
ic[0]+=bt1[i2]*by[i2];
ic[1]+=bt2[i2]*by[i2];
}
FOR (i2,2) iaa[i2]=GeneticCode[icode][ic[i2]];
if (iaa[1]==-1) { nstop++; sdpath=ndpath=0; break; }
if (iaa[0]==iaa[1]) sdpath++;
else ndpath++;
FOR (i2,3) bt1[i2]=bt2[i2];
}
*SynDif+=(double)sdpath; *AsynDif+=(double)ndpath;
}
}
if (npath==nstop) {
puts ("NG86: All paths are through stop codons..");
if (ndiff==2) { *SynDif=0; *AsynDif=2; }
else { *SynDif=1; *AsynDif=2; }
}
else {
*SynDif/=(double)(npath-nstop); *AsynDif/=(double)(npath-nstop);
}
return (ndiff);
}
int testTransP (double P[], int n)
{
int i,j, status=0;
double sum, small=1e-7;
for (i=0; i<n; i++) {
for (j=0,sum=0; j<n; sum+=P[i*n+j++])
if (P[i*n+j]<-small) status=-1;
if (fabs(sum-1)>small && status==0) {
printf ("\nrow sum (#%2d) = 1 = %10.6f", i+1, sum);
status=-1;
}
}
return (status);
}
int PMatUVRoot (double P[], double t, int n, double U[], double V[],
double Root[])
{
/* P(t) = U * exp{Root*t} * V
*/
int i,j,k;
double expt, uexpt, *pP;
NPMatUVRoot++;
if (t<-0.1) printf ("\nt = %.5f in PMatUVRoot", t);
if (t<1e-100) { identity (P, n); return(0); }
for (k=0,zero(P,n*n); k<n; k++)
for (i=0,pP=P,expt=exp(t*Root[k]); i<n; i++)
for (j=0,uexpt=U[i*n+k]*expt; j<n; j++)
*pP++ += uexpt*V[k*n+j];
for(i=0;i<n*n;i++) if(P[i]<0) P[i]=0;
#if (DEBUG>=5)
if (testTransP(P,n)) {
/* matout(F0,P,n,n); */
printf("\nP(%.6f) err in PMatUVRoot.\n", t);
exit(-1);
}
#endif
return (0);
}
void pijJC69 (double pij[2], double t)
{
if (t<-0.01) printf ("\nt = %.5f in pijJC69", t);
if (t<1e-100)
{ pij[0]=1; pij[1]=0; }
else
{ pij[0] = (1.+3*exp(-4*t/3.))/4; pij[1] = (1-pij[0])/3; }
}
int PMatK80 (double P[], double t, double kappa)
{
/* PMat for JC69 and K80
*/
int i,j;
double e1, e2;
if (t<-0.1) printf ("\nt = %.5f in PMatK80", t);
if (t<1e-100) { identity (P, 4); return(0); }
e1=exp(-4*t/(kappa+2));
if (fabs(kappa-1)<1e-5) {
FOR (i,4) FOR (j,4)
if (i==j) P[i*4+j]=.25*(1+3*e1);
else P[i*4+j]=.25*(1-e1);
}
else {
e2=exp(-2*t*(kappa+1)/(kappa+2));
FOR (i,4) P[i*4+i]=.25*(1+e1+2*e2);
P[0*4+1]=P[1*4+0]=P[2*4+3]=P[3*4+2]=.25*(1+e1-2*e2);
P[0*4+2]=P[0*4+3]=P[2*4+0]=P[3*4+0]=
P[1*4+2]=P[1*4+3]=P[2*4+1]=P[3*4+1]=.25*(1-e1);
}
return (0);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -