📄 tools.c
字号:
int PMatT92 (double P[], double t, double kappa, double pGC)
{
/* PMat for Tamura'92
t is branch lnegth, number of changes per site.
*/
double e1, e2;
t/=(pGC*(1-pGC)*kappa + .5);
if (t<-0.1) printf ("\nt = %.5f in PMatT92", t);
if (t<1e-100) { identity (P, 4); return(0); }
e1=exp(-t); e2=exp(-(kappa+1)*t/2);
P[0*4+0]=P[2*4+2] = (1-pGC)/2*(1+e1)+pGC*e2;
P[1*4+1]=P[3*4+3] = pGC/2*(1+e1)+(1-pGC)*e2;
P[1*4+0]=P[3*4+2] = (1-pGC)/2*(1+e1)-(1-pGC)*e2;
P[0*4+1]=P[2*4+3] = pGC/2*(1+e1)-pGC*e2;
P[0*4+2]=P[2*4+0]=P[3*4+0]=P[1*4+2] = (1-pGC)/2*(1-e1);
P[1*4+3]=P[3*4+1]=P[0*4+3]=P[2*4+1] = pGC/2*(1-e1);
return (0);
}
int PMatTN93 (double P[], double a1t, double a2t, double bt, double pi[])
{
double T=pi[0],C=pi[1],A=pi[2],G=pi[3], Y=T+C, R=A+G;
double e1, e2, e3, small=-1e-4;
if (a1t<small||a2t<small||bt<small)
printf ("\nat=%12.6f %12.6f bt=%12.6f", a1t,a2t,bt);
if (a1t+a2t+bt<1e-200) { identity(P,4); return(0); }
e1=exp(-bt); e2=e3=exp(-(R*a2t+Y*bt));
if (fabs(R*a2t+Y*bt -Y*a1t-R*bt)>1e-5) e3=exp(-(Y*a1t+R*bt));
P[0*4+0] = T+R*T/Y*e1+C/Y*e3;
P[0*4+1] = C+R*C/Y*e1-C/Y*e3;
P[1*4+0] = T+R*T/Y*e1-T/Y*e3;
P[1*4+1] = C+R*C/Y*e1+T/Y*e3;
P[0*4+2] = P[1*4+2] = A*(1-e1);
P[0*4+3] = P[1*4+3] = G*(1-e1);
P[2*4+0] = P[3*4+0] = T*(1-e1);
P[2*4+1] = P[3*4+1] = C*(1-e1);
P[2*4+2] = A+Y*A/R*e1+G/R*e2;
P[2*4+3] = G+Y*G/R*e1-G/R*e2;
P[3*4+2] = A+Y*A/R*e1-A/R*e2;
P[3*4+3] = G+Y*G/R*e1+A/R*e2;
return(0);
}
int EvolveHKY85 (char source[], char target[], int ls, double t,
double rates[], double pi[4], double kappa, int isHKY85)
{
/* isHKY85=1 if HKY85, =0 if F84
Use NULL for rates if rates are identical among sites.
*/
int i,j,h,n=4;
double TransP[16],a1t,a2t,bt,r, Y=pi[0]+pi[1],R=pi[2]+pi[3];
if (isHKY85) a1t=a2t=kappa;
else { a1t=1+kappa/Y; a2t=1+kappa/R; }
bt=t/(2*(pi[0]*pi[1]*a1t+pi[2]*pi[3]*a2t)+2*Y*R);
a1t*=bt; a2t*=bt;
FOR (h, ls) {
if (h==0 || (rates && rates[h]!=rates[h-1])) {
r=(rates?rates[h]:1);
PMatTN93 (TransP, a1t*r, a2t*r, bt*r, pi);
for (i=0;i<n;i++) {
for (j=1;j<n;j++) TransP[i*n+j]+=TransP[i*n+j-1];
if (fabs(TransP[i*n+n-1]-1)>1e-5) error2("TransP err");
}
}
for (j=0,i=source[h],r=rndu();j<n-1;j++) if (r<TransP[i*n+j]) break;
target[h] = (char)j;
}
return (0);
}
int Rates4Sites (double rates[],double alpha,int ncatG,int ls, int cdf,
double space[])
{
/* Rates for sites from the gamma (ncatG=0) or discrete-gamma (ncatG>1).
Rates are converted into the c.d.f. if cdf=1, which is useful for
simulation under JC69-like models.
space[ncatG*2]
*/
int h, ir,j, *counts=(int*)(space+2*ncatG);
double *rK=space, *freqK=space+ncatG;
if (alpha==0)
{ if(rates) FOR(h,ls) rates[h]=1; }
else {
if (ncatG>1) {
DiscreteGamma (freqK, rK, alpha, alpha, ncatG, 0);
MultiNomial (ls, ncatG, freqK, counts, space+3*ncatG);
for (ir=0,h=0; ir<ncatG; ir++)
for (j=0; j<counts[ir]; j++) rates[h++]=rK[ir];
}
else
for (h=0; h<ls; h++) rates[h]=rndgamma(alpha)/alpha;
if (cdf) {
for (h=1; h<ls; h++) rates[h]+=rates[h-1];
abyx (1/rates[ls-1], rates, ls);
}
}
return (0);
}
char *getcodon(char codon[], int icodon)
{
/* id : (0,63) */
if (icodon<0||icodon>63) {
printf("\ncodon %d\n", icodon);
error2("getcodon.");
}
codon[0]=BASEs[icodon/16];
codon[1]=BASEs[(icodon%16)/4];
codon[2]=BASEs[icodon%4];
codon[3]=0;
return (codon);
}
char *getAAstr(char *AAstr, int iaa)
{
/* iaa (0,20) with 20 meaning termination */
if (iaa<0 || iaa>20) error2("getAAstr: iaa err. \n");
strncpy (AAstr, AA3Str+iaa*3, 3);
return (AAstr);
}
int NucListall(char b, int *nb, int ib[4])
{
/* Resolve an ambiguity nucleotide b into all possibilities.
nb is number of bases and ib (0,1,2,3) list all of them.
Data are complete if (nb==1).
*/
int j,k;
k=strchr(BASEs,b)-BASEs;
if(k<0)
{ printf("NucListall: strange character %c\n",b); return(-1);}
if(k<4) { *nb=1; ib[0]=k; }
else {
*nb=nBASEs[k];
FOR(j,*nb) ib[j]=strchr(BASEs,EquateNUC[k][j])-BASEs;
}
return(0);
}
int Codon2AA(char codon[3], char aa[3], int icode, int *iaa)
{
/* translate a triplet codon[] into amino acid (aa[] and iaa), using
genetic code icode. This deals with ambiguity nucleotides.
*iaa=(0,...,19), 20 for stop or missing data.
Distinquish between stop codon and missing data?
naa=0: only stop codons; 1: one AA; 2: more than 1 AA.
Returns 0: if one amino acid
1: if multiple amino acids (ambiguity data)
-1: if stop codon
*/
int nb[3],ib[3][4], ic, i, i0,i1,i2, iaa0=-1,naa=0;
for(i=0;i<3;i++) NucListall(codon[i], &nb[i], ib[i]);
FOR(i0,nb[0]) FOR(i1,nb[1]) FOR(i2,nb[2]) {
ic=ib[0][i0]*16+ib[1][i1]*4+ib[2][i2];
*iaa=GeneticCode[icode][ic];
if(*iaa==-1) continue;
if(naa==0) { iaa0=*iaa; naa++; }
else if (*iaa!=iaa0) naa=2;
}
if(naa==0)
{ printf("stop codon %c%c%c\n",codon[0],codon[1],codon[2]); *iaa=20; }
else if(naa==2) *iaa=20;
else *iaa=iaa0;
strncpy(aa, AA3Str+*iaa*3, 3);
return(naa==1?0: (naa==0?-1:1));
}
int DNA2protein(char dna[], char protein[], int lc, int icode)
{
/* translate a DNA into a protein, using genetic code icode, with lc codons.
dna[] and protein[] can be the same string.
*/
int h, iaa, k;
char aa3[4];
for(h=0; h<lc; h++) {
k=Codon2AA(dna+h*3,aa3,icode,&iaa);
if(k==-1) printf(" stop codon at %d out of %d\n",h+1,lc);
protein[h]=AAs[iaa];
}
return(0);
}
int printcu (FILE *fout, double fcodon[], int icode)
{
/* output codon usage table and other related statistics
space[20+1+3*5]
Outputs the genetic code table if fcodon==NULL
*/
int wc=8, wd=0; /* wc: for codon, wd: decimal */
int it, i,j,k, iaa;
double faa[21], fb3x4[3*5]; /* chi34, Ic, lc, */
char *word="|-", aa3[4]=" ",codon[4]=" ", ss3[4][4], *noodle;
static double aawt[]={89.1, 174.2, 132.1, 133.1, 121.2, 146.2,
147.1, 75.1, 155.2, 131.2, 131.2, 146.2, 149.2, 165.2, 115.1,
105.1, 119.1, 204.2, 181.2, 117.1};
if (fcodon) { zero(faa,21); zero(fb3x4,12); }
else wc=0;
FOR (i,4) strcpy(ss3[i],"\0\0\0");
noodle = strc(4*(10+2+wc)-2,word[1]);
fprintf(fout, "\n%s\n", noodle);
for(i=0; i<4; i++,FPN(fout)) {
FOR (j,4) {
FOR (k,4) {
it=i*16+k*4+j;
iaa=GeneticCode[icode][it];
if(iaa==-1) iaa=20;
getcodon(codon, it); getAAstr(aa3,iaa);
if (!strcmp(ss3[k],aa3) && j>0) fprintf(fout, " ");
else {
fprintf(fout, "%s %c", aa3,(iaa<20?AAs[iaa]:'*'));
strcpy(ss3[k], aa3);
}
fprintf(fout, " %s", codon);
if (fcodon) fprintf(fout, "%*.*f", wc,wd, fcodon[it] );
if (k<3) fprintf(fout, " %c ", word[0]);
}
FPN (fout);
}
fputs (noodle, fout);
}
return(0);
}
int printcums (FILE *fout, int ns, double fcodons[], int icode)
{
int neach0=6, neach=neach0, wc=3,wd=0; /* wc: for codon, wd: decimal */
int iaa,it, i,j,k, i1, ngroup, igroup;
char *word="|-", aa3[4]=" ",codon[4]=" ", ss3[4][4], *noodle;
ngroup=(ns-1)/neach+1;
for(igroup=0; igroup<ngroup; igroup++,FPN(fout)) {
if (igroup==ngroup-1) neach=ns-neach0*igroup;
noodle = strc(4*(10+wc*neach)-2, word[1]);
strcat (noodle, "\n");
fputs(noodle, fout);
FOR (i,4) strcpy (ss3[i]," ");
FOR (i,4) {
FOR (j,4) {
FOR (k,4) {
it = i*16+k*4+j;
iaa=GeneticCode[icode][it];
if(iaa==-1) iaa=20;
getcodon(codon, it); getAAstr(aa3,iaa);
if ( ! strcmp(ss3[k], aa3) && j>0) fprintf(fout, " ");
else { fprintf(fout, "%s", aa3); strcpy(ss3[k], aa3); }
fprintf(fout, " %s", codon);
FOR (i1,neach) fprintf(fout, " %*.*f",
wc-1, wd, fcodons[(igroup*neach0+i1)*64+it] );
if (k<3) fprintf(fout, " %c ", word[0]);
}
FPN (fout);
}
fputs (noodle, fout);
}
}
return(0);
}
int PtoPi (double P[], double pi[], int n, double *space)
{
/* from transition probability P[ij] to pi, the stationary frequencies
(I-P)' * pi = 0 pi * 1 = 1
space[] is of size n*(n+1).
*/
int i,j;
double *T = space; /* T[n*(n+1)] */
FOR (i,n) {
FOR (j,n)
T[i*(n+1)+j] = (double)(i==j) - P[j*n+i]; /* transpose */
T[i*(n+1)+n] = 0.;
}
fillxc (T, 1., n+1);
matinv( T, n, n+1, pi);
FOR (i,n) pi[i] = T[i*(n+1)+n];
return (0);
}
int PtoX (double P1[], double P2[], double pi[], double X[])
{
/* from P1 & P2 to X. X = P1' diag{pi} P2
*/
int i, j, k;
FOR (i,4)
FOR (j,4)
for (k=0,X[i*4+j]=0.0; k<4; k++) {
X[i*4+j] += pi[k] * P1[k*4+i] * P2[k*4+j];
}
return (0);
}
int printaSeq (FILE *fout, char z[], int ls, int lline, int gap)
{
int i;
FOR (i, ls) {
fprintf (fout, "%c", z[i]);
if (gap && (i+1)%gap==0) fprintf (fout, " ");
if ((i+1)%lline==0) { fprintf (fout, "%7d", i+1); FPN (fout); }
}
i=ls%lline;
if (i) fprintf (fout, "%*d\n", 7+lline+lline/gap-i-i/gap, ls);
FPN (fout);
return (0);
}
int printsma(FILE*fout, char*spname[], char*z[],
int ns, int l, int lline, int gap, int seqtype,
int transformed, int simple, int pose[])
{
/* print multiple aligned sequences.
use spname==NULL if no seq names available.
pose[h] marks the position of the h_th site in z[], useful for
printing out the original sequences after site patterns are collapsed.
Sequences z[] are coded if(transformed) and not if otherwise.
*/
int igroup, ngroup, lt, h,hp, i, b,b0=-1,igap, lspname=20, lseqlen=7;
char indel='-', ambi='?', equal='.';
char *pch=(seqtype<=1 ? BASEs : (seqtype==2?AAs:BINs));
char codon[4]=" ";
codon[0]=-1; /* to avoid warning */
if (gap==0) gap=lline+1;
ngroup=(l-1)/lline+1;
for (igroup=0,FPN(fout); igroup<ngroup; igroup++,FPN(fout)) {
lt=min2(l,(igroup+1)*lline); /* seqlen mark at the end of block */
igap=lline+(lline/gap)+lspname+1-lseqlen-1; /* spaces */
if(igroup+1==ngroup)
igap=(l-igroup*lline)+(l-igroup*lline)/gap+lspname+1-lseqlen-1;
/* fprintf (fout,"%*s[%*d]\n", igap, "", lseqlen,lt); */
FOR(i,ns) {
if(spname) fprintf(fout,"%-*s ", lspname,spname[i]);
for (h=igroup*lline,lt=0,igap=0; lt<lline && h<l; h++,lt++) {
hp=(pose?pose[h]:h);
#ifdef CODEML
if(com.seqtype==1 && com.cleandata && transformed) {
ic=FROM61[com.z[i][hp]];
codon[0]=BASEs[is/16];
codon[1]=BASEs[(ic%16)/4];
codon[2]=BASEs[ic%4];
fprintf(fout," %s", codon);
continue;
}
#endif
b0=(int)z[0][hp]; b=(int)z[i][hp];
if(transformed) {
b0=pch[b0]; b=pch[b];
}
if(i&&simple && b==b0 && b!=indel && b!=ambi) b=equal;
fputc(b, fout);
if (++igap==gap) { fputc(' ', fout); igap=0; }
}
FPN (fout);
}
}
FPN(fout);
return(0);
}
/* ***************************
Simple tools
******************************/
static time_t time_start;
void starttime (void)
{
time_start=time(NULL);
}
char* printtime (char timestr[])
{
/* print time elapsed since last call to starttime()
*/
time_t t;
t=time(NULL)-time_start;
sprintf(timestr,"%02ld:%02ld:%02ld", t/3600,(t%3600)/60, t-(t/60)*60);
return(timestr);
}
void sleep2(int wait)
{
/* Pauses for a specified number of seconds. */
time_t t_cur=time(NULL);
while(time(NULL)<t_cur+wait) ;
}
char *strc (int n, char c)
{
static char s[256];
int i;
if (n>255) error2("line >255 in strc");
FOR (i,n) s[i]=c; s[n]=0;
return (s);
}
int putdouble(FILE*fout, double a)
{
double aa=fabs(a);
return fprintf(fout, (aa<1e-5||aa>1e6 ? " %11.4e" : " %11.6f"), a);
}
void strcase (char *str, int direction)
{
/* direction = 0: to lower; 1: to upper */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -