📄 pamp.c
字号:
/* PAMP.c, Copyright, Ziheng Yang, April 1995.
Specify the sequence type in the file pamp.ctl. Results go into mp.
gcc -o pamp pamp.c tools.o eigen.o
pamp <SequenceFileName>
*/
#include "paml.h"
#ifdef macintosh
/* Added by Andrew Rambaut to accommodate Macs -
(1) Brings up dialog box to allow command line parameters.
(2) Reduces the size of statically allocated data. */
#include <console.h>
#endif
#define NS 2000
#define NBRANCH (NS*2-2)
#define NNODE (NS*2-1)
#define NGENE 2
#define LSPNAME 30
#define NCODE 20
#define NCATG 16
double DistanceREV (double Ft[], int n, double alpha, double Root[], double U[],
double V[], double pi[], double space[], int *cond);
int PMatBranch (double Ptb[], int n, double branch[],
double Root[], double U[], double V[], double space[]);
int PatternLS (FILE *fout, double Ft[],double alpha, double space[], int *cond);
int testx (double x[], int np);
int GetOptions (char *ctlf);
int AlphaMP (FILE* fout);
int PatternMP (FILE *fout, double Ft[]);
int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[],
double Ft[], double space[], int job);
double lfunAlpha_Sullivan (double x);
double lfunAlpha_YK96 (double x);
struct CommonInfo {
char *z[NS], *spname[NS], seqf[32],outf[32],treef[32];
int seqtype, ns, ls, ngene, posG[NGENE+1],lgene[NGENE],*pose,npatt,nhomo;
int np, ntime, ncode,fix_kappa,fix_rgene,fix_alpha, clock, model, ncatG, cleandata;
int print;
double *fpatt, *lkl;
double lmax,pi[NCODE], kappa,alpha,rou, rgene[NGENE],piG[NGENE][NCODE];
int npi0;
double pi_sqrt[NCODE];
} com;
struct TREEB {
int nbranch, nnode, root, branches[NBRANCH][2];
double lnL;
} tree;
struct TREEN {
int father, nson, sons[NS], ibranch;
double branch, divtime, label, *lkl;
} *nodes;
#define NCATCHANGE 100
extern int noisy, *ancestor;
extern double *SeqDistance;
int maxchange, NSiteChange[NCATCHANGE];
double MuChange;
int LASTROUND=0; /* no use for this */
#define EIGEN
#define LSDISTANCE
#define REALSEQUENCE
#define NODESTRUCTURE
#define RECONSTRUCTION
#include "treesub.c"
int main (int argc, char *argv[])
{
FILE *ftree, *fout, *fseq;
char ctlf[32]="pamp.ctl";
char *Seqstr[]={"nucleotide", "", "amino-acid", "Binary"};
int itree, ntree, i, j, s3;
double *space, *Ft;
#ifdef __MWERKS__
/* Added by Andrew Rambaut to accommodate Macs -
Brings up dialog box to allow command line parameters.
*/
argc=ccommand(&argv);
#endif
com.nhomo=1; com.print=1;
noisy=2; com.ncatG=8; com.clock=0; com.cleandata=1;
GetOptions (ctlf);
if(argc>1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n",ctlf);}
if ((fseq=fopen(com.seqf, "r"))==NULL) error2 ("seqfile err.");
if ((fout=fopen (com.outf, "w"))==NULL) error2("outfile creation err.");
if((fseq=fopen (com.seqf,"r"))==NULL) error2("No sequence file!");
ReadSeq (NULL, fseq);
i=(com.ns*2-1)*sizeof(struct TREEN);
if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
fprintf (fout,"PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
if (com.nhomo) fprintf (fout, "nonhomogeneous model\n");
space = (double*)malloc(50000*sizeof(double)); /* *** */
SeqDistance=(double*)malloc(com.ns*(com.ns-1)/2*sizeof(double));
ancestor=(int*)malloc(com.ns*(com.ns-1)/2*sizeof(int));
if (SeqDistance==NULL||ancestor==NULL) error2("oom");
i = com.ns*(com.ns-1)/2;
s3 = sizeof(double)*((com.ns*2-2)*(com.ns*2-2 + 4 + i) + i);
s3 = max2(s3, com.ncode*com.ncode*(2*com.ns-2+1)*(int)sizeof(double));
Ft = (double*) malloc(s3);
if (space==NULL || Ft==NULL) error2 ("oom space");
Initialize (fout);
if (com.ngene>1) error2 ("option G not allowed yet");
/*
PatternLS (fout, Ft, 0., space, &i);
printf ("\nPairwise estimation of rate matrix done..\n");
fflush(fout);
*/
ftree=gfopen (com.treef,"r");
fscanf (ftree, "%d%d", &i, &ntree);
if (i!=com.ns) error2 ("ns in the tree file");
FOR (itree, ntree) {
printf ("\nTREE # %2d\n", itree+1);
fprintf (fout,"\nTREE # %2d\n", itree+1);
if (ReadaTreeN (ftree, &i,&j, 1)) error2 ("err tree..");
OutaTreeN (F0, 0, 0); FPN (F0);
OutaTreeN (fout, 0, 0); FPN (fout);
for (i=0,maxchange=0; i<NCATCHANGE; i++) NSiteChange[i]=0;
PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 0);
printf ("\nHartigan reconstruction done..\n");
fprintf (fout, "\n\n(1) Branch lengths and substitution pattern\n");
PatternMP (fout, Ft);
printf ("pattern done..\n"); fflush(fout);
fprintf (fout, "\n\n(2) Gamma parameter\n");
AlphaMP (fout);
printf ("gamma done..\n"); fflush(fout);
fprintf (fout, "\n\n(3) Parsimony reconstructions\n");
PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 1);
printf ("Yang reconstruction done..\n"); fflush(fout);
}
free(nodes);
return (0);
}
int GetOptions (char *ctlf)
{
int i, nopt=6, lline=255, t;
char line[255], *pline, opt[20], comment='*';
char *optstr[] = {"seqfile","outfile","treefile", "seqtype", "ncatG", "nhomo"};
FILE *fctl=fopen (ctlf, "r");
if (fctl) {
for (;;) {
if (fgets (line, lline, fctl) == NULL) break;
for (i=0,t=0; i<lline&&line[i]; i++)
if (isalnum(line[i])) { t=1; break; }
else if (line[i]==comment) break;
if (t==0) continue;
sscanf (line, "%s%*s%d", opt, &t);
if ((pline=strstr(line, "="))==NULL) error2 ("option file.");
for (i=0; i<nopt; i++) {
if (strncmp(opt, optstr[i], 8)==0) {
if (noisy>2)
printf ("\n%3d %15s | %-20s %6d", i+1,optstr[i],opt,t);
switch (i) {
case ( 0): sscanf(pline+2, "%s", com.seqf); break;
case ( 1): sscanf(pline+2, "%s", com.outf); break;
case ( 2): sscanf(pline+2, "%s", com.treef); break;
case (3): com.seqtype=t; break;
case (4): com.ncatG=t; break;
case (5): com.nhomo=t; break;
}
break;
}
}
if (i==nopt)
{ printf ("\nopt %s in %s\n", opt, ctlf); exit (-1); }
}
fclose (fctl);
}
else
if (noisy) printf ("\nno ctl file..");
if (com.seqtype==0) com.ncode=4;
else if (com.seqtype==2) com.ncode=20;
else if (com.seqtype==3) com.ncode=2;
else error2 ("seqtype");
if (com.ncatG>NCATG) error2 ("raise NCATG?");
return (0);
}
int AlphaMP (FILE* fout)
{
int k, ntotal;
double x, xb[2], lnL, var;
xb[0]=1e-3; xb[1]=99; /* alpha */
fprintf (fout, "\n# changes .. # sites");
for (k=0,ntotal=0,MuChange=var=0; k<maxchange+1; k++) {
fprintf (fout, "\n%6d%10d", k, NSiteChange[k]);
ntotal+=NSiteChange[k]; MuChange+=k*NSiteChange[k];
var+=k*k*NSiteChange[k];
}
MuChange/=ntotal;
var=(var-MuChange*MuChange*ntotal)/(ntotal-1.);
x=MuChange*MuChange/(var-MuChange);
fprintf (fout, "\n\n# sites%6d, total changes%6d\nmean-var%9.4f%9.4f",
ntotal, (int)(ntotal*MuChange+.5), MuChange, var);
fprintf (fout, "\nalpha (method of moments)%9.4f", x);
if (x<=0) x=9;
LineSearch(lfunAlpha_Sullivan, &lnL, &x, xb, 0.02);
fprintf (fout, "\nalpha (Sullivan et al. 1995)%9.4f\n", x);
MuChange/=tree.nbranch;
LineSearch(lfunAlpha_YK96, &lnL, &x, xb, 0.02);
fprintf (fout, "alpha (Yang & Kumar 1995, ncatG= %d)%9.4f\n", com.ncatG,x);
return (0);
}
double lfunAlpha_Sullivan (double x)
{
int k;
double lnL=0, a=x, t;
FOR (k, maxchange+1) {
if (NSiteChange[k]==0) continue;
t=-a*log(1+MuChange/a);
if (k)
t+=LnGamma(k+a)-LnGamma(k+1.) - LnGamma(a)
+ k*log(MuChange/a/(1+MuChange/a));
lnL += NSiteChange[k]*t;
}
return(-lnL);
}
double lfunAlpha_YK96 (double x)
{
int k, ir, b=tree.nbranch, n=com.ncode;
double lnL=0, prob, a=x, t=MuChange, p;
double freqK[NCATG], rK[NCATG];
DiscreteGamma (freqK, rK, a, a, com.ncatG, 0);
FOR (k, maxchange+1) {
if (NSiteChange[k]==0) continue;
for (ir=0,prob=0; ir<com.ncatG; ir++) {
p=1./n+(n-1.)/n*exp(-n/(n-1.)*rK[ir]*t);
prob+=freqK[ir]*pow(p,(double)(b-k))*pow((1-p)/(n-1.),(double)k);
}
lnL += NSiteChange[k]*log(prob);
}
return (-lnL);
}
int OutQ (FILE *fout, int n, double Q[], double pi[], double Root[],
double U[], double V[], double space[])
{
char aa3[4]="";
int i,j;
double *T1=space, t;
fprintf(fout,"\nrate matrix Q: Qij*dt = prob(i->j; dt)\n");
if (n<=4) {
/* matout (fout, pi, 1, n); */
matout (fout, Q, n, n);
if (n==4) {
fprintf (fout, "Order: T, C, A, G");
t=pi[0]*Q[0*4+1]+pi[1]*Q[1*4+0]+pi[2]*Q[2*4+3]+pi[3]*Q[3*4+2];
fprintf (fout, "\nAverage Ts/Tv =%9.4f\n", t/(1-t));
}
}
else if (n==20) {
for (i=0; i<n; i++,FPN(fout))
FOR (j,n) fprintf (fout, "%6.0f", Q[i*n+j]*100);
/*
FOR (i,n) {
fprintf (fout,"\n%-4s", getAAstr(aa3,i));
FOR (j,i) fprintf (fout, "%4.0f", Q[i*n+j]/pi[j]*100);
fprintf (fout, "%4.0f", -Q[i*n+i]*100);
}
fputs("\n ",fout); FOR(i,naa) fprintf(fout,"%5s",getAAstr(aa3,i));
*/
fprintf (fout, "\n\nPAM matrix, P(0.01)\n");
FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*exp(0.01*Root[j]);
matby (T1, V, Q, n, n, n);
FOR (i,n*n) if (Q[i]<0) Q[i]=0;
FOR (i,n) {
fprintf (fout,"\n%-4s", getAAstr(aa3,i));
FOR(j,n) fprintf(fout, "%6.0f", Q[i*n+j]*10000);
}
fputs("\n ",fout); FOR(i,n) fprintf(fout,"%5s",getAAstr(aa3,i));
}
return (0);
}
int PMatBranch (double Ptb[], int n, double branch[],
double Root[], double U[], double V[], double space[])
{
/* homogeneised transition prob matrix, with one Q assumed for the whole tree
*/
int i, j, k;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -