📄 codeml.c
字号:
/* CODEML.c (AAML.c & CODONML.c)
Maximum likelihood parameter estimation for codon sequences (seqtype=1)
or amino-acid sequences (seqtype=2)
Copyright, Ziheng YANG, 1993-2002
cc -o codeml -fast codeml.c tools.o -lm
codeml <ControlFileName>
*/
/*
#define NSSITES_K1_K2_CLASSES
#define DSDN_MC 1
#define DSDN_MC_SITES 1
*/
#include "paml.h"
#ifdef macintosh
/* Added by Andrew Rambaut to accommodate Macs -
(1) Brings up dialog box to allow command line parameters.
(2) Reduces the size of statically allocated data. */
#include <console.h>
#endif
#define NS 1000
#define NBRANCH (NS*2-2)
#define NNODE (NS*2-1)
#define NGENE 100
#define LSPNAME 30
#define NCODE 64
#define NCATG 40
#define NP (NBRANCH*2+NGENE-1+2)
/*
#define NP (NBRANCH+NGENE-1+189+2)
*/
extern char BASEs[],AAs[],Nsensecodon[];
extern int noisy, NFunCall, NEigenQ, NPMatUVRoot, *ancestor, GeneticCode[][64];
extern double *SeqDistance;
char *progname; //Jingcai
int Forestry (FILE *fout);
int sortwM3(double x[]);
void DetailOutput(FILE *fout, double x[], double var[]);
int GetOptions (char *ctlf);
int testx (double x[], int np);
int SetxBound (int np, double xb[][2]);
int SetxInitials (double x[]);
int GetInitials (double x[], int*fromfile);
int SetParameters (double x[]);
int SetPGene (int igene, int _pi, int _UVRoot, int _alpha, double xcom[]);
int SetPSiteClass(int iclass, double xcom[]);
int PMatJC69like (double P[], double t, int n);
int setmark_61_64 (void);
int printfcode (FILE *fout, double fb61[], double space[]);
int InitializeCodon (FILE *fout, double space[]);
int CodonListall(char codon[], int nb[3], int ib[3][4]);
int AA2Codonf (double faa[20], double fcodon[]);
int DistanceMatAA (FILE *fout);
int DistanceMatNG86 (FILE *fout, FILE*fds, FILE*fdn, FILE*dt, double alpha);
int GetDaa(FILE *fout, double daa[]);
void getpcodonClass(double x[], double pcodonClass[]);
int EigenQc(int getstats, double branchl, double *S, double *dS, double *dN,
double Root[], double U[], double V[],
double kappa[], double omega, double Q[]);
int EigenQaa(FILE *fout, double Root[], double U[], double V[],double rate[]);
int Qcodon2aa (double Qc[], double pic[], double Qaa[], double piaa[]);
int SetAA1STEP (void);
int GetOmegaAA(int OmegaAA[]);
int TestModelQc(FILE *fout, double x[]);
double lfun2dSdN (double x[], int np);
int VariancedSdN(double t, double omega, double vtw[2*2], double vdSdN[2*2]);
int GetCodonFreqs(double pi[]);
int PairwiseCodon (FILE *fout, FILE*fds, FILE*fdn, FILE*dt, double space[]);
int PairwiseAA (FILE *fout, FILE *f2AA);
int lfunNSsites_rate (FILE* fout, double x[], int np);
int PartialLikelihood (int inode, int igene);
double CDFdN_dS(double x,double par[]);
int DiscreteNSsites(double par[]);
int mergeSeqs(FILE*fout);
int SlidingWindow(FILE*fout, double space[]);
void Get4foldSites(void);
void SimulateData2s61(void);
void Ina(void);
void d4dSdN(FILE*fout);
struct common_info {
char *z[NS], *spname[NS], seqf[96],outf[96],treef[96],daafile[96], cleandata;
int seqtype, ns, ls, ngene, posG[NGENE+1], lgene[NGENE], npatt,*pose;
int runmode,clock,verbose,print, codonf,aaDist,model,NSsites;
int method, icode, ncode, Mgene, ndata;
int fix_rgene,fix_kappa,fix_omega,fix_alpha,fix_rho,nparK,fix_blength,getSE;
int np, ntime, nrgene, nkappa, nrate, nalpha, ncatG, sspace, slkl1;
int npi0, hkyREV;
double *fpatt, *space, kappa,omega,alpha,rho,rgene[NGENE];
double pi[NCODE], pi_sqrt[NCODE], piG[NGENE][64],fb61[64];
double f3x4MG[NGENE][12],*pf3x4MG;
double freqK[NCATG], rK[NCATG], MK[NCATG*NCATG],daa[20*20], *lkl,*lkl0,*fhK;
double (*plfun)(double x[],int np);
} com;
struct TREEB {
int nbranch, nnode, root, branches[NBRANCH][2];
double lnL;
} tree;
struct TREEN {
int father, nson, sons[NS], ibranch;
double branch, divtime, omega, label, *lkl;
} *nodes;
extern double Small_Diff;
int FROM61[64], FROM64[64];
int ChangedInIteration; /* 1: t changed, update P(t); 2: paras changed, update UVRoot */
double *PMat,*U,*V,*Root, *_UU[6],*_VV[6],*_Root[6];
/* for 5 sets for branchsite models */
double xcom[NP-NBRANCH];
double *POMEGA, *PKAPPA;
/* communication between SetParameters & PartialLikelihood & EigenQc.
Remove it? */
double pcodon0[64],paa0[20], *pcodonClass; /* for aaDist=FIT1 or FIT2 */
int LASTROUND, UVRootChanged=1;
int IClass=-1;
double OMEGA_FIX=-1;
/* fix the last dN/dS in the NSbranchB, NSbranch2 models with variable dN/dS ratios
for lineages. Useful for testing whether w>1 for particular lineages.
*/
int N_OMEGA=-1; /* number of omega for branches or in AAClass */
int OmegaAA[190], AA1STEP[190];
double _rateSite=1;
int *rateBranch=NULL, N_rateBranch=-1; /* rates for branches, local clocks */
double Qfactor_NS, Qfactor_NS_branch[2], freqK_NS;
/* is lkl memory allocated for each class?
=0 if (method==0) and =1 if (method==1) */
int lklSiteClass=0;
char _oldlkl[NNODE]; /* update lkl for nodes? to save computation */
int _nnodeScale;
char _nodeScale[NNODE]; /* nScale[ns-1] for interior nodes */
double *_nodeScaleF=NULL; /* nScaleF[npatt] for scale factors */
double AAchem[][20+1]={ /* last element is the max */
{8.1, 10.5, 11.6, 13, 5.5, 10.5, 12.3, 9, 10.4, 5.2,
4.9, 11.3, 5.7, 5.2, 8, 9.2, 8.6, 5.4, 6.2, 5.9, 13}, /* p */
{ 31, 124, 56, 54, 55, 85, 83, 3, 96, 111,
111, 119, 105, 132, 32.5, 32, 61, 170, 136, 84, 170}, /* v */
{0, 0.65, 1.33, 1.38, 2.75, 0.89, 0.92, 0.74, 0.58,
0, 0, 0.33, 0, 0, 0.39, 1.42, 0.71, 0.13, 0.2, 0, -999},/* c */
{-0.11, 0.079, -0.136, -0.285, -0.184, -0.067, -0.246, -0.073, 0.32, 0.001,
-0.008, 0.049, -0.041, 0.438, -0.016, -0.153, -0.208, 0.493, 0.381, -0.155} /* a */
}; /* in the order p, v, c, a */
FILE *frub, *flnf, *frst, *frst1, *fin;
char *ratef="rates";
enum {Fequal, F1x4, F3x4, Fcodon, F1x4_MG, F3x4_MG} CodonFreqs;
char *codonfreqs[]={"Fequal", "F1x4", "F3x4", "Fcodon", "F1x4_MG", "F3x4_MG"};
enum {NSbranchB=1, NSbranch2, NSbranch3} NSBranchModels; /* , AAClasses=7 */
char *NSbranchmodels[]={"One dN/dS ratio",
"free dN/dS Ratios for branches", "several dN/dS ratios for branches",
"NSbranch3", "", "", "","AAClasses"};
enum {Poisson, EqualInput, Empirical, Empirical_F,
FromCodon=6, AAClasses=7, REVaa_0=8, REVaa=9} AAModel;
char *aamodels[]={"Poisson", "EqualInput", "Empirical", "Empirical_F", "",
"", "FromCodon", "AAClasses", "REVaa_0", "REVaa"};
enum {NSneutral=1, NSselection, NSdiscrete, NSfreqs, NSgamma, NS2gamma,
NSbeta, NSbetaw, NSbetagamma, NSbeta1gamma, NSbeta1normal, NS02normal,
NS3normal} NSsitesModels;
char *NSsitesmodels[]={"one-ratio","neutral", "selection","discrete","freqs",
"gamma","2gamma","beta","beta&w","beta&gamma", "beta&gamma+1",
"beta&normal>1", "0&2normal>0", "3normal>0"};
enum {FIT1=11, FIT2=12} SiteClassModels;
#ifdef SITELABELS
char *sitelabels[]={"-17", "-16", "-15"};
#endif
#define CODEML 1
#include "treesub.c"
#include "treespace.c"
FILE *fout;
int ncatG0=10, insmodel=0, nnsmodels=1, nsmodels[14]={0};
int main (int argc, char *argv[])
{
FILE *fseq=NULL;
FILE *fpair[6];
char pairfs[6][32]={"2NG.dS","2NG.dN","2NG.t", "2ML.dS","2ML.dN","2ML.t"};
//James J. Cai char ctlf[96]="codeml.ctl", *pmodel, timestr[64];
char ctlf[96]="mbetoolbox_codeml.ctl", *pmodel, timestr[64];
char *seqtypestr[3]={"CODONML", "AAML", "CODON2AAML"};
char *Mgenestr[]={"diff. rate", "separate data", "diff. rate & pi",
"diff. rate & k&w", "diff. rate & pi & k&w"};
char *clockstr[]={"", "global clock", "local clock", "clock with dated seqs"};
int i, k, slkl0=0,s2=0, idata, nc, nUVR;
#ifdef __MWERKS__
/* Added by Andrew Rambaut to accommodate Macs -
Brings up dialog box to allow command line parameters.
*/
argc=ccommand(&argv);
#endif
printf("---------------------------------------\n");
printf(" MBETOOLBOX_CODEML\n");
printf(" The modified version of CODEML\n");
printf(" (in paml 3.13) for MEBToolbox\n");
printf("---------------------------------------\n\n");
progname = argv[0]; //Jingcai
if ((argc == 1) || (strcmp(argv[1],"-help") == 0)) {
fprintf(stderr,"Usage is %s.exe <GeneticCode=0,1,...10>\n", progname);
fprintf(stderr,"\nGenetic codes:\n");
fprintf(stderr,"0:universal, 1:mammalian mt., 2:yeast mt., 3:mold mt.,\n");
fprintf(stderr,"4: invertebrate mt., 5: ciliate nuclear, 6: echinoderm mt.,\n");
fprintf(stderr,"7: euplotid mt., 8: alternative yeast nu. 9: ascidian mt.,\n");
fprintf(stderr,"10: blepharisma nu.\n");
exit (8);
}; //Jingcai
starttime();
com.ndata=1;
noisy=0; com.runmode=0;
com.clock=0; com.fix_rgene=0; /* 0: estimate rate factors for genes */
com.cleandata=0; /* 1: delete; 0:use missing data */
com.seqtype=AAseq;
com.model=Empirical_F; strcpy(com.daafile, "jones.dat");
com.icode=0; com.nrate=0;
com.fix_kappa=0; com.kappa=1; com.omega=2.1;
com.fix_alpha=1; com.alpha=0.; com.ncatG=4; /* alpha=0 := inf */
com.fix_rho=1; com.rho=0.;
com.getSE=0; com.print=0; com.verbose=1; com.fix_blength=0;
com.method=0; com.space=NULL;
frub=gfopen("rub","w"); frst=gfopen("rst","w"); frst1=gfopen("rst1","w");
/*
mergeSeqs(frst); exit(0);
Ina();
*/
SetSeed ((int)time(NULL));
#if (DSDN_MC || DSDN_MC_SITES)
SimulateData2s61();
#endif
// if(argc>1) strcpy(ctlf,argv[1]);
GetOptions(ctlf);
if(argc>1) {
com.icode = atoi(argv[1]);
printf ("\nGenetic code=%d\n", com.icode);
}
fout=gfopen(com.outf, "w");
if((fseq=fopen (com.seqf,"r"))==NULL) {
printf ("\n\nSequence file %s not found!\n", com.seqf);
exit (-1);
}
if(noisy && com.seqtype==CODONseq)
{ printcu(F0,NULL,com.icode); puts("Nice code, uuh?"); }
/* space for P&U&V&Root */
nUVR=1; nc=20;
if(com.seqtype==1) { nc=64; if(com.model>=2) nUVR=6; }
else if (com.seqtype==CODONseq||com.model==FromCodon) nc=64;
PMat=(double*)malloc((nc*nc+nUVR*nc*nc*2+nUVR*nc)*sizeof(double));
if(PMat==NULL) error2("oom getting P&U&V&Root");
U=_UU[0]=PMat+nc*nc; V=_VV[0]=_UU[0]+nc*nc; Root=_Root[0]=_VV[0]+nc*nc;
for(i=1; i<nUVR; i++) {
_UU[i]=_UU[i-1]+nc*nc*2+nc; _VV[i]=_VV[i-1]+nc*nc*2+nc;
_Root[i]=_Root[i-1]+nc*nc*2+nc;
}
/* d4dSdN(fout); */
if (com.model==AAClasses) {
SetAA1STEP();
GetOmegaAA(OmegaAA);
}
else if (com.seqtype==AAseq && com.model==REVaa_0)
SetAA1STEP();
if(com.seqtype==1) {
for(i=0; i<3; i++)
fpair[i]=(FILE*)gfopen(pairfs[i],"w");
if(com.runmode==-2)
for(; i<6;i++) fpair[i]=(FILE*)fopen(pairfs[i],"w");
}
else if(com.runmode==-2)
fpair[0]=(FILE*)fopen("2AA.t","w");
for (idata=0; idata<com.ndata; idata++) {
if (com.ndata>1) {
printf ("\n\nData set %d\n", idata+1);
fprintf (fout, "\n\nData set %d\n", idata+1);
fprintf(frst,"\t%d",idata+1);
fprintf(frst1,"%5d ",idata+1);
}
if(idata) GetOptions(ctlf); /* Is this necessary? */
if(nnsmodels>1) {
if(com.fix_omega) error2("fix omega during batch run?");
if(com.model) error2("model should be 0 in this batch run?");
if(com.runmode) error2("runmode?");
com.NSsites=NSbetaw; com.ncatG=ncatG0+1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -