⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 codeml.c

📁 一个神经网络工具箱
💻 C
📖 第 1 页 / 共 5 页
字号:
/* CODEML.c  (AAML.c & CODONML.c)

   Maximum likelihood parameter estimation for codon sequences (seqtype=1) 
                    or amino-acid sequences (seqtype=2)
                Copyright, Ziheng YANG, 1993-2002

               cc -o codeml -fast codeml.c tools.o -lm
                         codeml <ControlFileName>
*/
/*
#define NSSITES_K1_K2_CLASSES

#define DSDN_MC  1
#define DSDN_MC_SITES  1
*/

#include "paml.h"

#ifdef macintosh
/* Added by Andrew Rambaut to accommodate Macs -
   (1) Brings up dialog box to allow command line parameters.
   (2) Reduces the size of statically allocated data.    */
#include <console.h>
#endif

#define NS            1000
#define NBRANCH       (NS*2-2)
#define NNODE         (NS*2-1)
#define NGENE         100
#define LSPNAME       30
#define NCODE         64
#define NCATG         40

#define NP            (NBRANCH*2+NGENE-1+2)
/*
#define NP            (NBRANCH+NGENE-1+189+2)
*/
extern char BASEs[],AAs[],Nsensecodon[];
extern int noisy, NFunCall, NEigenQ, NPMatUVRoot, *ancestor, GeneticCode[][64];
extern double *SeqDistance;


char *progname;			//Jingcai

int Forestry (FILE *fout);
int sortwM3(double x[]);
void DetailOutput(FILE *fout, double x[], double var[]);
int GetOptions (char *ctlf);
int testx (double x[], int np);
int SetxBound (int np, double xb[][2]);
int SetxInitials (double x[]);
int GetInitials (double x[], int*fromfile);
int SetParameters (double x[]);
int SetPGene (int igene, int _pi, int _UVRoot, int _alpha, double xcom[]);
int SetPSiteClass(int iclass, double xcom[]);
int PMatJC69like (double P[], double t, int n);
int setmark_61_64 (void);
int printfcode (FILE *fout, double fb61[], double space[]);
int InitializeCodon (FILE *fout, double space[]);
int CodonListall(char codon[], int nb[3], int ib[3][4]);
int AA2Codonf (double faa[20], double fcodon[]);
int DistanceMatAA (FILE *fout);
int DistanceMatNG86 (FILE *fout, FILE*fds, FILE*fdn, FILE*dt, double alpha);
int GetDaa(FILE *fout, double daa[]);
void getpcodonClass(double x[], double pcodonClass[]);
int EigenQc(int getstats, double branchl, double *S, double *dS, double *dN,
    double Root[], double U[], double V[],
    double kappa[], double omega, double Q[]);
int EigenQaa(FILE *fout, double Root[], double U[], double V[],double rate[]);
int Qcodon2aa (double Qc[], double pic[], double Qaa[], double piaa[]);
int SetAA1STEP (void);
int GetOmegaAA(int OmegaAA[]);
int TestModelQc(FILE *fout, double x[]);
double lfun2dSdN (double x[], int np);
int VariancedSdN(double t, double omega, double vtw[2*2], double vdSdN[2*2]);
int GetCodonFreqs(double pi[]);
int PairwiseCodon (FILE *fout, FILE*fds, FILE*fdn, FILE*dt, double space[]);
int PairwiseAA (FILE *fout, FILE *f2AA);
int lfunNSsites_rate (FILE* fout, double x[], int np);
int PartialLikelihood (int inode, int igene);
double CDFdN_dS(double x,double par[]);
int DiscreteNSsites(double par[]);
int mergeSeqs(FILE*fout);
int SlidingWindow(FILE*fout, double space[]);
void Get4foldSites(void);

void SimulateData2s61(void);
void Ina(void);
void d4dSdN(FILE*fout);

struct common_info {
   char *z[NS], *spname[NS], seqf[96],outf[96],treef[96],daafile[96], cleandata;
   int seqtype, ns, ls, ngene, posG[NGENE+1], lgene[NGENE], npatt,*pose;
   int runmode,clock,verbose,print, codonf,aaDist,model,NSsites;
   int method, icode, ncode, Mgene, ndata;
   int fix_rgene,fix_kappa,fix_omega,fix_alpha,fix_rho,nparK,fix_blength,getSE;
   int np, ntime, nrgene, nkappa, nrate, nalpha, ncatG, sspace, slkl1;
   int npi0, hkyREV;
   double *fpatt, *space, kappa,omega,alpha,rho,rgene[NGENE];
   double pi[NCODE], pi_sqrt[NCODE], piG[NGENE][64],fb61[64];
   double f3x4MG[NGENE][12],*pf3x4MG;
   double freqK[NCATG], rK[NCATG], MK[NCATG*NCATG],daa[20*20], *lkl,*lkl0,*fhK;
   double (*plfun)(double x[],int np);
}  com;
struct TREEB {
   int  nbranch, nnode, root, branches[NBRANCH][2];
   double lnL;
}  tree;
struct TREEN {
   int father, nson, sons[NS], ibranch;
   double branch, divtime, omega, label, *lkl;
}  *nodes;


extern double Small_Diff;
int FROM61[64], FROM64[64];
int ChangedInIteration;  /* 1: t changed, update P(t); 2: paras changed, update UVRoot */
double *PMat,*U,*V,*Root, *_UU[6],*_VV[6],*_Root[6];
/* for 5 sets for branchsite models */

double xcom[NP-NBRANCH];
double *POMEGA, *PKAPPA;
  /* communication between SetParameters & PartialLikelihood & EigenQc.  
     Remove it? */
double pcodon0[64],paa0[20], *pcodonClass;  /* for aaDist=FIT1 or FIT2 */

int LASTROUND, UVRootChanged=1;
int IClass=-1;

double OMEGA_FIX=-1;  
/* fix the last dN/dS in the NSbranchB, NSbranch2 models with variable dN/dS ratios 
   for lineages.  Useful for testing whether w>1 for particular lineages. 
*/
int N_OMEGA=-1; /* number of omega for branches or in AAClass */
int OmegaAA[190], AA1STEP[190];

double _rateSite=1;
int *rateBranch=NULL, N_rateBranch=-1; /* rates for branches, local clocks */
double Qfactor_NS, Qfactor_NS_branch[2], freqK_NS;

/* is lkl memory allocated for each class? 
   =0 if (method==0) and =1 if (method==1) */
int lklSiteClass=0;
char _oldlkl[NNODE];       /* update lkl for nodes? to save computation */
int _nnodeScale;
char _nodeScale[NNODE];    /* nScale[ns-1] for interior nodes */
double *_nodeScaleF=NULL;  /* nScaleF[npatt] for scale factors */

double AAchem[][20+1]={  /* last element is the max */
{8.1, 10.5, 11.6, 13, 5.5, 10.5, 12.3, 9, 10.4, 5.2, 
 4.9, 11.3,  5.7, 5.2,  8,  9.2,  8.6, 5.4, 6.2, 5.9,    13}, /* p */
{ 31, 124,  56,  54,   55, 85, 83,   3, 96, 111, 
 111, 119, 105, 132, 32.5, 32, 61, 170, 136, 84,        170}, /* v */
{0, 0.65, 1.33, 1.38, 2.75, 0.89, 0.92, 0.74, 0.58,
 0, 0, 0.33, 0, 0, 0.39, 1.42, 0.71, 0.13, 0.2, 0,      -999},/* c */
{-0.11, 0.079, -0.136, -0.285, -0.184, -0.067, -0.246, -0.073, 0.32, 0.001,
 -0.008, 0.049, -0.041, 0.438, -0.016, -0.153, -0.208, 0.493, 0.381, -0.155} /* a */
};   /* in the order p, v, c, a */


FILE *frub, *flnf, *frst, *frst1, *fin;
char *ratef="rates";
enum {Fequal, F1x4, F3x4, Fcodon, F1x4_MG, F3x4_MG} CodonFreqs;
char *codonfreqs[]={"Fequal", "F1x4", "F3x4", "Fcodon", "F1x4_MG", "F3x4_MG"};
enum {NSbranchB=1, NSbranch2, NSbranch3} NSBranchModels; /* , AAClasses=7 */
char *NSbranchmodels[]={"One dN/dS ratio", 
     "free dN/dS Ratios for branches", "several dN/dS ratios for branches",
     "NSbranch3", "", "", "","AAClasses"};
enum {Poisson, EqualInput, Empirical, Empirical_F,
     FromCodon=6, AAClasses=7, REVaa_0=8, REVaa=9} AAModel;
char *aamodels[]={"Poisson", "EqualInput", "Empirical", "Empirical_F", "",
     "", "FromCodon", "AAClasses", "REVaa_0", "REVaa"};
enum {NSneutral=1, NSselection, NSdiscrete, NSfreqs, NSgamma, NS2gamma, 
     NSbeta, NSbetaw, NSbetagamma, NSbeta1gamma, NSbeta1normal, NS02normal, 
     NS3normal} NSsitesModels;
char *NSsitesmodels[]={"one-ratio","neutral", "selection","discrete","freqs", 
     "gamma","2gamma","beta","beta&w","beta&gamma", "beta&gamma+1", 
     "beta&normal>1", "0&2normal>0", "3normal>0"};
enum {FIT1=11, FIT2=12} SiteClassModels;


#ifdef SITELABELS
char *sitelabels[]={"-17", "-16", "-15"};
#endif


#define CODEML 1
#include "treesub.c"
#include "treespace.c"

FILE *fout;
int ncatG0=10, insmodel=0, nnsmodels=1, nsmodels[14]={0};


int main (int argc, char *argv[])
{
   FILE *fseq=NULL;
   FILE *fpair[6]; 
   char pairfs[6][32]={"2NG.dS","2NG.dN","2NG.t", "2ML.dS","2ML.dN","2ML.t"};
//James J. Cai   char ctlf[96]="codeml.ctl", *pmodel, timestr[64];
   char ctlf[96]="mbetoolbox_codeml.ctl", *pmodel, timestr[64];
   char *seqtypestr[3]={"CODONML", "AAML", "CODON2AAML"};
   char *Mgenestr[]={"diff. rate", "separate data", "diff. rate & pi", 
                     "diff. rate & k&w", "diff. rate & pi & k&w"};
   char *clockstr[]={"", "global clock", "local clock", "clock with dated seqs"};
   int i, k, slkl0=0,s2=0, idata, nc, nUVR;

#ifdef __MWERKS__
/* Added by Andrew Rambaut to accommodate Macs -
   Brings up dialog box to allow command line parameters.
*/
   argc=ccommand(&argv);
#endif
  printf("---------------------------------------\n");
  printf("  MBETOOLBOX_CODEML\n");
  printf("  The modified version of CODEML\n");
  printf("  (in paml 3.13) for MEBToolbox\n");
  printf("---------------------------------------\n\n");

  progname = argv[0];		//Jingcai
  if ((argc == 1) || (strcmp(argv[1],"-help") == 0)) {
    fprintf(stderr,"Usage is %s.exe <GeneticCode=0,1,...10>\n", progname);
    fprintf(stderr,"\nGenetic codes:\n");
    fprintf(stderr,"0:universal, 1:mammalian mt., 2:yeast mt., 3:mold mt.,\n");
    fprintf(stderr,"4: invertebrate mt., 5: ciliate nuclear, 6: echinoderm mt.,\n"); 
    fprintf(stderr,"7: euplotid mt., 8: alternative yeast nu. 9: ascidian mt.,\n");
    fprintf(stderr,"10: blepharisma nu.\n");
	exit (8);
  };	//Jingcai


   starttime();
   com.ndata=1;
   noisy=0;           com.runmode=0;
   com.clock=0;       com.fix_rgene=0; /* 0: estimate rate factors for genes */
   com.cleandata=0;  /* 1: delete; 0:use missing data */
   com.seqtype=AAseq;
   com.model=Empirical_F;  strcpy(com.daafile, "jones.dat");
   com.icode=0;       com.nrate=0;
   com.fix_kappa=0;   com.kappa=1;    com.omega=2.1;
   com.fix_alpha=1;   com.alpha=0.;   com.ncatG=4;   /* alpha=0 := inf */
   com.fix_rho=1;     com.rho=0.;
   com.getSE=0;       com.print=0;    com.verbose=1;  com.fix_blength=0;
   com.method=0;      com.space=NULL;

   frub=gfopen("rub","w");   frst=gfopen("rst","w");  frst1=gfopen("rst1","w");
/*
mergeSeqs(frst);  exit(0);
Ina();
*/
   SetSeed ((int)time(NULL));

#if (DSDN_MC || DSDN_MC_SITES)
   SimulateData2s61();
#endif

   

//   if(argc>1) strcpy(ctlf,argv[1]);
   GetOptions(ctlf);
   if(argc>1) {
	   com.icode = atoi(argv[1]);
       printf ("\nGenetic code=%d\n", com.icode);
   }


   fout=gfopen(com.outf, "w");
   if((fseq=fopen (com.seqf,"r"))==NULL) {
      printf ("\n\nSequence file %s not found!\n", com.seqf);
      exit (-1);
   }
   if(noisy && com.seqtype==CODONseq) 
      { printcu(F0,NULL,com.icode); puts("Nice code, uuh?"); }

   /* space for P&U&V&Root */
   nUVR=1; nc=20;
   if(com.seqtype==1) { nc=64; if(com.model>=2) nUVR=6; }
   else if (com.seqtype==CODONseq||com.model==FromCodon) nc=64;

   PMat=(double*)malloc((nc*nc+nUVR*nc*nc*2+nUVR*nc)*sizeof(double));
   if(PMat==NULL) error2("oom getting P&U&V&Root");
   U=_UU[0]=PMat+nc*nc;  V=_VV[0]=_UU[0]+nc*nc; Root=_Root[0]=_VV[0]+nc*nc;
   for(i=1; i<nUVR; i++) {
      _UU[i]=_UU[i-1]+nc*nc*2+nc; _VV[i]=_VV[i-1]+nc*nc*2+nc; 
      _Root[i]=_Root[i-1]+nc*nc*2+nc;
   }

   /* d4dSdN(fout); */

   if (com.model==AAClasses) {
      SetAA1STEP();
      GetOmegaAA(OmegaAA);
   }
   else if (com.seqtype==AAseq && com.model==REVaa_0)
      SetAA1STEP();

   if(com.seqtype==1) {
      for(i=0; i<3; i++) 
         fpair[i]=(FILE*)gfopen(pairfs[i],"w");
      if(com.runmode==-2)
         for(; i<6;i++) fpair[i]=(FILE*)fopen(pairfs[i],"w");
   }
   else if(com.runmode==-2)
      fpair[0]=(FILE*)fopen("2AA.t","w");

   for (idata=0; idata<com.ndata; idata++) {

      if (com.ndata>1) {
         printf ("\n\nData set %d\n", idata+1);
         fprintf (fout, "\n\nData set %d\n", idata+1);
         fprintf(frst,"\t%d",idata+1);
         fprintf(frst1,"%5d ",idata+1);
      }
      if(idata)  GetOptions(ctlf); /* Is this necessary? */

      if(nnsmodels>1) {
         if(com.fix_omega) error2("fix omega during batch run?");
         if(com.model) error2("model should be 0 in this batch run?");
         if(com.runmode) error2("runmode?");
         com.NSsites=NSbetaw;  com.ncatG=ncatG0+1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -