⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 meme.h

📁 EM算法的改进
💻 H
📖 第 1 页 / 共 2 页
字号:
/* * $Id: meme.h 1339 2006-09-21 19:46:28Z tbailey $ *  * $Log$ * Revision 1.2  2006/01/09 08:17:35  tbailey * *** empty log message *** * * Revision 1.1.1.1  2005/07/29 18:44:21  nadya * Importing from meme-3.0.14, and adding configure/make * */#ifndef MEME_H#define MEME_H#include "config.h"#include <user.h>#include <macros.h>#include <mtype.h>#include <logs.h>#include <prior.h>#include <hash.h>#include <gamma.h>#include <chi.h>#include <logodds.h>#include <background.h>/* conventions:	ALL_CAPS	user defined type	Vname		enum type value	name()		macro*//* globals */DEXTERN(int, PAGEWIDTH, 80);		/* page width for printing */					/* must be > MSN + 40 (see user.h) */DEXTERN(BOOLEAN, VERBOSE, FALSE);	/* turn on verbose output mode */DEXTERN(BOOLEAN, TRACE, FALSE);		/* print each start tried */DEXTERN(BOOLEAN, PRINT_FASTA, FALSE);   /* print sites in BLOCKS format */DEXTERN(BOOLEAN, PRINTALL, FALSE);	/* for debug */DEXTERN(BOOLEAN, PRINT_W, FALSE);	/* print w_ij */ DEXTERN(BOOLEAN, PRINT_Z, FALSE);	/* print z_ij */ DEXTERN(BOOLEAN, PRINT_LL, FALSE);	/* print log likelihood */ DEXTERN(BOOLEAN, PRINT_STARTS, FALSE);	/* print starting subsequences */ DEXTERN(BOOLEAN, NO_STATUS, FALSE);     /* print run-time status to stderr */DEXTERN(BOOLEAN, DOC, FALSE);		/* print documentation */EXTERN char *OFFSET_FILE;		/* current name of offset file */DEXTERN(int, TIMER, 0);			/* Type of timing:					    0 : None					    1 : Per loop					    2 : Per start */EXTERN LO *los[MAXG];			/* logodds structure for each motif */EXTERN double *pv[MAXG];		/* p-value tables for each motif *//* macro to write a line of asterisks */#define PSTARS {int i; for (i=0;i<PAGEWIDTH;i++)printf("*");printf("\n");}/* type of negative motifs */typedef enum {Pair, Blend} NEGTYPE; /* type of sequence to theta mapping */typedef enum {Uni, Pam} MAP_TYPE; /* type of prior */typedef enum {Mega, MegaP, Dmix, Dirichlet, Addone} PTYPE;/* type of handling of DNA strands in MAST */typedef enum {Combine, Separate, Norc, Protein} STYPE;/* type of objective function */typedef enum {Pv, Ev} OBJTYPE;/* possible sites for a dataset, width combination */#define ps(d, w) MAX((d)->n_samples,((d)->total_res-((d)->n_samples * ((w)-1))))/* tlb 6-18-99; added with wgt_total_res */#define wps(d, w) ( MAX (wps1(d, w), 2) )#define wps1(d, w) ( (d)->wgt_total_res - ((d)->n_samples * ( (w) - 1) ) )/* number of occurrences of a motif based on dataset, w, lambda */#define nsites(d, w, l) ((l) * ps(d, w))/* number of independent columns in a model component */#define ind_cols(w, pal) ((pal) ? ((w) + 1)/2 : (w))/* DNA palindrome enforcer: combine columns and complementary residues */#define palindrome(theta1, theta2, w, alength) 				\{ 									\  int i, j;								\  for (i=0; i<=(w+1)/2; i++) {              /* create the palindrome */	\    for (j=0; j<(alength); j++) {					\      int ii = (w)-i-1;							\      int jj = hash(comp_dna(unhash(j)));				\      theta2[i][j] = (theta1[i][j] + theta1[ii][jj])/2;			\      theta2[ii][jj] = MAX(0, theta2[i][j] - 1e-6);			\    }									\  }									\}									\/* dataset sample */typedef struct sample {  char *sample_name;		/* name of sample */  long length;			/* length of sequence */  char *seq;			/* ascii sequence */  char *res;			/* integer-coded sequence */  char *resic;			/* integer-coded dna inverse complement */  double sw;			/* sequence weight */  double *weights;		/* Pr[pos not contained in previous site] */  double *not_o;		/* P[no site in [x_{i,j}...x_{i,j+w}] */  int *log_not_o;		/* log (not_o) */  int **pY;			/* p(Y_j | theta_g) scratch spaces */  char *pYic;			/* pY2 > pY1 (site on ic strand if != 0) */  double *z;			/* tlb 6-21-99; E[z_ij] */  double **sz;			/* tlb 6-21-99; E[sz_ijk] */  double *counts;		/* counts of each character (X causes frac.) */  double *logcumback;		/* log cumulative background probabilities: 				   logcumback[i] = 0, i=1						 =  log Pr(s_{i-1} | H_0), else 				*/  int nsites;			/* number of sites of current motif */  int *sites;			/* list of sites of current motif */  double minpv;			/* minimum p-value of sites of current motif */} SAMPLE;typedef double **THETA;#define theta_ref(a, b, c)	(a)[b][c]#define theta(b, c)		theta_ref(theta, b, c)#define theta0(b, c)		theta_ref(theta0, b, c)#define theta1(b, c)		theta_ref(theta1, b, c)#define logtheta(b, c)		theta_ref(logtheta, b, c)#define logtheta0(b, c)		theta_ref(logtheta0, b, c)#define logtheta1(b, c)		theta_ref(logtheta1, b, c)#define obs(b, c)		theta_ref(obs, b, c)#define obs1(b, c)		theta_ref(obs1, b, c)/* a site */typedef struct p_prob *P_PROB;typedef struct p_prob {  int x;			/* sequence # */  int y;			/* position # */  BOOLEAN ic;			/* on inverse complement strand */  double prob;			/* INT_LOG(probability of site) */} p_prob;/* a model */typedef struct Model {  MTYPE mtype;			/* type of model */  int min_w;			/* minimum allowed width */  int max_w;			/* maximum allowed width */  double pw;			/* prior estimate of width */  double min_nsites;		/* minimum allowed number of sites */  double max_nsites;		/* maximum allowed number of sites */  double psites;		/* prior estimate of number of sites */  P_PROB maxima;		/* list of sites */  BOOLEAN pal;			/* motif is a DNA palindrome */  BOOLEAN invcomp;		/* use inverse complement DNA strand, too */  int imotif;			/* number of motif */  int w;			/* width of motif */  THETA theta;			/* motif frequencies */  THETA logtheta;		/* log of theta */  THETA obs;			/* observed frequencies */  double lambda;		/* lambda */  double lambda_obs;		/* observed lambda */  double nsites;		/* estimated number of sites */  double nsites_obs;		/* observed number of sites */  int nsites_dis;		/* number of sites after discretization */  char cons[MAXSITE+1];		/* consensus sequence of motif */  char cons0[MAXSITE+1];	/* character initial consensus */  double rentropy[MAXSITE];	/* relative entropy of each column of motif */  double rel;			/* relative entropy per col */  double ll;			/* log likelihood of all data under model */  double mll_0;			/* motif log-likelihood under null model */  double mll_1;			/* motif log-likelihood under motif model */  double logpv;			/* log likelihood ratio of discrete motif */  double logev;			/* log E-value of motif */  double llr;			/* log likelihood ratio of motif */  int iter;			/* number of EM iterations used */  int ID;                       /* processor id */  int iseq;			/* start sequence */  int ioff;			/* start sequence offset */} MODEL;/* user-input starting points */typedef struct p_point {  int c;                        /* number of components */  int w[MAXG];                  /* widths for motifs */  double nsites[MAXG];          /* nsites for motif */  char *e_cons0[MAXG];          /* integer encoded starting subsequence */} P_POINT;/* starting point */typedef struct s_point {  double score;			/* log likelihood ratio of starting point */  int iseq;                     /* sequence number of starting point */  int ioff;                     /* sequence offset of starting point */  int w0;			/* start width for motif */  double nsites0;		/* start nsites0 for motif */  double wgt_nsites;		/* effective (weighted) number of sites */  char *e_cons0;		/* integer encoded starting subsequence */  char *cons0;			/* character initial consensus */} S_POINT;/* candidate final model */typedef struct candidate {  S_POINT *s_point;		/* starting point of model */  int w;			/* final width of motif */   BOOLEAN pal;			/* palindrome flag */  BOOLEAN invcomp;		/* use inverse complement DNA strand, too */  double lambda;		/* final lambda for motif */  char cons[MAXSITE+1];		/* final consensus of motif */  double rel;			/* relative entropy per col of each motif */  double ll;			/* log-likelihood */  double sig;			/* likelihood ratio test significance */} CANDIDATE;/* prior probabilities */typedef struct Priors {  PTYPE ptype;			/* type of prior to use */  double prior_count[MAXALPH];	/* ptype = Dirichlet: pseudo counts/letter */  PriorLib *plib;		/* ptype = Dmix, Mega, MegaP: dirichlet mix */  PriorLib *plib0;		/* ptype = MegaP: b=0 dirichlet mixture */} PRIORS;/* a known motif */typedef struct motif {  char name[MNAME];			/* names of motif */  int width;				/* (known) width of motif */  int pos;				/* # positive samples this motif */  double roc;				/* best roc for this motif */  int shift;				/* best shift for this motif */  int pass;				/* pass that found this motif */  double recall;			/* best recall this motif */  double precision;			/* best recall this motif */  double min_thresh;			/* minimum threshold for motif */  double max_thresh;			/* maximum threshold for motif */  double pal;				/* motif is DNA palindrome */  double like;				/* best likelihood this motif */  double sig;				/* best significance this motif */  double ic;				/* best info content this motif */  double sites;				/* best nsites this motif */  int w;				/* best width this motif */  double thresh;			/* best threshold this motif */  HASH_TABLE ht;			/* hash table of positives this motif */} MOTIF;/* a dataset */typedef struct Dataset {  /* set by read_seq_file */  int alength;			/* length of alphabet */  char *alphabet;		/* alphabet */  int total_res;		/* total size of dataset */  double wgt_total_res;		/* weighted (sw*slen) total size of dataset */  int n_samples;		/* number samples */  SAMPLE **samples;		/* array of (pointers to) samples */  long max_slength;		/* maximum length of sequences */  long min_slength;		/* minimum length of sequences */  double *res_freq;     	/* average letter frequencies */  /* *** MEME parameters *** */  BOOLEAN dna;			/* dataset used DNA alphabet */  BOOLEAN pal;			/* DNA palindrome flag:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -