📄 meme.h
字号:
/* * $Id: meme.h 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.2 2006/01/09 08:17:35 tbailey * *** empty log message *** * * Revision 1.1.1.1 2005/07/29 18:44:21 nadya * Importing from meme-3.0.14, and adding configure/make * */#ifndef MEME_H#define MEME_H#include "config.h"#include <user.h>#include <macros.h>#include <mtype.h>#include <logs.h>#include <prior.h>#include <hash.h>#include <gamma.h>#include <chi.h>#include <logodds.h>#include <background.h>/* conventions: ALL_CAPS user defined type Vname enum type value name() macro*//* globals */DEXTERN(int, PAGEWIDTH, 80); /* page width for printing */ /* must be > MSN + 40 (see user.h) */DEXTERN(BOOLEAN, VERBOSE, FALSE); /* turn on verbose output mode */DEXTERN(BOOLEAN, TRACE, FALSE); /* print each start tried */DEXTERN(BOOLEAN, PRINT_FASTA, FALSE); /* print sites in BLOCKS format */DEXTERN(BOOLEAN, PRINTALL, FALSE); /* for debug */DEXTERN(BOOLEAN, PRINT_W, FALSE); /* print w_ij */ DEXTERN(BOOLEAN, PRINT_Z, FALSE); /* print z_ij */ DEXTERN(BOOLEAN, PRINT_LL, FALSE); /* print log likelihood */ DEXTERN(BOOLEAN, PRINT_STARTS, FALSE); /* print starting subsequences */ DEXTERN(BOOLEAN, NO_STATUS, FALSE); /* print run-time status to stderr */DEXTERN(BOOLEAN, DOC, FALSE); /* print documentation */EXTERN char *OFFSET_FILE; /* current name of offset file */DEXTERN(int, TIMER, 0); /* Type of timing: 0 : None 1 : Per loop 2 : Per start */EXTERN LO *los[MAXG]; /* logodds structure for each motif */EXTERN double *pv[MAXG]; /* p-value tables for each motif *//* macro to write a line of asterisks */#define PSTARS {int i; for (i=0;i<PAGEWIDTH;i++)printf("*");printf("\n");}/* type of negative motifs */typedef enum {Pair, Blend} NEGTYPE; /* type of sequence to theta mapping */typedef enum {Uni, Pam} MAP_TYPE; /* type of prior */typedef enum {Mega, MegaP, Dmix, Dirichlet, Addone} PTYPE;/* type of handling of DNA strands in MAST */typedef enum {Combine, Separate, Norc, Protein} STYPE;/* type of objective function */typedef enum {Pv, Ev} OBJTYPE;/* possible sites for a dataset, width combination */#define ps(d, w) MAX((d)->n_samples,((d)->total_res-((d)->n_samples * ((w)-1))))/* tlb 6-18-99; added with wgt_total_res */#define wps(d, w) ( MAX (wps1(d, w), 2) )#define wps1(d, w) ( (d)->wgt_total_res - ((d)->n_samples * ( (w) - 1) ) )/* number of occurrences of a motif based on dataset, w, lambda */#define nsites(d, w, l) ((l) * ps(d, w))/* number of independent columns in a model component */#define ind_cols(w, pal) ((pal) ? ((w) + 1)/2 : (w))/* DNA palindrome enforcer: combine columns and complementary residues */#define palindrome(theta1, theta2, w, alength) \{ \ int i, j; \ for (i=0; i<=(w+1)/2; i++) { /* create the palindrome */ \ for (j=0; j<(alength); j++) { \ int ii = (w)-i-1; \ int jj = hash(comp_dna(unhash(j))); \ theta2[i][j] = (theta1[i][j] + theta1[ii][jj])/2; \ theta2[ii][jj] = MAX(0, theta2[i][j] - 1e-6); \ } \ } \} \/* dataset sample */typedef struct sample { char *sample_name; /* name of sample */ long length; /* length of sequence */ char *seq; /* ascii sequence */ char *res; /* integer-coded sequence */ char *resic; /* integer-coded dna inverse complement */ double sw; /* sequence weight */ double *weights; /* Pr[pos not contained in previous site] */ double *not_o; /* P[no site in [x_{i,j}...x_{i,j+w}] */ int *log_not_o; /* log (not_o) */ int **pY; /* p(Y_j | theta_g) scratch spaces */ char *pYic; /* pY2 > pY1 (site on ic strand if != 0) */ double *z; /* tlb 6-21-99; E[z_ij] */ double **sz; /* tlb 6-21-99; E[sz_ijk] */ double *counts; /* counts of each character (X causes frac.) */ double *logcumback; /* log cumulative background probabilities: logcumback[i] = 0, i=1 = log Pr(s_{i-1} | H_0), else */ int nsites; /* number of sites of current motif */ int *sites; /* list of sites of current motif */ double minpv; /* minimum p-value of sites of current motif */} SAMPLE;typedef double **THETA;#define theta_ref(a, b, c) (a)[b][c]#define theta(b, c) theta_ref(theta, b, c)#define theta0(b, c) theta_ref(theta0, b, c)#define theta1(b, c) theta_ref(theta1, b, c)#define logtheta(b, c) theta_ref(logtheta, b, c)#define logtheta0(b, c) theta_ref(logtheta0, b, c)#define logtheta1(b, c) theta_ref(logtheta1, b, c)#define obs(b, c) theta_ref(obs, b, c)#define obs1(b, c) theta_ref(obs1, b, c)/* a site */typedef struct p_prob *P_PROB;typedef struct p_prob { int x; /* sequence # */ int y; /* position # */ BOOLEAN ic; /* on inverse complement strand */ double prob; /* INT_LOG(probability of site) */} p_prob;/* a model */typedef struct Model { MTYPE mtype; /* type of model */ int min_w; /* minimum allowed width */ int max_w; /* maximum allowed width */ double pw; /* prior estimate of width */ double min_nsites; /* minimum allowed number of sites */ double max_nsites; /* maximum allowed number of sites */ double psites; /* prior estimate of number of sites */ P_PROB maxima; /* list of sites */ BOOLEAN pal; /* motif is a DNA palindrome */ BOOLEAN invcomp; /* use inverse complement DNA strand, too */ int imotif; /* number of motif */ int w; /* width of motif */ THETA theta; /* motif frequencies */ THETA logtheta; /* log of theta */ THETA obs; /* observed frequencies */ double lambda; /* lambda */ double lambda_obs; /* observed lambda */ double nsites; /* estimated number of sites */ double nsites_obs; /* observed number of sites */ int nsites_dis; /* number of sites after discretization */ char cons[MAXSITE+1]; /* consensus sequence of motif */ char cons0[MAXSITE+1]; /* character initial consensus */ double rentropy[MAXSITE]; /* relative entropy of each column of motif */ double rel; /* relative entropy per col */ double ll; /* log likelihood of all data under model */ double mll_0; /* motif log-likelihood under null model */ double mll_1; /* motif log-likelihood under motif model */ double logpv; /* log likelihood ratio of discrete motif */ double logev; /* log E-value of motif */ double llr; /* log likelihood ratio of motif */ int iter; /* number of EM iterations used */ int ID; /* processor id */ int iseq; /* start sequence */ int ioff; /* start sequence offset */} MODEL;/* user-input starting points */typedef struct p_point { int c; /* number of components */ int w[MAXG]; /* widths for motifs */ double nsites[MAXG]; /* nsites for motif */ char *e_cons0[MAXG]; /* integer encoded starting subsequence */} P_POINT;/* starting point */typedef struct s_point { double score; /* log likelihood ratio of starting point */ int iseq; /* sequence number of starting point */ int ioff; /* sequence offset of starting point */ int w0; /* start width for motif */ double nsites0; /* start nsites0 for motif */ double wgt_nsites; /* effective (weighted) number of sites */ char *e_cons0; /* integer encoded starting subsequence */ char *cons0; /* character initial consensus */} S_POINT;/* candidate final model */typedef struct candidate { S_POINT *s_point; /* starting point of model */ int w; /* final width of motif */ BOOLEAN pal; /* palindrome flag */ BOOLEAN invcomp; /* use inverse complement DNA strand, too */ double lambda; /* final lambda for motif */ char cons[MAXSITE+1]; /* final consensus of motif */ double rel; /* relative entropy per col of each motif */ double ll; /* log-likelihood */ double sig; /* likelihood ratio test significance */} CANDIDATE;/* prior probabilities */typedef struct Priors { PTYPE ptype; /* type of prior to use */ double prior_count[MAXALPH]; /* ptype = Dirichlet: pseudo counts/letter */ PriorLib *plib; /* ptype = Dmix, Mega, MegaP: dirichlet mix */ PriorLib *plib0; /* ptype = MegaP: b=0 dirichlet mixture */} PRIORS;/* a known motif */typedef struct motif { char name[MNAME]; /* names of motif */ int width; /* (known) width of motif */ int pos; /* # positive samples this motif */ double roc; /* best roc for this motif */ int shift; /* best shift for this motif */ int pass; /* pass that found this motif */ double recall; /* best recall this motif */ double precision; /* best recall this motif */ double min_thresh; /* minimum threshold for motif */ double max_thresh; /* maximum threshold for motif */ double pal; /* motif is DNA palindrome */ double like; /* best likelihood this motif */ double sig; /* best significance this motif */ double ic; /* best info content this motif */ double sites; /* best nsites this motif */ int w; /* best width this motif */ double thresh; /* best threshold this motif */ HASH_TABLE ht; /* hash table of positives this motif */} MOTIF;/* a dataset */typedef struct Dataset { /* set by read_seq_file */ int alength; /* length of alphabet */ char *alphabet; /* alphabet */ int total_res; /* total size of dataset */ double wgt_total_res; /* weighted (sw*slen) total size of dataset */ int n_samples; /* number samples */ SAMPLE **samples; /* array of (pointers to) samples */ long max_slength; /* maximum length of sequences */ long min_slength; /* minimum length of sequences */ double *res_freq; /* average letter frequencies */ /* *** MEME parameters *** */ BOOLEAN dna; /* dataset used DNA alphabet */ BOOLEAN pal; /* DNA palindrome flag:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -