📄 meme.h

📁 EM算法的改进
💻 H
📖 第 1 页 / 共 2 页
字号:
上一页 12
 				   	0 : no palindromes                                   	1 : force DNA palindromes				*/
  /* NOTE(review): the lines above and the members below are the tail of the
     DATASET structure; its opening and earlier members precede this part of
     the file. */
  THETA map;                    /* letter to frequency vector mapping */
  THETA lomap;                  /* letter to logodds vector mapping */
  MOTIF motifs[NMOTIFS];        /* known motifs in dataset */
  int nkmotifs;                 /* number of known motifs in dataset */
  NEGTYPE negtype;              /* how to use negative examples */
  int back_order;               /* order of Markov background model */
  double *back;                 /* Markov background model:
                                   back[s2i(wa)] = log Pr(a | wa) */
  double log_total_prob;        /* total (log) cumulative background prob. */
  PRIORS *priors;               /* the prior probabilities model */
  P_POINT *p_point;             /* previously learned starting points */
  double wnsites;               /* weight on prior on nsites */
  BOOLEAN ma_adj;               /* adjust width/pos. using mult. algn. method */
  double wg;                    /* gap cost (initialization) */
  double ws;                    /* space cost (extension) */
  BOOLEAN endgaps;              /* penalize end gaps if TRUE */
  double distance;              /* convergence radius */
  double prob;                  /* sampling probability for subsq. starts */
  int nmotifs;                  /* number of motifs to find */
  int maxiter;                  /* maximum number of iterations for EM */
  double evt;                   /* E-value threshold */
  char *mod;                    /* name of model */
  char *mapname;                /* name of spmap */
  double map_scale;             /* scale of spmap */
  char *priorname;              /* name of type of prior */
  double beta;                  /* beta for prior */
  int seed;                     /* random seed */
  double seqfrac;               /* fraction of sequences to use */
  char *plib_name;              /* name of file with prior library */
  char *bfile;                  /* name of background model file */
  char *datafile;               /* name of the dataset file */
  char *negfile;                /* name of negative examples file */
  char *command;                /* command line */
  OBJTYPE objfun;               /* objective function */
  THETA pairwise;               /* contains score matrix for pairwise scores */
  double min_ic;                /* min per-column information content */
  char *meme_directory;         /* meme source directory */
  double max_time;              /* maximum allowed CPU time */
} DATASET;

/* motif occurrence in sequence */
typedef struct {
  int seqno;                    /* index in samples array */
  int pos;                      /* character position of site in sequence */
  double zij;                   /* value of z_ij */
  int invcomp;                  /* on inverse complement strand */
} SITE;

/* tiling of sequence with motifs */
typedef struct {
  int *hits;                    /* hits[i] = m: motif m occurs at position i
                                   in seq; <0: on reverse strand;
                                   =0 means no hit */
  double *pvalues;              /* pvalues[i] is p-value of match at position i */
  double pv;                    /* p-value of product of p-values of best hits */
  char *diagram;                /* block diagram for printing */
} TILING;

/* subroutines (only prototypes appear in this part of the header) */

/* Local declaration of exp(); the signature matches the standard C library
   function declared in <math.h>. */
extern double exp(double x);

extern int em_subseq(
  THETA map,                    /* freq x letter map */
  DATASET *dataset,             /* the dataset */
  MODEL *model,                 /* the model */
  PRIORS *priors,               /* the priors */
  int w,                        /* width of motif */
  int n_nsites0,                /* number of nsites0 values to try */
  double alpha,                 /* sampling probability */
  P_POINT *p_point,             /* starting point for previous components */
  S_POINT s_points[]            /* array of starting points: 1 per nsites0 */
);

extern int subseq7(
  MTYPE mtype,                  /* type of model */
  BOOLEAN revcomp,              /* use reverse complement strand of DNA */
  THETA map,                    /* freq x letter map */
  DATASET *dataset,             /* the dataset */
  int w,                        /* width of motif */
  int n_nsites0,                /* number of nsites0 values to try */
  S_POINT s_points[]            /* array of starting points: 1 per nsites0 */
);

/* NOTE(review): the (const void *, const void *) -> int signature matches the
   comparator shape expected by qsort(); confirm against the call sites. */
extern int pY_compare(
  const void *v1,
  const void *v2
);

extern void get_not_o(
  DATASET *dataset,             /* the dataset */
  int w,                        /* width of motif */
  BOOLEAN get_log               /* compute log_not_o if true */
);

extern double get_log_sig(
  double llr,                   /* log likelihood ratio */
  MTYPE mtype,                  /* type of model */
  int w,                        /* width of motif */
  double wN,                    /* weighted number of sites */
  int N,                        /* number of sites */
  BOOLEAN invcomp,              /* inv. compl. strand, too */
  BOOLEAN pal,                  /* motif is DNA palindrome */
  DATASET *dataset              /* the dataset */
);

extern void calc_entropy(
  MODEL *model,                 /* the model */
  DATASET *dataset              /* the dataset */
);

extern double log_comb(
  int m,                        /* length of sequence */
  int n                         /* number of segments */
);

extern double get_log_nalign(
  MTYPE mtype,                  /* type of model */
  int w,                        /* width of motif */
  int N,                        /* number of occurrences */
  BOOLEAN invcomp,              /* inv. compl. seq allowed */
  DATASET *dataset              /* the dataset */
);

extern void adjust_motif(
  MODEL *model,                 /* the model */
  MODEL *scratch_model,         /* the scratch model */
  DATASET *dataset,             /* the dataset */
  PRIORS *priors,               /* the priors */
  double wnsites,               /* weight on prior on nsites */
  BOOLEAN ma_adj,               /* adjust w using mult. algn. method */
  BOOLEAN palindrome,           /* convert motif to palindrome */
  int c,                        /* component of model to adjust */
  int min_w,                    /* minimum width of motif allowed */
  int max_w,                    /* maximum width of motif allowed */
  int maxiter,                  /* maximum number iterations for EM */
  double distance,              /* stopping criterion */
  double wg,                    /* gap cost (initialization) */
  double ws,                    /* space cost (extension) */
  BOOLEAN endgaps               /* penalize end gaps if TRUE */
);

extern void init_theta(
  THETA theta,                  /* theta */
  char *start,                  /* integer encoded starting sequence */
  int w,                        /* width of motif */
  THETA map,                    /* frequency vectors for each letter */
  int alength                   /* alphabet length */
);

extern S_POINT *get_starts(
  DATASET *dataset,             /* the dataset */
  MODEL *model,                 /* the model */
  char *e_cons,                 /* encoded consensus sequence */
  double w_factor,              /* factor between sampled widths */
  int *n_starts                 /* number of starting points */
);

extern THETA init_map(
  MAP_TYPE type,                /* type of mapping:
                                     Uni - add n prior
                                     Pam - pam matrix */
  double scale,                 /* degree of crispness, depends on type:
                                     Uni - add n prior (n)
                                     Pam - pam distance */
  int alength,                  /* length of alphabet */
  double *back,                 /* background frequencies */
  BOOLEAN lo                    /* create logodds matrix */
);

extern void copy_theta(
  THETA s,                      /* source */
  THETA d,                      /* destination */
  int w,                        /* width of motif */
  int alength                   /* length of alphabet */
);

extern void copy_model(
  MODEL *m1,                    /* source */
  MODEL *m2,                    /* destination */
  int alength                   /* length of alphabet */
);

extern void init_meme(
  int argc,                     /* number of input arguments */
  char **argv,                  /* input arguments */
  MODEL **model_p,              /* the model */
  MODEL **best_model_p,         /* the best model */
  MODEL **scratch_model_p,      /* the scratch model */
  MODEL **neg_model_p,          /* model of negative examples */
  DATASET **dataset_p,          /* the dataset */
  DATASET **neg_dataset_p       /* dataset of negative examples */
);

extern MODEL *create_model(
  MTYPE mtype,                  /* type of model */
  BOOLEAN invcomp,              /* use inv comp strand */
  int max_w,                    /* maximum width of motif */
  int alength                   /* length of alphabet */
);

extern double min_sites(
  double nu,                    /* degrees of freedom */
  double alpha,                 /* significance level */
  double max_h                  /* maximum entropy */
);

extern int read_motifs (
  FILE *fdata,                  /* opened dataset file */
  char *filename,               /* motif file */
  MOTIF motif[NMOTIFS],         /* motif info */
  BOOLEAN save_dataset,         /* return dataset in memory */
  DATASET *dataset              /* integer-encoded dataset */
);

extern SITE *get_sites(
  DATASET *dataset,             /* the dataset */
  MODEL *model,                 /* the model */
  int *n,                       /* number of sites found */
  int *best_site                /* index of best site in array */
);

extern LOGODDS make_log_odds(
  THETA theta1,                 /* motif theta */
  THETA theta0,                 /* negative theta; use 0 if NULL */
  double *back,                 /* background frequencies; use 0 if NULL */
  double q,                     /* mixing parameter for theta0 and back */
  int w,                        /* width of motif */
  int alength                   /* length of alphabet */
);

extern int get_max(
  MTYPE mtype,                  /* the model type */
  DATASET *dataset,             /* the dataset */
  int w,                        /* length of sites */
  P_PROB maxima,                /* array of encoded site starts of local maxima */
  BOOLEAN ic,                   /* use inverse complement, too */
  BOOLEAN sort                  /* sort the maxima */
);

extern int align_top_subsequences(
  MTYPE mtype,                  /* type of model */
  int w,                        /* width of motif */
  DATASET *dataset,             /* the dataset */
  int iseq,                     /* sequence number of starting point */
  int ioff,                     /* sequence offset of starting point */
  char *eseq,                   /* integer encoded subsequence */
  char *name,                   /* name of sequence */
  int n_nsites0,                /* number of nsites0 values to try */
  int n_maxima,                 /* number of local maxima */
  P_PROB maxima,                /* sorted local maxima indices */
  double *col_scores,           /* column scores for last start point */
  S_POINT s_points[]            /* array of starting points */
);

extern double log_qfast(
  int n,                        /* number of random variables in product */
  double logk                   /* product of random variables */
);

/* Project headers, pulled in after the declarations above.
   NOTE(review): the angle-bracket form means these are located through the
   compiler's include search path rather than relative to this file; confirm
   the build passes the matching include directory. */
#include <star.h>
#include <llr.h>
#include <em.h>
#include <hash_alph.h>
#include <read_seq_file.h>
#include <display.h>
#include <dpalign.h>
#include <histogram.h>
#include <clock.h>
#include <message.h>

/* Closes a conditional opened earlier in the file (presumably the header's
   include guard; the matching #if/#ifndef is not in this part). */
#endif
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -