meme.c

来自「EM算法的改进」· C语言 代码 · 共 513 行 · 第 1/2 页

C
513
字号
/* * $Id: meme.c 1339 2006-09-21 19:46:28Z tbailey $ *  * $Log$ * Revision 1.3  2006/03/08 20:50:11  nadya * merge chamges from v3_5_2 branch * * Revision 1.2.4.1  2006/01/25 08:06:03  tbailey * Print "CPU: " even if UNIX not defined so output file will pass automatic * tests. * * Revision 1.2  2005/10/25 19:06:39  nadya * rm old macro for Header, all info is taken care of by Id and Log. * * Revision 1.1.1.1  2005/07/29 17:19:58  nadya * Importing from meme-3.0.14, and adding configure/make * *//*************************************************************************								       	**	MEME							       	**	Author: Timothy L. Bailey				       	**									**	Copyright							**	(1994 - 2000) The Regents of the University of California.	**	All Rights Reserved.						**									**	Permission to use, copy, modify, and distribute any part of 	**	this software for educational, research and non-profit purposes,**	without fee, and without a written agreement is hereby granted, **	provided that the above copyright notice, this paragraph and 	**	the following three paragraphs appear in all copies.		**									**	Those desiring to incorporate this software into commercial 	**	products or use for commercial purposes should contact the 	**	Technology Transfer Office, University of California, San Diego,**	9500 Gilman Drive, La Jolla, California, 92093-0910, 		**	Ph: (619) 534 5815.						**									**	IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO 	**	ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR 	**	CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF 	**	THE USE OF THIS SOFTWARE, EVEN IF THE UNIVERSITY OF CALIFORNIA 	**	HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 		**									**	THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE **	UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE 		**	MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.  **	THE UNIVERSITY OF CALIFORNIA MAKES NO REPRESENTATIONS AND 	**	EXTENDS NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, **	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 	**	MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT 	**	THE USE OF THE MATERIAL WILL NOT INFRINGE ANY PATENT, 		**	TRADEMARK OR OTHER RIGHTS.  					**								       	*************************************************************************//* 5-26-00 tlb; initialize model->cons0 in init_model *//* 10-3-99 tlb; replace nrdb with back *//* 7-29-99 tlb; set model->pal correctly in init_model *//* 7-02-99 tlb; in erase normalize z_ij to 1.0 so erasing will be complete *//* 7-02-99 tlb; in erase run just e_step of em *//* 6-28-99 tlb; remove sigma from model *//* 8-7-97 tlb; removed Mcm stuff from erase *//*		meme <datafile> [options]	Reads in a sequence file (Pearson/FASTA format).	Runs EM algorithm with selected values of W and lambda.	<datafile>	set of samples: [>id sequence]+*/	#define DEFINE_GLOBALS #include "meme.h"/* FIXME ??? */#ifndef PARALLEL#define mpMyID() 0#endif/* external subroutines */extern double sqrt(double x);/* local subroutines */static void erase(  DATASET *dataset,			/* the dataset */  MODEL *model	 			/* the model */);static BOOLEAN save_candidate(  MODEL *model,				/* final model */  DATASET *dataset,			/* the dataset */  S_POINT *s_point,			/* starting point */  CANDIDATE *candidates,		/* candidate for best model of width */  double best_sig			/* best significance */);static BOOLEAN init_model(  S_POINT *s_point,			/* the starting point */  MODEL *model,				/* the model to intialize */  DATASET *dataset,			/* the dataset */  int imotif				/* motif number */);BOOLEAN no_print = FALSE;	/* turn off printing if parallel and not main *//**********************************************************************//*	main *//**********************************************************************/extern int main(  int argc,  char *argv[]){  int i, imotif;  DATASET *dataset;		/* the dataset */  DATASET *neg_dataset;		/* the dataset of negative examples */  MODEL *model;			/* the model */  MODEL *neg_model;		/* the model of negatives */  MODEL *best_model;		/* the best model found (so far) */  MODEL *scratch_model;		/* a scratch model */  CANDIDATE *candidates = NULL;	/* list of candidate models */  int n_starts = 0;		/* number of starting points */  int nmotifs;			/* number of motifs to find */  double stop_time=0, m1time=0;	/* time when stopped, for motif 1 (secs) */#ifdef PARALLEL  int start_start, incr;  /* Initialize MPI. */  mpInit(&argc, &argv);  /* turn off printing if parallel and not the main processor */  no_print = (mpMyID() != 0);  incr = mpNodes();  /*fprintf(stderr, "Running on %d nodes...\n", incr);*/#endif /* PARALLEL */#ifdef debug_ieee  ieee_handler_setup("common");#endif   (void) myclock();		/* record CPU time */  /* print the command line */  if (VERBOSE) {    argv[0] = "meme";    for (i=0; i<argc; i++) printf("%s ", argv[i]);    printf("\n\n");    fflush(stdout);  }#ifdef UNIX  if (!no_print && VERBOSE) system("echo ''; echo CPU: `hostname`; echo '' ");#else  if (!no_print && VERBOSE) printf("\nCPU: unknown\n\n");#endif /* UNIX */  /* initialize everything from the command line */  init_meme(argc, argv, &model, &best_model, &scratch_model, &neg_model,     &dataset, &neg_dataset);  nmotifs = dataset->nmotifs;		/* number of motifs to find */  Resize(candidates, model->max_w+2, CANDIDATE);        /* motif candidates; max_w+2 in case -pal given */  /* describe the dataset */  print_dataset_summary (dataset);  /* describe the command */  print_command_summary(model, dataset);   /* print explanation of how to interpret the results */  if (DOC) print_meme_doc(dataset->meme_directory);  if (!NO_STATUS) {    fprintf(stderr, "\nseqs=%6d, min=%4ld, max=%5ld, total=%9d\n",      dataset->n_samples, dataset->min_slength, dataset->max_slength,      dataset->total_res);  }  /*  Find a concept and erase it loop */  for (imotif=1; imotif<=nmotifs; imotif++) {    S_POINT *s_points = NULL;		/* array of starting points */    int i_start;			/* index in s_points */    char *e_cons = dataset->p_point->e_cons0[imotif-1];	/* consensus sequence */    int best_w = 0;			/* best width */    double best_sig;			/* motif significance */    double w_factor = sqrt(2.0);	/* factor separating sampled widths */    int iter = 0;                       /* total number of EM iterations */    if (!NO_STATUS) fprintf(stderr, "\nmotif=%d\n", imotif);    /* known motif has been given */    if (dataset->nkmotifs > 0) {      model->min_w = model->max_w = dataset->motifs[imotif-1].width;      model->min_nsites = model->max_nsites = dataset->motifs[imotif-1].pos;    }    /* set up the array of starting points for EM */    s_points = get_starts(dataset, model, e_cons, w_factor, &n_starts);    /* leave loop if no starts found */    if (n_starts == 0) break;    /* tag each candidate width as unused; max_w+1 in case -pal given */    for (i=0; i<=model->max_w+1; i++) {      candidates[i].sig = BIG;      candidates[i].s_point = NULL;    }    /* run EM on each start and save best final model for each final width */    best_sig = BIG;				/* best motif significance */#ifdef PARALLEL    /* Check whether to parallelize this loop. */    start_start = mpMyID();    /* Make sure everybody has something to do. */    if (start_start >= n_starts) start_start = n_starts-1;    /* Divide the various starting points among processors. */    for (i_start=start_start; i_start < n_starts; i_start += incr) {#else    for (i_start=0; i_start<n_starts; i_start++) {#endif /* PARALLEL */      S_POINT *s_point = s_points+i_start;	/* current starting point */#ifdef DEBUG      double s_point_time = myclock()/1E6;#endif /* DEBUG */      /* initialize the model from the starting point */      if (! init_model(s_point, model, dataset, imotif)) continue;            /* Count iters per loop. */      model->iter = 0;      /* Run EM from the starting model */      em(model, dataset);      /* Keep track of the total number of EM iterations. */      iter += model->iter;      /* store model as a candidate for best model;	 save model if best so far */      if (save_candidate(model, dataset, s_point, candidates, best_sig)) {	best_w = model->w;			/* new best width */	swap(model, best_model, MODEL *);	/* save model as best_model */	best_sig = candidates[best_w].sig;	/* new best significance */      }    } /* starting point loop */     /* found the best model */    swap(model, best_model, MODEL *);#ifdef PARALLEL    /* Copy the consensus sequence into the model. */    store_consensus(model, candidates);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?