meme.c
来自「EM算法的改进」· C语言 代码 · 共 513 行 · 第 1/2 页
C
513 行
/* * $Id: meme.c 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.3 2006/03/08 20:50:11 nadya * merge chamges from v3_5_2 branch * * Revision 1.2.4.1 2006/01/25 08:06:03 tbailey * Print "CPU: " even if UNIX not defined so output file will pass automatic * tests. * * Revision 1.2 2005/10/25 19:06:39 nadya * rm old macro for Header, all info is taken care of by Id and Log. * * Revision 1.1.1.1 2005/07/29 17:19:58 nadya * Importing from meme-3.0.14, and adding configure/make * *//************************************************************************* ** MEME ** Author: Timothy L. Bailey ** ** Copyright ** (1994 - 2000) The Regents of the University of California. ** All Rights Reserved. ** ** Permission to use, copy, modify, and distribute any part of ** this software for educational, research and non-profit purposes,** without fee, and without a written agreement is hereby granted, ** provided that the above copyright notice, this paragraph and ** the following three paragraphs appear in all copies. ** ** Those desiring to incorporate this software into commercial ** products or use for commercial purposes should contact the ** Technology Transfer Office, University of California, San Diego,** 9500 Gilman Drive, La Jolla, California, 92093-0910, ** Ph: (619) 534 5815. ** ** IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ** ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR ** CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF ** THE USE OF THIS SOFTWARE, EVEN IF THE UNIVERSITY OF CALIFORNIA ** HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ** ** THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE ** UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE ** MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. ** THE UNIVERSITY OF CALIFORNIA MAKES NO REPRESENTATIONS AND ** EXTENDS NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, ** INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ** MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT ** THE USE OF THE MATERIAL WILL NOT INFRINGE ANY PATENT, ** TRADEMARK OR OTHER RIGHTS. ** *************************************************************************//* 5-26-00 tlb; initialize model->cons0 in init_model *//* 10-3-99 tlb; replace nrdb with back *//* 7-29-99 tlb; set model->pal correctly in init_model *//* 7-02-99 tlb; in erase normalize z_ij to 1.0 so erasing will be complete *//* 7-02-99 tlb; in erase run just e_step of em *//* 6-28-99 tlb; remove sigma from model *//* 8-7-97 tlb; removed Mcm stuff from erase *//* meme <datafile> [options] Reads in a sequence file (Pearson/FASTA format). Runs EM algorithm with selected values of W and lambda. <datafile> set of samples: [>id sequence]+*/ #define DEFINE_GLOBALS #include "meme.h"/* FIXME ??? */#ifndef PARALLEL#define mpMyID() 0#endif/* external subroutines */extern double sqrt(double x);/* local subroutines */static void erase( DATASET *dataset, /* the dataset */ MODEL *model /* the model */);static BOOLEAN save_candidate( MODEL *model, /* final model */ DATASET *dataset, /* the dataset */ S_POINT *s_point, /* starting point */ CANDIDATE *candidates, /* candidate for best model of width */ double best_sig /* best significance */);static BOOLEAN init_model( S_POINT *s_point, /* the starting point */ MODEL *model, /* the model to intialize */ DATASET *dataset, /* the dataset */ int imotif /* motif number */);BOOLEAN no_print = FALSE; /* turn off printing if parallel and not main *//**********************************************************************//* main *//**********************************************************************/extern int main( int argc, char *argv[]){ int i, imotif; DATASET *dataset; /* the dataset */ DATASET *neg_dataset; /* the dataset of negative examples */ MODEL *model; /* the model */ MODEL *neg_model; /* the model of negatives */ MODEL *best_model; /* the best model found (so far) */ MODEL *scratch_model; /* a scratch model */ CANDIDATE *candidates = NULL; /* list of candidate models */ int n_starts = 0; /* number of starting points */ int nmotifs; /* number of motifs to find */ double stop_time=0, m1time=0; /* time when stopped, for motif 1 (secs) */#ifdef PARALLEL int start_start, incr; /* Initialize MPI. */ mpInit(&argc, &argv); /* turn off printing if parallel and not the main processor */ no_print = (mpMyID() != 0); incr = mpNodes(); /*fprintf(stderr, "Running on %d nodes...\n", incr);*/#endif /* PARALLEL */#ifdef debug_ieee ieee_handler_setup("common");#endif (void) myclock(); /* record CPU time */ /* print the command line */ if (VERBOSE) { argv[0] = "meme"; for (i=0; i<argc; i++) printf("%s ", argv[i]); printf("\n\n"); fflush(stdout); }#ifdef UNIX if (!no_print && VERBOSE) system("echo ''; echo CPU: `hostname`; echo '' ");#else if (!no_print && VERBOSE) printf("\nCPU: unknown\n\n");#endif /* UNIX */ /* initialize everything from the command line */ init_meme(argc, argv, &model, &best_model, &scratch_model, &neg_model, &dataset, &neg_dataset); nmotifs = dataset->nmotifs; /* number of motifs to find */ Resize(candidates, model->max_w+2, CANDIDATE); /* motif candidates; max_w+2 in case -pal given */ /* describe the dataset */ print_dataset_summary (dataset); /* describe the command */ print_command_summary(model, dataset); /* print explanation of how to interpret the results */ if (DOC) print_meme_doc(dataset->meme_directory); if (!NO_STATUS) { fprintf(stderr, "\nseqs=%6d, min=%4ld, max=%5ld, total=%9d\n", dataset->n_samples, dataset->min_slength, dataset->max_slength, dataset->total_res); } /* Find a concept and erase it loop */ for (imotif=1; imotif<=nmotifs; imotif++) { S_POINT *s_points = NULL; /* array of starting points */ int i_start; /* index in s_points */ char *e_cons = dataset->p_point->e_cons0[imotif-1]; /* consensus sequence */ int best_w = 0; /* best width */ double best_sig; /* motif significance */ double w_factor = sqrt(2.0); /* factor separating sampled widths */ int iter = 0; /* total number of EM iterations */ if (!NO_STATUS) fprintf(stderr, "\nmotif=%d\n", imotif); /* known motif has been given */ if (dataset->nkmotifs > 0) { model->min_w = model->max_w = dataset->motifs[imotif-1].width; model->min_nsites = model->max_nsites = dataset->motifs[imotif-1].pos; } /* set up the array of starting points for EM */ s_points = get_starts(dataset, model, e_cons, w_factor, &n_starts); /* leave loop if no starts found */ if (n_starts == 0) break; /* tag each candidate width as unused; max_w+1 in case -pal given */ for (i=0; i<=model->max_w+1; i++) { candidates[i].sig = BIG; candidates[i].s_point = NULL; } /* run EM on each start and save best final model for each final width */ best_sig = BIG; /* best motif significance */#ifdef PARALLEL /* Check whether to parallelize this loop. */ start_start = mpMyID(); /* Make sure everybody has something to do. */ if (start_start >= n_starts) start_start = n_starts-1; /* Divide the various starting points among processors. */ for (i_start=start_start; i_start < n_starts; i_start += incr) {#else for (i_start=0; i_start<n_starts; i_start++) {#endif /* PARALLEL */ S_POINT *s_point = s_points+i_start; /* current starting point */#ifdef DEBUG double s_point_time = myclock()/1E6;#endif /* DEBUG */ /* initialize the model from the starting point */ if (! init_model(s_point, model, dataset, imotif)) continue; /* Count iters per loop. */ model->iter = 0; /* Run EM from the starting model */ em(model, dataset); /* Keep track of the total number of EM iterations. */ iter += model->iter; /* store model as a candidate for best model; save model if best so far */ if (save_candidate(model, dataset, s_point, candidates, best_sig)) { best_w = model->w; /* new best width */ swap(model, best_model, MODEL *); /* save model as best_model */ best_sig = candidates[best_w].sig; /* new best significance */ } } /* starting point loop */ /* found the best model */ swap(model, best_model, MODEL *);#ifdef PARALLEL /* Copy the consensus sequence into the model. */ store_consensus(model, candidates);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?