📄 hmmcalibrate.c
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. ************************************************************//* hmmcalibrate.c * SRE, Fri Oct 31 09:25:21 1997 [St. Louis] * * Score an HMM against random sequence data sets; * set histogram fitting parameters. * * CVS $Id: hmmcalibrate.c,v 1.23 2001/08/05 23:44:43 eddy Exp $ */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <signal.h>#include <math.h>#include <time.h>#include <limits.h>#include <float.h>#ifdef HMMER_THREADS#include <pthread.h>#endif#ifdef HMMER_PVM#include <pvm3.h>#endif#include "squid.h" /* general sequence analysis library */#include "config.h" /* compile-time configuration constants */#include "structs.h" /* data structures, macros, #define's */#include "funcs.h" /* function declarations */#include "globals.h" /* alphabet global variables */#include "version.h" /* release version info */#include "stopwatch.h" /* process timings */static char banner[] = "hmmcalibrate -- calibrate HMM search statistics";static char usage[] = "\Usage: hmmcalibrate [-options] <hmmfile>\n\Available options are:\n\ -h : print short usage and version info, then exit\n\";static char experts[] = "\ --cpu <n> : run <n> threads in parallel (if threaded)\n\ --fixed <n> : fix random sequence length at <n>\n\ --histfile <f> : save histogram(s) to file <f>\n\ --mean <x> : set random seq length mean at <x> [350]\n\ --num <n> : set number of sampled seqs to <n> [5000]\n\ --pvm : run on a Parallel Virtual Machine (PVM)\n\ --sd <x> : set random seq length std. dev to <x> [350]\n\ --seed <n> : set random seed to <n> [time()]\n\";static struct opt_s OPTIONS[] = { { "-h", TRUE, sqdARG_NONE }, { "--cpu", FALSE, sqdARG_INT }, { "--fixed", FALSE, sqdARG_INT }, { "--histfile", FALSE, sqdARG_STRING }, { "--mean", FALSE, sqdARG_FLOAT }, { "--num", FALSE, sqdARG_INT }, { "--pvm", FALSE, sqdARG_NONE }, { "--sd", FALSE, sqdARG_FLOAT }, { "--seed", FALSE, sqdARG_INT}, };#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max);#ifdef HMMER_THREADS/* A structure of this type is shared by worker threads in the POSIX * threads parallel version. */struct workpool_s { /* Static configuration: */ struct plan7_s *hmm; /* ptr to single HMM to search with */ int fixedlen; /* if >0, fix random seq len to this */ float lenmean; /* mean of Gaussian for random seq len */ float lensd; /* s.d. of Gaussian for random seq len */ float *randomseq; /* 0..Alphabet_size-1 i.i.d. probs */ int nsample; /* number of random seqs to do */ /* Shared (mutex-protected) input: */ int nseq; /* current number of seqs searched */ /* Shared (mutex-protected) output: */ struct histogram_s *hist; /* histogram */ float max_score; /* maximum score seen */ Stopwatch_t watch; /* Timings accumulated for threads */ /* Thread pool information: */ pthread_t *thread; /* our pool of threads */ int num_threads; /* number of threads */ pthread_mutex_t input_lock; /* a mutex protecting input fields */ pthread_mutex_t output_lock; /* a mutex protecting output fields */};static void main_loop_threaded(struct plan7_s *hmm, int seed, int nsample, float lenmean, float lensd, int fixedlen, int nthreads, struct histogram_s **ret_hist, float *ret_max, Stopwatch_t *twatch);static struct workpool_s *workpool_start(struct plan7_s *hmm, float lenmean, float lensd, int fixedlen, float *randomseq, int nsample, struct histogram_s *hist, int num_threads);static void workpool_stop(struct workpool_s *wpool);static void workpool_free(struct workpool_s *wpool);static void *worker_thread(void *ptr);#endif /* HMMER_THREADS */#ifdef HMMER_PVMstatic void main_loop_pvm(struct plan7_s *hmm, int seed, int nsample, int lumpsize, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max, Stopwatch_t *extrawatch, int *ret_nslaves);#endif /* HMMER_PVM */intmain(int argc, char **argv){ char *hmmfile; /* HMM file to open */ char *tmpfile; /* temporary calibrated HMM file */ HMMFILE *hmmfp; /* opened hmm file pointer */ FILE *outfp; /* for writing HMM(s) into tmpfile */ char *mode; /* write mode, "w" or "wb" */ struct plan7_s *hmm; /* the hidden Markov model */ int idx; /* counter over sequences */ sigset_t blocksigs; /* list of signals to protect from */ int nhmm; /* number of HMMs calibrated */ struct histogram_s *hist; /* a resulting histogram */ float max; /* maximum score from an HMM */ char *histfile; /* histogram save file */ FILE *hfp; /* open file pointer for histfile */ Stopwatch_t stopwatch; /* main stopwatch for process */ Stopwatch_t extrawatch; /* stopwatch for threads/PVM slaves */ float *mu; /* array of EVD mu's for HMMs */ float *lambda; /* array of EVD lambda's for HMMs */ int mu_lumpsize; /* allocation lumpsize for mu, lambda */ int nsample; /* number of random seqs to sample */ int seed; /* random number seed */ int fixedlen; /* fixed length, or 0 if unused */ float lenmean; /* mean of length distribution */ float lensd; /* std dev of length distribution */ int do_pvm; /* TRUE to use PVM */ int pvm_lumpsize; /* # of seqs to do per PVM slave exchange */ int pvm_nslaves; /* number of slaves used in the PVM */ char *optname; /* name of option found by Getopt() */ char *optarg; /* argument found by Getopt() */ int optind; /* index in argv[] */ int num_threads; /* number of worker threads */ /*********************************************** * Parse the command line ***********************************************/ StopwatchStart(&stopwatch); StopwatchZero(&extrawatch); nsample = 5000; fixedlen = 0; lenmean = 325.; lensd = 200.; seed = (int) time ((time_t *) NULL); histfile = NULL; do_pvm = FALSE; pvm_lumpsize = 20; /* 20 seqs/PVM exchange: sets granularity */ mu_lumpsize = 100;#ifdef HMMER_THREADS num_threads = ThreadNumber(); /* only matters if we're threaded */#else num_threads = 0;#endif while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, &optind, &optname, &optarg)) { if (strcmp(optname, "--cpu") == 0) num_threads = atoi(optarg); else if (strcmp(optname, "--fixed") == 0) fixedlen = atoi(optarg); else if (strcmp(optname, "--histfile") == 0) histfile = optarg; else if (strcmp(optname, "--mean") == 0) lenmean = atof(optarg); else if (strcmp(optname, "--num") == 0) nsample = atoi(optarg); else if (strcmp(optname, "--pvm") == 0) do_pvm = TRUE; else if (strcmp(optname, "--sd") == 0) lensd = atof(optarg); else if (strcmp(optname, "--seed") == 0) seed = atoi(optarg); else if (strcmp(optname, "-h") == 0) { Banner(stdout, banner); puts(usage); puts(experts); exit(0); } } if (argc - optind != 1) Die("Incorrect number of arguments.\n%s\n", usage); hmmfile = argv[optind++];#ifndef HMMER_PVM if (do_pvm) Die("PVM support is not compiled into HMMER; --pvm doesn't work.");#endif#ifndef HMMER_THREADS if (num_threads) Die("Posix threads support is not compiled into HMMER; --cpu doesn't have any effect");#endif /*********************************************** * Open our i/o file pointers, make sure all is well ***********************************************/ /* HMM file */ if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL) Die("failed to open HMM file %s for reading.", hmmfile); /* histogram file */ hfp = NULL; if (histfile != NULL) { if ((hfp = fopen(histfile, "w")) == NULL) Die("Failed to open histogram save file %s for writing\n", histfile); } /* Generate calibrated HMM(s) in a tmp file in the current * directory. When we're finished, we delete the original * HMM file and rename() this one. That way, the worst * effect of a catastrophic failure should be that we * leave a tmp file lying around, but the original HMM * file remains uncorrupted. tmpnam() doesn't work portably here, * because it'll put the file in /tmp and we won't * necessarily be able to rename() it from there. */ tmpfile = MallocOrDie(strlen(hmmfile) + 5); strcpy(tmpfile, hmmfile); strcat(tmpfile, ".xxx"); /* could be more inventive here... */ if (FileExists(tmpfile)) Die("temporary file %s already exists; please delete it first", tmpfile); if (hmmfp->is_binary) mode = "wb"; else mode = "w"; /*********************************************** * Show the banner ***********************************************/ Banner(stdout, banner); printf("HMM file: %s\n", hmmfile); if (fixedlen) printf("Length fixed to: %d\n", fixedlen); else { printf("Length distribution mean: %.0f\n", lenmean); printf("Length distribution s.d.: %.0f\n", lensd); } printf("Number of samples: %d\n", nsample); printf("random seed: %d\n", seed); printf("histogram(s) saved to: %s\n", histfile != NULL ? histfile : "[not saved]"); if (do_pvm) printf("PVM: ACTIVE\n"); else if (num_threads > 0) printf("POSIX threads: %d\n", num_threads); printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); /*********************************************** * Read the HMMs one at a time, and send them off * in probability form to one of the main loops. * The main loop functions are responsible for * synthesizing random sequences and returning * a score histogram for each HMM. ***********************************************/ nhmm = 0; mu = MallocOrDie(sizeof(float) * mu_lumpsize); lambda = MallocOrDie(sizeof(float) * mu_lumpsize); while (HMMFileRead(hmmfp, &hmm)) { if (hmm == NULL) Die("HMM file may be corrupt or in incorrect format; parse failed"); if (! do_pvm && num_threads == 0) main_loop_serial(hmm, seed, nsample, lenmean, lensd, fixedlen, &hist, &max);#ifdef HMMER_PVM else if (do_pvm) { pvm_nslaves = 0; /* solely to silence compiler warnings */ main_loop_pvm(hmm, seed, nsample, pvm_lumpsize, lenmean, lensd, fixedlen, &hist, &max, &extrawatch, &pvm_nslaves); }#endif #ifdef HMMER_THREADS else if (num_threads > 0) main_loop_threaded(hmm, seed, nsample, lenmean, lensd, fixedlen, num_threads, &hist, &max, &extrawatch);#endif else Die("wait. that can't happen. I didn't do anything."); /* Fit an EVD to the observed histogram. * The TRUE left-censors and fits only the right slope of the histogram.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -