⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmcalibrate.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* hmmcalibrate.c * SRE, Fri Oct 31 09:25:21 1997 [St. Louis] *  * Score an HMM against random sequence data sets; * set histogram fitting parameters. *  * CVS $Id: hmmcalibrate.c,v 1.23 2001/08/05 23:44:43 eddy Exp $ */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <signal.h>#include <math.h>#include <time.h>#include <limits.h>#include <float.h>#ifdef HMMER_THREADS#include <pthread.h>#endif#ifdef HMMER_PVM#include <pvm3.h>#endif#include "squid.h"		/* general sequence analysis library    */#include "config.h"		/* compile-time configuration constants */#include "structs.h"		/* data structures, macros, #define's   */#include "funcs.h"		/* function declarations                */#include "globals.h"		/* alphabet global variables            */#include "version.h"		/* release version info                 */#include "stopwatch.h"		/* process timings                      */static char banner[] = "hmmcalibrate -- calibrate HMM search statistics";static char usage[] = "\Usage: hmmcalibrate [-options] <hmmfile>\n\Available options are:\n\  -h             : print short usage and version info, then exit\n\";static char experts[] = "\  --cpu <n>      : run <n> threads in parallel (if threaded)\n\  --fixed <n>    : fix random sequence length at <n>\n\  --histfile <f> : save histogram(s) to file <f>\n\  --mean <x>     : set random seq length mean at <x> [350]\n\  --num <n>      : set number of sampled seqs to <n> [5000]\n\  --pvm          : run on a Parallel Virtual Machine (PVM)\n\  --sd <x>       : set random seq length std. dev to <x> [350]\n\  --seed <n>     : set random seed to <n> [time()]\n\";static struct opt_s OPTIONS[] = {   { "-h",         TRUE,  sqdARG_NONE  },   { "--cpu",      FALSE, sqdARG_INT },   { "--fixed",    FALSE, sqdARG_INT   },   { "--histfile", FALSE, sqdARG_STRING },   { "--mean",     FALSE, sqdARG_FLOAT },   { "--num",      FALSE, sqdARG_INT   },   { "--pvm",      FALSE, sqdARG_NONE  },   { "--sd",       FALSE, sqdARG_FLOAT },      { "--seed",     FALSE, sqdARG_INT}, };#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))static void main_loop_serial(struct plan7_s *hmm, int seed, int nsample,			     float lenmean, float lensd, int fixedlen,			     struct histogram_s **ret_hist, float *ret_max);#ifdef HMMER_THREADS/* A structure of this type is shared by worker threads in the POSIX * threads parallel version. */struct workpool_s {  /* Static configuration:   */  struct plan7_s  *hmm;		/* ptr to single HMM to search with    */  int    fixedlen;		/* if >0, fix random seq len to this   */  float  lenmean;		/* mean of Gaussian for random seq len */  float  lensd;			/* s.d. of Gaussian for random seq len */  float *randomseq;             /* 0..Alphabet_size-1 i.i.d. probs     */  int    nsample;		/* number of random seqs to do         */  /* Shared (mutex-protected) input:   */  int    nseq;			/* current number of seqs searched     */  /* Shared (mutex-protected) output:   */  struct histogram_s *hist;     /* histogram          */  float          max_score;     /* maximum score seen */  Stopwatch_t    watch;		/* Timings accumulated for threads */  /* Thread pool information:   */  pthread_t      *thread;       /* our pool of threads */  int             num_threads;  /* number of threads   */  pthread_mutex_t input_lock;	/* a mutex protecting input fields */  pthread_mutex_t output_lock;  /* a mutex protecting output fields */};static void main_loop_threaded(struct plan7_s *hmm, int seed, int nsample, 			       float lenmean, float lensd, int fixedlen,			       int nthreads,			       struct histogram_s **ret_hist, float *ret_max,			       Stopwatch_t *twatch);static struct workpool_s *workpool_start(struct plan7_s *hmm, 				 float lenmean, float lensd, int fixedlen,				 float *randomseq, int nsample, 				 struct histogram_s *hist, 				 int num_threads);static void  workpool_stop(struct workpool_s *wpool);static void  workpool_free(struct workpool_s *wpool);static void *worker_thread(void *ptr);#endif /* HMMER_THREADS */#ifdef HMMER_PVMstatic void main_loop_pvm(struct plan7_s *hmm, int seed, int nsample, 			  int lumpsize,			  float lenmean, float lensd, int fixedlen,			  struct histogram_s **ret_hist, float *ret_max, 			  Stopwatch_t *extrawatch, int *ret_nslaves);#endif /* HMMER_PVM */intmain(int argc, char **argv){  char    *hmmfile;             /* HMM file to open                */  char    *tmpfile;             /* temporary calibrated HMM file   */  HMMFILE *hmmfp;               /* opened hmm file pointer         */  FILE    *outfp;               /* for writing HMM(s) into tmpfile */  char    *mode;                /* write mode, "w" or "wb"         */  struct plan7_s *hmm;          /* the hidden Markov model         */  int     idx;			/* counter over sequences          */  sigset_t blocksigs;		/* list of signals to protect from */  int     nhmm;			/* number of HMMs calibrated       */  struct histogram_s *hist;     /* a resulting histogram           */  float   max;			/* maximum score from an HMM       */  char   *histfile;             /* histogram save file             */  FILE   *hfp;                  /* open file pointer for histfile  */  Stopwatch_t stopwatch;	/* main stopwatch for process       */  Stopwatch_t extrawatch;	/* stopwatch for threads/PVM slaves */  float  *mu;			/* array of EVD mu's for HMMs      */  float  *lambda;		/* array of EVD lambda's for HMMs  */  int     mu_lumpsize;		/* allocation lumpsize for mu, lambda */  int     nsample;		/* number of random seqs to sample */  int     seed;			/* random number seed              */  int     fixedlen;		/* fixed length, or 0 if unused    */  float   lenmean;		/* mean of length distribution     */  float   lensd;		/* std dev of length distribution  */  int     do_pvm;		/* TRUE to use PVM                 */  int     pvm_lumpsize;		/* # of seqs to do per PVM slave exchange */  int     pvm_nslaves;		/* number of slaves used in the PVM */  char *optname;		/* name of option found by Getopt() */  char *optarg;			/* argument found by Getopt()       */  int   optind;		        /* index in argv[]                  */  int   num_threads;            /* number of worker threads */     /***********************************************   * Parse the command line   ***********************************************/  StopwatchStart(&stopwatch);  StopwatchZero(&extrawatch);  nsample      = 5000;  fixedlen     = 0;  lenmean      = 325.;  lensd        = 200.;  seed         = (int) time ((time_t *) NULL);  histfile     = NULL;  do_pvm       = FALSE;  pvm_lumpsize = 20;		/* 20 seqs/PVM exchange: sets granularity */  mu_lumpsize  = 100;#ifdef HMMER_THREADS  num_threads  = ThreadNumber(); /* only matters if we're threaded */#else  num_threads  = 0;#endif  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,		&optind, &optname, &optarg))    {      if      (strcmp(optname, "--cpu")      == 0) num_threads  = atoi(optarg);      else if (strcmp(optname, "--fixed")    == 0) fixedlen = atoi(optarg);      else if (strcmp(optname, "--histfile") == 0) histfile = optarg;      else if (strcmp(optname, "--mean")     == 0) lenmean  = atof(optarg);       else if (strcmp(optname, "--num")      == 0) nsample  = atoi(optarg);       else if (strcmp(optname, "--pvm")      == 0) do_pvm   = TRUE;      else if (strcmp(optname, "--sd")       == 0) lensd    = atof(optarg);       else if (strcmp(optname, "--seed")     == 0) seed     = atoi(optarg);      else if (strcmp(optname, "-h") == 0)	{	  Banner(stdout, banner);	  puts(usage);	  puts(experts);	  exit(0);	}    }  if (argc - optind != 1) Die("Incorrect number of arguments.\n%s\n", usage);  hmmfile = argv[optind++];#ifndef HMMER_PVM  if (do_pvm) Die("PVM support is not compiled into HMMER; --pvm doesn't work.");#endif#ifndef HMMER_THREADS  if (num_threads) Die("Posix threads support is not compiled into HMMER; --cpu doesn't have any effect");#endif  /***********************************************   * Open our i/o file pointers, make sure all is well   ***********************************************/				/* HMM file */  if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL)    Die("failed to open HMM file %s for reading.", hmmfile);				/* histogram file */  hfp = NULL;  if (histfile != NULL) {    if ((hfp = fopen(histfile, "w")) == NULL)      Die("Failed to open histogram save file %s for writing\n", histfile);  }  /* Generate calibrated HMM(s) in a tmp file in the current   * directory. When we're finished, we delete the original   * HMM file and rename() this one. That way, the worst   * effect of a catastrophic failure should be that we   * leave a tmp file lying around, but the original HMM   * file remains uncorrupted. tmpnam() doesn't work portably here,   * because it'll put the file in /tmp and we won't   * necessarily be able to rename() it from there.   */  tmpfile = MallocOrDie(strlen(hmmfile) + 5);  strcpy(tmpfile, hmmfile);  strcat(tmpfile, ".xxx");	/* could be more inventive here... */  if (FileExists(tmpfile))    Die("temporary file %s already exists; please delete it first", tmpfile);  if (hmmfp->is_binary) mode = "wb";  else                  mode = "w";   /***********************************************    * Show the banner   ***********************************************/  Banner(stdout, banner);  printf("HMM file:                 %s\n", hmmfile);  if (fixedlen)     printf("Length fixed to:          %d\n", fixedlen);  else {    printf("Length distribution mean: %.0f\n", lenmean);    printf("Length distribution s.d.: %.0f\n", lensd);  }  printf("Number of samples:        %d\n", nsample);  printf("random seed:              %d\n", seed);  printf("histogram(s) saved to:    %s\n",	 histfile != NULL ? histfile : "[not saved]");  if (do_pvm)    printf("PVM:                      ACTIVE\n");  else if (num_threads > 0)    printf("POSIX threads:            %d\n", num_threads);  printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n");  /***********************************************   * Read the HMMs one at a time, and send them off   * in probability form to one of the main loops.   * The main loop functions are responsible for    * synthesizing random sequences and returning   * a score histogram for each HMM.   ***********************************************/  nhmm = 0;  mu     = MallocOrDie(sizeof(float) * mu_lumpsize);  lambda = MallocOrDie(sizeof(float) * mu_lumpsize);  while (HMMFileRead(hmmfp, &hmm))    {      if (hmm == NULL)	Die("HMM file may be corrupt or in incorrect format; parse failed");      if (! do_pvm && num_threads == 0)	main_loop_serial(hmm, seed, nsample, lenmean, lensd, fixedlen, 			 &hist, &max);#ifdef HMMER_PVM      else if (do_pvm) {	pvm_nslaves = 0;	/* solely to silence compiler warnings */	main_loop_pvm(hmm, seed, nsample, pvm_lumpsize, 		      lenmean, lensd, fixedlen, 		      &hist, &max, &extrawatch, &pvm_nslaves);      }#endif #ifdef HMMER_THREADS      else if (num_threads > 0)	main_loop_threaded(hmm, seed, nsample, lenmean, lensd, fixedlen,			   num_threads, &hist, &max, &extrawatch);#endif      else 	Die("wait. that can't happen. I didn't do anything.");      /* Fit an EVD to the observed histogram.       * The TRUE left-censors and fits only the right slope of the histogram.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -