⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmsearch.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* hmmsearch.c * SRE, Tue Jan  7 17:19:20 1997 [St. Louis] * * Search a sequence database with a profile HMM. * Conditionally includes PVM parallelization when HMMER_PVM is defined *    at compile time; hmmsearch --pvm runs the PVM version. * * CVS $Id: hmmsearch.c,v 1.34 2001/06/11 14:49:47 eddy Exp $ */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <float.h>#ifdef HMMER_THREADS#include <pthread.h>#endif#ifdef HMMER_PVM#include <pvm3.h>#endif#include "squid.h"		/* general sequence analysis library    */#include "config.h"		/* compile-time configuration constants */#include "structs.h"		/* data structures, macros, #define's   */#include "funcs.h"		/* function declarations                */#include "globals.h"		/* alphabet global variables            */#include "version.h"		/* version info                         */static char banner[] = "hmmsearch - search a sequence database with a profile HMM";static char usage[]  = "\Usage: hmmsearch [-options] <hmmfile> <sequence file or database>\n\  Available options are:\n\   -h        : help; print brief help on version and usage\n\   -A <n>    : sets alignment output limit to <n> best domain alignments\n\   -E <x>    : sets E value cutoff (globE) to <= x\n\   -T <x>    : sets T bit threshold (globT) to >= x\n\   -Z <n>    : sets Z (# seqs) for E-value calculation\n\";static char experts[] = "\   --compat  : make best effort to use last version's output style\n\   --cpu <n> : run <n> threads in parallel (if threaded)\n\   --cut_ga  : use Pfam GA gathering threshold cutoffs\n\   --cut_nc  : use Pfam NC noise threshold cutoffs\n\   --cut_tc  : use Pfam TC trusted threshold cutoffs\n\   --domE <x>: sets domain Eval cutoff (2nd threshold) to <= x\n\   --domT <x>: sets domain T bit thresh (2nd threshold) to >= x\n\   --forward : use the full Forward() algorithm instead of Viterbi\n\   --informat <s>: sequence file is in format <s>, not FASTA\n\   --null2   : turn OFF the post hoc second null model\n\   --pvm     : run on a Parallel Virtual Machine (PVM)\n\   --xnu     : turn ON XNU filtering of target protein sequences\n\";static struct opt_s OPTIONS[] = {  { "-h",        TRUE,  sqdARG_NONE },   { "-A",        TRUE,  sqdARG_INT  },    { "-E",        TRUE,  sqdARG_FLOAT},    { "-T",        TRUE,  sqdARG_FLOAT},    { "-Z",        TRUE,  sqdARG_INT  },  { "--compat",  FALSE, sqdARG_NONE },  { "--cpu",     FALSE, sqdARG_INT  },  { "--cut_ga",  FALSE, sqdARG_NONE },  { "--cut_nc",  FALSE, sqdARG_NONE },  { "--cut_tc",  FALSE, sqdARG_NONE },  { "--domE",    FALSE, sqdARG_FLOAT},  { "--domT",    FALSE, sqdARG_FLOAT},  { "--forward", FALSE, sqdARG_NONE },    { "--informat",FALSE, sqdARG_STRING},  { "--null2",   FALSE, sqdARG_NONE },  { "--pvm",     FALSE, sqdARG_NONE },  { "--xnu",     FALSE, sqdARG_NONE },};#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))#ifdef HMMER_THREADS/* POSIX threads version: * the threads share a workpool_s structure amongst themselves, * for obtaining locks on input HMM file and output histogram and * tophits structures. */struct workpool_s {  /* Shared configuration resources which don't change:   */  struct plan7_s *hmm;		/* HMM to search with              */  int    do_xnu;		/* TRUE to apply XNU filter  */  int    do_forward;		/* TRUE to score using Forward     */  int    do_null2;		/* TRUE to apply null2 ad hoc correction */  struct threshold_s *thresh;   /* score/evalue threshold info     */    /* Shared (mutex-protected) input resources:   */  SQFILE *sqfp;                 /* ptr to open sequence file      */  int nseq;			/* number of seqs searched so far */  pthread_mutex_t input_lock;   /* mutex for locking input        */  /* Shared (mutex-protected) output resources:   */  struct tophit_s *ghit;        /* per-sequence top hits */  struct tophit_s *dhit;        /* per-domain top hits */  struct histogram_s *hist;     /* histogram of scores */  pthread_mutex_t output_lock;  /* mutex for locking output */  /* Thread pool information   */  pthread_t *thread;            /* our pool of threads */  int        num_threads;       /* number of threads   */};static struct workpool_s *workpool_start(struct plan7_s *hmm, SQFILE *sqfp, 					 int do_xnu, int do_forward, int do_null2, 					 struct threshold_s *thresh,					 struct tophit_s *ghit, struct tophit_s *dhit, 					 struct histogram_s *hist, int num_threads);static void  workpool_stop(struct workpool_s *wpool);static void  workpool_free(struct workpool_s *wpool);static void *worker_thread(void *ptr);#endif /* HMMER_THREADS */static void main_loop_serial(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward,			     int do_null2, int do_xnu, int num_threads,			     struct histogram_s *histogram, struct tophit_s *ghit, 			     struct tophit_s *dhit, int *ret_nseq);#ifdef HMMER_PVMstatic void main_loop_pvm(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward,			  int do_null2, int do_xnu, struct histogram_s *histogram, 			  struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nseq);#endifintmain(int argc, char **argv) {  char    *hmmfile;	        /* file to read HMM(s) from                */  HMMFILE *hmmfp;               /* opened hmmfile for reading              */  char    *seqfile;             /* file to read target sequence(s) from    */   SQFILE   *sqfp;               /* opened seqfile for reading              */  int       format;	        /* format of seqfile                       */  int       i;   struct plan7_s  *hmm;         /* HMM to search with                      */   struct histogram_s *histogram;/* histogram of all scores                 */  struct fancyali_s *ali;       /* displayed alignment info                */   struct tophit_s   *ghit;      /* list of top hits for whole sequences    */  struct tophit_s   *dhit;	/* list of top hits for domains            */  float     sc;	        	/* score of an HMM search                  */  double  pvalue;		/* pvalue of an HMM score                  */  double  evalue;		/* evalue of an HMM score                  */  double  motherp;		/* pvalue of a whole seq HMM score         */  float   mothersc;		/* score of a whole seq parent of domain   */  int     sqfrom, sqto;		/* coordinates in sequence                 */  int     hmmfrom, hmmto;	/* coordinate in HMM                       */  char   *name, *acc, *desc;    /* hit sequence name and description       */  int     sqlen;		/* length of seq that was hit              */  int     nseq;			/* number of sequences searched            */  int     Z;			/* # of seqs for purposes of E-val calc    */  int     domidx;		/* number of this domain                   */  int     ndom;			/* total # of domains in this seq          */  int     namewidth;		/* max width of sequence name              */  int     descwidth;		/* max width of description */  int     nreported;		/* # of hits reported in a list            */  int    Alimit;		/* A parameter limiting output alignments   */  struct threshold_s thresh;    /* contains all threshold (cutoff) info     */  char *optname;                /* name of option found by Getopt()         */  char *optarg;                 /* argument found by Getopt()               */  int   optind;                 /* index in argv[]                          */  int   do_null2;		/* TRUE to adjust scores with null model #2 */  int   do_forward;		/* TRUE to use Forward() not Viterbi()      */  int   do_xnu;			/* TRUE to filter sequences thru XNU        */  int   do_pvm;			/* TRUE to run on Parallel Virtual Machine  */  int   be_backwards;		/* TRUE to be backwards-compatible in output*/  int   num_threads;		/* number of worker threads                 */  /***********************************************    * Parse command line   ***********************************************/    format      = SQFILE_UNKNOWN;	/* default: autodetect seq file format w/ Babelfish */  do_forward  = FALSE;  do_null2    = TRUE;  do_xnu      = FALSE;  do_pvm      = FALSE;    Z           = 0;  be_backwards= FALSE;   Alimit         = INT_MAX;	/* no limit on alignment output       */  thresh.globE   = 10.0;	/* use a reasonable Eval threshold;   */  thresh.globT   = -FLT_MAX;	/*   but no bit threshold,            */  thresh.domT    = -FLT_MAX;	/*   no domain bit threshold,         */  thresh.domE    = FLT_MAX;     /*   and no domain Eval threshold.    */  thresh.autocut = CUT_NONE;	/*   and no Pfam cutoffs used         */  thresh.Z       = 0;           /* Z not preset; use actual # of seqs */#ifdef HMMER_THREADS  num_threads = ThreadNumber(); /* only matters if we're threaded */#else  num_threads = 0;#endif   while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,                &optind, &optname, &optarg))  {    if      (strcmp(optname, "-A") == 0)        Alimit         = atoi(optarg);      else if (strcmp(optname, "-E") == 0)        thresh.globE   = atof(optarg);    else if (strcmp(optname, "-T") == 0)        thresh.globT   = atof(optarg);    else if (strcmp(optname, "-Z") == 0)        thresh.Z       = atoi(optarg);    else if (strcmp(optname, "--compat")  == 0) be_backwards   = TRUE;    else if (strcmp(optname, "--cpu")     == 0) num_threads    = atoi(optarg);    else if (strcmp(optname, "--cut_ga")  == 0) thresh.autocut = CUT_GA;    else if (strcmp(optname, "--cut_nc")  == 0) thresh.autocut = CUT_NC;    else if (strcmp(optname, "--cut_tc")  == 0) thresh.autocut = CUT_TC;    else if (strcmp(optname, "--domE")    == 0) thresh.domE    = atof(optarg);    else if (strcmp(optname, "--domT")    == 0) thresh.domT    = atof(optarg);    else if (strcmp(optname, "--forward") == 0) do_forward     = TRUE;    else if (strcmp(optname, "--null2")   == 0) do_null2       = FALSE;    else if (strcmp(optname, "--pvm")     == 0) do_pvm         = TRUE;    else if (strcmp(optname, "--xnu")     == 0) do_xnu         = TRUE;    else if (strcmp(optname, "--informat") == 0) {      format = String2SeqfileFormat(optarg);      if (format == SQFILE_UNKNOWN) 	Die("unrecognized sequence file format \"%s\"", optarg);    }    else if (strcmp(optname, "-h") == 0) {      Banner(stdout, banner);      puts(usage);      puts(experts);      exit(0);    }  }  if (argc - optind != 2)    Die("Incorrect number of arguments.\n%s\n", usage);  hmmfile = argv[optind++];  seqfile = argv[optind++];   #ifndef HMMER_PVM  if (do_pvm) Die("PVM support is not compiled into your HMMER software; --pvm doesn't work.");#endif#ifndef HMMER_THREADS  if (num_threads) Die("Posix threads support is not compiled into HMMER; --cpu doesn't have any effect");#endif  /***********************************************    * Open sequence database (might be in BLASTDB or current directory)   ***********************************************/  if ((sqfp = SeqfileOpen(seqfile, format, "BLASTDB")) == NULL)    Die("Failed to open sequence database file %s\n%s\n", seqfile, usage);  /***********************************************    * Open HMM file (might be in HMMERDB or current directory).   * Read a single HMM from it. (Config HMM, if necessary).   * Alphabet globals are set by reading the HMM.   ***********************************************/  if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)    Die("Failed to open HMM file %s\n%s", hmmfile, usage);  if (!HMMFileRead(hmmfp, &hmm))     Die("Failed to read any HMMs from %s\n", hmmfile);  if (hmm == NULL)     Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile);  P7Logoddsify(hmm, !do_forward);  if (do_xnu && Alphabet_type == hmmNUCLEIC)     Die("The HMM is a DNA model, and you can't use the --xnu filter on DNA data");  /*****************************************************************   * Set up optional Pfam score thresholds.    * Can do this before starting any searches, since we'll only use 1 HMM.   *****************************************************************/   if (! SetAutocuts(&thresh, hmm))     Die("HMM %s did not contain the GA, TC, or NC cutoffs you needed",	hmm->name);  /***********************************************    * Show the banner   ***********************************************/  Banner(stdout, banner);  printf(   "HMM file:                   %s [%s]\n", hmmfile, hmm->name);  printf(   "Sequence database:          %s\n", seqfile);   if (do_pvm)    printf( "PVM:                        ACTIVE\n");  printf(   "per-sequence score cutoff:  ");  if (thresh.globT == -FLT_MAX) printf("[none]\n");  else  {    printf(">= %.1f", thresh.globT);    if      (thresh.autocut == CUT_GA) printf(" [GA1]\n");    else if (thresh.autocut == CUT_NC) printf(" [NC1]\n");    else if (thresh.autocut == CUT_TC) printf(" [TC1]\n");    else                               printf("\n");  }  printf(   "per-domain score cutoff:    ");  if (thresh.domT == -FLT_MAX) printf("[none]\n");  else  {    printf(">= %.1f", thresh.domT);    if      (thresh.autocut == CUT_GA) printf(" [GA2]\n");    else if (thresh.autocut == CUT_NC) printf(" [NC2]\n");    else if (thresh.autocut == CUT_TC) printf(" [TC2]\n");    else                               printf("\n");  }  printf(   "per-sequence Eval cutoff:   ");  if (thresh.globE == FLT_MAX) printf("[none]\n");  else                  printf("<= %-10.2g\n", thresh.globE);      printf(   "per-domain Eval cutoff:     ");  if (thresh.domE == FLT_MAX) printf("[none]\n");  else                 printf("<= %10.2g\n", thresh.domE);  printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");  /***********************************************    * Search HMM against each sequence   ***********************************************/				/* set up structures for storing output  */  histogram = AllocHistogram(-200, 200, 100);  /* keeps full histogram */  ghit      = AllocTophits(200);         /* per-seq hits: 200=lumpsize */  dhit      = AllocTophits(200);         /* domain hits:  200=lumpsize */  if (! do_pvm)    main_loop_serial(hmm, sqfp, &thresh, do_forward, do_null2, do_xnu, num_threads,		     histogram, ghit, dhit, &nseq);#ifdef HMMER_PVM  else    main_loop_pvm(hmm, sqfp, &thresh, do_forward, do_null2, do_xnu, 		  histogram, ghit, dhit, &nseq);#endif  /***********************************************    * Process hit lists, produce text output   ***********************************************/  /* Set the theoretical EVD curve in our histogram using    * calibration in the HMM, if available.    */  if (hmm->flags & PLAN7_STATS)    ExtremeValueSetHistogram(histogram, hmm->mu, hmm->lambda, 			     histogram->lowscore, histogram->highscore, 0);  if (!thresh.Z) thresh.Z = nseq;		/* set Z for good now that we're done. */  /* Format and report our output    */  /* 1. Report overall sequence hits (sorted on E-value) */  if (be_backwards)     {      printf("\nQuery HMM: %s|%s|%s\n", 	     hmm->name, 	     hmm->flags & PLAN7_ACC  ? hmm->acc  : "",	     hmm->flags & PLAN7_DESC ? hmm->desc : "");    }  else     {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -