⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmpfam.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. ************************************************************//* hmmpfam.c * SRE, Mon Aug 25 17:03:14 1997 [Denver]  * * Search a single sequence against an HMM database. * Conditionally includes PVM parallelization when HMMER_PVM is defined *    at compile time; hmmpfam --pvm runs the PVM version. *     * CVS $Id: hmmpfam.c,v 1.23 2001/06/11 14:49:47 eddy Exp $ */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <float.h>#ifdef HMMER_THREADS#include <pthread.h>#endif#ifdef HMMER_PVM#include <pvm3.h>#endif#include "squid.h"		/* general sequence analysis library    */#include "config.h"		/* compile-time configuration constants */#include "structs.h"		/* data structures, macros, #define's   */#include "funcs.h"		/* function declarations                */#include "globals.h"		/* alphabet global variables            */#include "version.h"		/* version info                         */static char banner[] = "hmmpfam - search one or more sequences against HMM database";static char usage[]  = "\Usage: hmmpfam [-options] <hmm database> <sequence file or database>\n\  Available options are:\n\   -h        : help; print brief help on version and usage\n\   -n        : nucleic acid models/sequence (default protein)\n\   -A <n>    : sets alignment output limit to <n> best domain alignments\n\   -E <x>    : sets E value cutoff (globE) to <x>; default 10\n\   -T <x>    : sets T bit threshold (globT) to <x>; no threshold by default\n\   -Z <n>    : sets Z (# models) for E-value calculation\n\";static char experts[] = "\   --acc         : use HMM accession numbers instead of names in output\n\   --compat      : make best effort to use last version's output style\n\   --cpu <n>     : run <n> threads in parallel (if threaded)\n\   --cut_ga      : use Pfam GA gathering threshold cutoffs\n\   --cut_nc      : use Pfam NC noise threshold cutoffs\n\   --cut_tc      : use Pfam TC trusted threshold cutoffs\n\   --domE <x>    : sets domain Eval cutoff (2nd threshold) to <x>\n\   --domT <x>    : sets domain T bit thresh (2nd threshold) to <x>\n\   --forward     : use the full Forward() algorithm instead of Viterbi\n\   --informat <s>: sequence file is in format <s>, not FASTA\n\   --null2       : turn OFF the post hoc second null model\n\   --pvm         : run on a PVM (Parallel Virtual Machine) cluster\n\   --xnu         : turn ON XNU filtering of query protein sequence\n\\n";static struct opt_s OPTIONS[] = {  { "-h",        TRUE,  sqdARG_NONE },   { "-n",        TRUE,  sqdARG_NONE },  { "-A",        TRUE,  sqdARG_INT  },    { "-E",        TRUE,  sqdARG_FLOAT},    { "-T",        TRUE,  sqdARG_FLOAT},    { "-Z",        TRUE,  sqdARG_INT  },  { "--acc",     FALSE, sqdARG_NONE },  { "--compat",  FALSE, sqdARG_NONE },  { "--cpu",     FALSE, sqdARG_INT  },  { "--cut_ga",  FALSE, sqdARG_NONE },  { "--cut_nc",  FALSE, sqdARG_NONE },  { "--cut_tc",  FALSE, sqdARG_NONE },  { "--domE",    FALSE, sqdARG_FLOAT},  { "--domT",    FALSE, sqdARG_FLOAT},  { "--forward", FALSE, sqdARG_NONE },  { "--informat",FALSE, sqdARG_STRING},  { "--null2",   FALSE, sqdARG_NONE },    { "--pvm",     FALSE, sqdARG_NONE },    { "--xnu",     FALSE, sqdARG_NONE },};#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))#ifdef HMMER_THREADS/* POSIX threads version: * the threads share a workpool_s structure amongst themselves, * for obtaining locks on input HMM file and output histogram and * tophits structures. */struct workpool_s {  /* Shared configuration resources that don't change:   */  char  *hmmfile;               /* name of HMM file                */  char  *dsq;                   /* digitized query sequence        */  char  *seqname;               /* sequence name                   */  int    L;			/* length of dsq                   */  int    do_forward;		/* TRUE to score using Forward     */  int    do_null2;		/* TRUE to apply null2 correction  */  struct threshold_s *thresh;   /* score/evalue cutoff information */     /* Shared (mutex-protected) input resources:   */  HMMFILE *hmmfp;               /* ptr to open HMM file           */  int nhmm;			/* number of HMMs searched so far */  pthread_mutex_t input_lock;   /* mutex for locking input        */  /* Shared (mutex-protected) output resources:   */  struct tophit_s *ghit;        /* per-sequence top hits */  struct tophit_s *dhit;        /* per-domain top hits */  pthread_mutex_t output_lock;  /* mutex for locking output */  /* Thread pool information   */  pthread_t *thread;            /* our pool of threads */  int        num_threads;       /* number of threads   */};static struct workpool_s *workpool_start(char *hmmfile, HMMFILE *hmmfp, 					 char *dsq, char *seqname, int L,					 int do_forward, int do_null2, 					 struct threshold_s *thresh,					 struct tophit_s *ghit, struct tophit_s *dhit, 					 int num_threads);static void  workpool_stop(struct workpool_s *wpool);static void  workpool_free(struct workpool_s *wpool);static void *worker_thread(void *ptr);#endif /* HMMER_THREADS */#ifdef HMMER_PVMstatic void main_loop_pvm(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, 			  struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2,			  struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm);#endifstatic void main_loop_serial(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, 			     struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2,			     int num_threads,			     struct tophit_s *ghit, struct tophit_s *dhit, int *nhmm);intmain(int argc, char **argv) {  char            *hmmfile;	/* file to read HMMs from                  */  HMMFILE         *hmmfp;       /* opened hmmfile for reading              */  char            *seqfile;     /* file to read target sequence from       */   SQFILE          *sqfp;        /* opened seqfile for reading              */  int              format;	/* format of seqfile                       */  char              *seq;	/* target sequence                         */  SQINFO             sqinfo;	/* optional info for seq                   */  struct fancyali_s *ali;	/* an alignment for display                */  struct tophit_s   *ghit;      /* list of top hits and alignments for seq  */  struct tophit_s   *dhit;	/* list of top hits/alignments for domains  */  float   sc;			/* log-odds score in bits                  */  double  pvalue;		/* pvalue of an HMM score                  */  double  evalue;		/* evalue of an HMM score                  */  double  motherp;		/* pvalue of a whole seq HMM score         */  float   mothersc;		/* score of a whole seq parent of domain   */  int     sqfrom, sqto;		/* coordinates in sequence                 */  int     hmmfrom, hmmto;	/* coordinate in HMM                       */  char   *name, *acc, *desc;    /* hit HMM name, accession, description            */  int     hmmlen;		/* length of HMM hit                       */  int     nhmm;			/* number of HMMs searched                 */  int     domidx;		/* number of this domain                   */  int     ndom;			/* total # of domains in this seq          */  int     namewidth;		/* max width of printed HMM name           */  int     descwidth;		/* max width of printed description        */  int    Alimit;		/* A parameter limiting output alignments   */  struct threshold_s thresh;    /* contains all threshold (cutoff) info     */  char *optname;                /* name of option found by Getopt()         */  char *optarg;                 /* argument found by Getopt()               */  int   optind;                 /* index in argv[]                          */  int   do_forward;		/* TRUE to use Forward() not Viterbi()      */  int   do_nucleic;		/* TRUE to do DNA/RNA instead of protein    */  int   do_null2;		/* TRUE to adjust scores with null model #2 */  int   do_pvm;			/* TRUE to run on PVM                       */  int   do_xnu;			/* TRUE to do XNU filtering                 */  int   be_backwards;		/* TRUE to be backwards-compatible in output*/  int   show_acc;		/* TRUE to sub HMM accessions for names     */  int   i;  int   nreported;  int   num_threads;            /* number of worker threads */     /***********************************************    * Parse command line   ***********************************************/    format      = SQFILE_UNKNOWN;	/* default: autodetect format w/ Babelfish */  do_forward  = FALSE;  do_nucleic  = FALSE;  do_null2    = TRUE;  do_pvm      = FALSE;  do_xnu      = FALSE;  be_backwards= FALSE;   show_acc    = FALSE;    Alimit         = INT_MAX;	/* no limit on alignment output     */  thresh.globE   = 10.0;	/* use a reasonable Eval threshold; */  thresh.globT   = -FLT_MAX;	/*   but no bit threshold,          */  thresh.domT    = -FLT_MAX;	/*   no domain bit threshold,       */  thresh.domE    = FLT_MAX;     /*   and no domain Eval threshold.  */  thresh.autocut = CUT_NONE;	/*   and no Pfam cutoffs used.      */  thresh.Z       = 0;		/* Z not preset, so determined by # of HMMs */#ifdef HMMER_THREADS  num_threads = ThreadNumber(); /* only matters if we're threaded */#else  num_threads = 0;#endif   while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,                &optind, &optname, &optarg))  {    if      (strcmp(optname, "-n")        == 0) do_nucleic     = TRUE;     else if (strcmp(optname, "-A")        == 0) Alimit         = atoi(optarg);      else if (strcmp(optname, "-E")        == 0) thresh.globE   = atof(optarg);    else if (strcmp(optname, "-T")        == 0) thresh.globT   = atof(optarg);    else if (strcmp(optname, "-Z")        == 0) thresh.Z       = atoi(optarg);    else if (strcmp(optname, "--acc")     == 0) show_acc       = TRUE;    else if (strcmp(optname, "--compat")  == 0) be_backwards   = TRUE;    else if (strcmp(optname, "--cpu")     == 0) num_threads    = atoi(optarg);    else if (strcmp(optname, "--cut_ga")  == 0) thresh.autocut = CUT_GA;    else if (strcmp(optname, "--cut_nc")  == 0) thresh.autocut = CUT_NC;    else if (strcmp(optname, "--cut_tc")  == 0) thresh.autocut = CUT_TC;    else if (strcmp(optname, "--domE")    == 0) thresh.domE    = atof(optarg);    else if (strcmp(optname, "--domT")    == 0) thresh.domT    = atof(optarg);    else if (strcmp(optname, "--forward") == 0) do_forward     = TRUE;    else if (strcmp(optname, "--null2")   == 0) do_null2       = FALSE;    else if (strcmp(optname, "--pvm")     == 0) do_pvm         = TRUE;    else if (strcmp(optname, "--xnu")     == 0) do_xnu         = TRUE;     else if (strcmp(optname, "--informat") == 0) {      format = String2SeqfileFormat(optarg);      if (format == SQFILE_UNKNOWN) 	Die("unrecognized sequence file format \"%s\"", optarg);    }    else if (strcmp(optname, "-h")      == 0) {      Banner(stdout, banner);      puts(usage);      puts(experts);      exit(0);    }  }  if (argc - optind != 2)    Die("Incorrect number of arguments.\n%s\n", usage);  hmmfile = argv[optind++];  seqfile = argv[optind++]; #ifndef HMMER_PVM  if (do_pvm) Die("PVM support is not compiled into HMMER; --pvm doesn't work.");#endif#ifndef HMMER_THREADS  if (num_threads) Die("Posix threads support is not compiled into HMMER; --cpu doesn't have any effect");#endif  /***********************************************    * Open sequence database (must be in curr directory);   * get target sequence.   ***********************************************/  if (do_nucleic) SetAlphabet(hmmNUCLEIC);  else            SetAlphabet(hmmAMINO);  if (do_nucleic && do_xnu)     Die("You can't use -n and --xnu together: I can't xnu DNA data.");  if ((sqfp = SeqfileOpen(seqfile, format, NULL)) == NULL)    Die("Failed to open sequence file %s\n%s\n", seqfile, usage);  /***********************************************    * Open HMM database (might be in HMMERDB or current directory)   ***********************************************/  if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)    Die("Failed to open HMM database %s\n%s", hmmfile, usage);  /***********************************************    * Show the banner   ***********************************************/  Banner(stdout, banner);  printf(   "HMM file:                 %s\n", hmmfile);  printf(   "Sequence file:            %s\n", seqfile);  if (do_pvm)    printf( "PVM:                      ACTIVE\n");  printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");  /***********************************************    * Search each HMM against each sequence   ***********************************************/  while (ReadSeq(sqfp, format, &seq, &sqinfo))     {      ghit = AllocTophits(20);   /* keeps full seq scores */      dhit = AllocTophits(20);   /* keeps domain scores   */      /* 1. Search sequence against all HMMs.       *    Significant scores+alignments accumulate in ghit, dhit.       */      if (!do_pvm)	main_loop_serial(hmmfile, hmmfp, seq, &sqinfo, 			 &thresh, do_xnu, do_forward, do_null2, num_threads,			 ghit, dhit, &nhmm);#ifdef HMMER_PVM      else if (do_pvm)	{	  SQD_DPRINTF1(("Entering PVM main loop\n"));	  main_loop_pvm(hmmfile, hmmfp, seq, &sqinfo, 			&thresh, do_xnu, do_forward, do_null2,			ghit, dhit, &nhmm);	}#endif      else Die("wait. that can't happen. I didn't do anything.");				/* set Z for good now that we're done */      if (!thresh.Z) thresh.Z = nhmm;	      /* 2. (Done searching all HMMs for this query seq; start output)       *    Report the overall sequence hits, sorted by significance.       */      if (be_backwards)	{	  printf("Query:  %s  %s\n", sqinfo.name, 		 sqinfo.flags & SQINFO_DESC ? sqinfo.desc : "");	}      else	{	  printf("\nQuery sequence: %s\n", sqinfo.name);	  printf("Accession:      %s\n", sqinfo.flags &SQINFO_ACC ? sqinfo.acc  : "[none]");	  printf("Description:    %s\n", sqinfo.flags &SQINFO_DESC? sqinfo.desc : "[none]");	}      /* We'll now sort the global hit list by evalue...        * (not score! that was bug #12. in hmmpfam, score and evalue are not        *  monotonic.)       */      FullSortTophits(ghit);	      namewidth = MAX(8, TophitsMaxName(ghit)); /* must print whole name, no truncation  */      descwidth = MAX(52-namewidth, 11);        /* may truncate desc, but avoid neg len! */      printf("\nScores for sequence family classification (score includes all domains):\n");      printf("%-*s %-*s %7s %10s %3s\n", namewidth, "Model", descwidth, "Description", "Score", "E-value", " N ");      printf("%-*s %-*s %7s %10s %3s\n", namewidth, "--------", descwidth, "-----------", "-----", "-------", "---");      for (i = 0, nreported = 0; i < ghit->num; i++)	{	  char *safedesc;	  GetRankedHit(ghit, i, 		       &pvalue, &sc, NULL, NULL,		       &name, &acc, &desc,		       NULL, NULL, NULL,           /* seq positions */		       NULL, NULL, NULL,           /* HMM positions */		       NULL, &ndom,                /* domain info   */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -