⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmsearch.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
      printf("\nQuery HMM:   %s\n", hmm->name);      printf("Accession:   %s\n", hmm->flags & PLAN7_ACC  ? hmm->acc  : "[none]");      printf("Description: %s\n", hmm->flags & PLAN7_DESC ? hmm->desc : "[none]");    }  if (hmm->flags & PLAN7_STATS)    printf("  [HMM has been calibrated; E-values are empirical estimates]\n");  else    printf("  [No calibration for HMM; E-values are upper bounds]\n");  FullSortTophits(ghit);  namewidth = MAX(8, TophitsMaxName(ghit)); /* cannot truncate name. */  descwidth = MAX(52-namewidth, 11);/* may truncate desc, but need strlen("Description") */  printf("\nScores for complete sequences (score includes all domains):\n");  printf("%-*s %-*s %7s %10s %3s\n", namewidth, "Sequence", descwidth, "Description", "Score", "E-value", " N ");  printf("%-*s %-*s %7s %10s %3s\n", namewidth, "--------", descwidth, "-----------", "-----", "-------", "---");  for (i = 0, nreported = 0; i < ghit->num; i++)    {      char *safedesc;      GetRankedHit(ghit, i, 		   &pvalue, &sc, NULL, NULL,		   &name, NULL, &desc,		   NULL, NULL, NULL,               /* sequence positions */		   NULL, NULL, NULL,               /* HMM positions      */		   NULL, &ndom,	                   /* domain info        */		   NULL);	                   /* alignment info     */      evalue = pvalue * (double) thresh.Z;      /* safedesc is a workaround for an apparent Linux printf()       * bug with the *.*s format. dbmalloc crashes with a memchr() ptr out of bounds       * flaw if the malloc'ed space for desc is short. The workaround       * is to make sure the ptr for *.* has a big malloc space.       */      if (desc != NULL && strlen(desc) < 80) 	{	  safedesc = MallocOrDie(sizeof(char) * 80);	  strcpy(safedesc, desc);	}      else safedesc = Strdup(desc);      if (evalue <= thresh.globE && sc >= thresh.globT) {	printf("%-*s %-*.*s %7.1f %10.2g %3d\n", 	       namewidth, name, 	       descwidth, descwidth, safedesc != NULL ? safedesc : "",	       sc, evalue, ndom);	nreported++;      }      free(safedesc);    }  if (nreported == 0) printf("\t[no hits above thresholds]\n");  /* 2. Report domain hits (also sorted on E-value) */  FullSortTophits(dhit);  namewidth = MAX(8, TophitsMaxName(dhit));  printf("\nParsed for domains:\n");  printf("%-*s %7s %5s %5s    %5s %5s    %7s %8s\n",	 namewidth, "Sequence", "Domain ", "seq-f", "seq-t", "hmm-f", "hmm-t", "score", "E-value");  printf("%-*s %7s %5s %5s    %5s %5s    %7s %8s\n",	 namewidth, "--------", "-------", "-----", "-----", "-----", "-----", "-----", "-------");        for (i = 0, nreported = 0; i < dhit->num; i++)    {      GetRankedHit(dhit, i, 		   &pvalue, &sc, &motherp, &mothersc,		   &name, NULL, NULL,		   &sqfrom, &sqto, &sqlen,            /* seq position info  */		   &hmmfrom, &hmmto, NULL,            /* HMM position info  */		   &domidx, &ndom,                    /* domain info        */		   NULL);	                      /* alignment info     */      evalue = pvalue * (double) thresh.Z;      if (motherp * (double) thresh.Z > thresh.globE || mothersc < thresh.globT) 	continue;      else if (evalue <= thresh.domE && sc >= thresh.domT) {	printf("%-*s %3d/%-3d %5d %5d %c%c %5d %5d %c%c %7.1f %8.2g\n",	       namewidth, name, 	       domidx, ndom,	       sqfrom, sqto, 	       sqfrom == 1 ? '[' : '.', sqto == sqlen ? ']' : '.',	       hmmfrom, hmmto,	       hmmfrom == 1 ? '[':'.', hmmto == hmm->M ? ']' : '.',	       sc, evalue);	nreported++;      }    }  if (nreported == 0) printf("\t[no hits above thresholds]\n");  /* 3. Alignment output, also by domain.   *    dhits is already sorted and namewidth is set, from above code.   *    Number of displayed alignments is limited by Alimit parameter;   *    also by domE (evalue threshold), domT (score theshold).   */  if (Alimit != 0)    {      printf("\nAlignments of top-scoring domains:\n");      for (i = 0, nreported = 0; i < dhit->num; i++)	{	  if (nreported == Alimit) break; /* limit to Alimit output alignments */	  GetRankedHit(dhit, i, 		       &pvalue, &sc, &motherp, &mothersc,		       &name, NULL, NULL,		       &sqfrom, &sqto, &sqlen,            /* seq position info  */		       &hmmfrom, &hmmto, NULL,            /* HMM position info  */		       &domidx, &ndom,                    /* domain info        */		       &ali);	                      /* alignment info     */	  evalue = pvalue * (double) thresh.Z;	  if (motherp * (double) thresh.Z > thresh.globE || mothersc < thresh.globT) 	    continue;	  else if (evalue <= thresh.domE && sc >= thresh.domT) 	    {	      printf("%s: domain %d of %d, from %d to %d: score %.1f, E = %.2g\n", 		     name, domidx, ndom, sqfrom, sqto, sc, evalue);	      PrintFancyAli(stdout, ali);	      nreported++;	    }	}      if (nreported == 0)      printf("\t[no hits above thresholds]\n");      if (nreported == Alimit) printf("\t[output cut off at A = %d top alignments]\n", Alimit);    }  /* 4. Histogram output */  printf("\nHistogram of all scores:\n");  PrintASCIIHistogram(stdout, histogram);  /* 5. Tophits summaries, while developing...   */  printf("\nTotal sequences searched: %d\n", nseq);  printf("\nWhole sequence top hits:\n");  TophitsReport(ghit, thresh.globE, nseq);  printf("\nDomain top hits:\n");  TophitsReport(dhit, thresh.domE, nseq);  /***********************************************    * Clean-up and exit.   ***********************************************/  FreeHistogram(histogram);  HMMFileClose(hmmfp);  SeqfileClose(sqfp);  FreeTophits(ghit);  FreeTophits(dhit);  FreePlan7(hmm);  SqdClean();  return 0;}/* Function: main_loop_serial() * Date:     SRE, Wed Sep 23 10:20:49 1998 [St. Louis] * * Purpose:  Search an HMM against a sequence database. *           main loop for the serial (non-PVM, non-threads) *           version. *            *           In:   HMM and open sqfile, plus options *           Out:  histogram, global hits list, domain hits list, nseq. * * Args:     hmm        - the HMM to search with.  *           sqfp       - open SQFILE for sequence database *           thresh     - score/evalue threshold info *           do_forward - TRUE to score using Forward()         *           do_null2   - TRUE to use ad hoc null2 score correction *           do_xnu     - TRUE to apply XNU mask *           num_threads- number of worker threads to start, or 0 *           histogram  - RETURN: score histogram *           ghit       - RETURN: ranked global scores *           dhit       - RETURN: ranked domain scores *           ret_nseq   - RETURN: actual number of seqs searched *            * Returns:  (void) */static voidmain_loop_serial(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward,		 int do_null2, int do_xnu, int num_threads,		 struct histogram_s *histogram, 		 struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nseq){#ifdef HMMER_THREADS  struct workpool_s *wpool;	/* pool of worker threads                  */#else  struct p7trace_s *tr;         /* traceback                               */  char   *seq;                  /* target sequence                         */  char   *dsq;		        /* digitized target sequence               */  SQINFO sqinfo;		/* optional info for seq                   */  float  sc;	        	/* score of an HMM search                  */  double pvalue;		/* pvalue of an HMM score                  */  double evalue;		/* evalue of an HMM score                  */#endif  int    nseq;			/* number of sequences searched            */ #ifdef HMMER_THREADS  wpool = workpool_start(hmm, sqfp, do_xnu, do_forward, do_null2, thresh,			 ghit, dhit, histogram, num_threads);  workpool_stop(wpool);  nseq = wpool->nseq;  workpool_free(wpool);#else /* unthreaded code: */  nseq = 0;  while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo))    {      /* Silently skip length 0 seqs.        * What, you think this doesn't occur? Welcome to genomics,        * young grasshopper.       */      if (sqinfo.len == 0) continue;      nseq++;      dsq = DigitizeSequence(seq, sqinfo.len);            if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo.len);            /* 1. Recover a trace by Viterbi.       */      if (P7ViterbiSize(sqinfo.len, hmm->M) <= RAMLIMIT)	sc = P7Viterbi(dsq, sqinfo.len, hmm, &tr);      else	sc = P7SmallViterbi(dsq, sqinfo.len, hmm, &tr);      /* 2. If we're using Forward scores, calculate the       *    whole sequence score; this overrides anything       *    PostprocessSignificantHit() is going to do to the per-seq score.       */      if (do_forward) {	sc  = P7Forward(dsq, sqinfo.len, hmm, NULL);	if (do_null2)   sc -= TraceScoreCorrection(hmm, tr, dsq);       }#if DEBUGLEVEL >= 2      P7PrintTrace(stdout, tr, hmm, dsq); #endif      /* 2. Store score/pvalue for global alignment; will sort on score,       *    which in hmmsearch is monotonic with E-value.        *    Keep all domains in a significant sequence hit.       *    We can only make a lower bound estimate of E-value since       *    we don't know the final value of nseq yet, so the list       *    of hits we keep in memory is >= the list we actually       *    output.        */      pvalue = PValue(hmm, sc);      evalue = thresh->Z ? (double) thresh->Z * pvalue : (double) nseq * pvalue;      if (sc >= thresh->globT && evalue <= thresh->globE) 	{	  PostprocessSignificantHit(ghit, dhit, 				    tr, hmm, dsq, sqinfo.len,				    sqinfo.name, 				    sqinfo.flags & SQINFO_ACC  ? sqinfo.acc  : NULL, 				    sqinfo.flags & SQINFO_DESC ? sqinfo.desc : NULL, 				    do_forward, sc,				    do_null2,				    thresh,				    FALSE); /* FALSE-> not hmmpfam mode, hmmsearch mode */	}      AddToHistogram(histogram, sc);      FreeSequence(seq, &sqinfo);       P7FreeTrace(tr);      free(dsq);    }#endif  *ret_nseq = nseq;  return;}#ifdef HMMER_PVM/***************************************************************** * PVM specific functions ****************************************************************/ /* Function: main_loop_pvm() * Date:     SRE, Wed Sep 23 10:36:44 1998 [St. Louis] * * Purpose:  Search an HMM against a sequence database. *           main loop for the PVM version. *            *           In:   HMM and open sqfile, plus options *           Out:  histogram, global hits list, domain hits list, nseq. * * Args:     hmm        - the HMM to search with. scoring form. *           sqfp       - open SQFILE for sequence database *           thresh     - score/evalue threshold information *           do_forward - TRUE to score using Forward()         *           do_null2   - TRUE to use ad hoc null2 score correction *           do_xnu     - TRUE to apply XNU mask *           histogram  - RETURN: score histogram *           ghit       - RETURN: ranked global scores *           dhit       - RETURN: ranked domain scores *           ret_nseq   - RETURN: actual number of seqs searched *            * Returns:  (void) */static voidmain_loop_pvm(struct plan7_s *hmm, SQFILE *sqfp, struct threshold_s *thresh, int do_forward,	      int do_null2, int do_xnu, struct histogram_s *histogram, 	      struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nseq){  char *seq;                    /* target sequence */  char *dsq;                    /* digitized target seq */  SQINFO sqinfo;                /* optional info about target seq */  int   master_tid;		/* master's (my) PVM TID */  int  *slave_tid;              /* array of slave TID's  */  int   nslaves;		/* number of slaves      */  int   code;			/* status code rec'd from a slave */  int   nseq;			/* number of sequences searched */  int   sent_trace;		/* TRUE if slave gave us a trace */  char **dsqlist;               /* remember what seqs slaves are doing */  char **namelist;              /* remember what seq names slaves are doing */  char **acclist ;              /* remember what seq accessions slaves are doing */  char **desclist;              /* remember what seq desc's slaves are doing */  int   *lenlist;               /* remember lengths of seqs slaves are doing */  int    slaveidx;		/* counter for slaves */  float  sc;			/* score of an alignment */  double pvalue;		/* P-value of a score of an alignment */  struct p7trace_s *tr;         /* Viterbi traceback of an alignment */  int    i;			/* generic counter */  /* Initialize PVM.   */  SQD_DPRINTF1(("Requesting master TID...\n"));  master_tid = pvm_mytid();#if DEBUGLEVEL >= 1  pvm_catchout(stderr);		/* catch output for debugging */#endif  SQD_DPRINTF1(("Spawning slaves...\n"));  PVMSpawnSlaves("hmmsearch-pvm", &slave_tid, &nslaves);  SQD_DPRINTF1(("Spawned a total of %d slaves...\n", nslaves));   /* Initialize the slaves by broadcast.   */  SQD_DPRINTF1(("Broadcasting to %d slaves...\n", nslaves));  pvm_initsend(PvmDataDefault);  pvm_pkfloat(&(thresh->globT), 1, 1);  pvm_pkdouble(&(thresh->globE), 1, 1);  pvm_pkint(&(thresh->Z),          1, 1);  pvm_pkint(&do_forward, 1, 1);  pvm_pkint(&do_null2,   1, 1);  pvm_pkint(&Alphabet_type, 1, 1);  PVMPackHMM(hmm);  pvm_mcast(slave_tid, nslaves, HMMPVM_INIT);  SQD_DPRINTF1(("Slaves should be ready...\n"));  /* Confirm slaves' OK status.   */  PVMConfirmSlaves(slave_tid, nslaves);  SQD_DPRINTF1(("Slaves confirm that they're ok...\n"));    /* Alloc arrays for remembering what seq each   * slave was working on.   */  namelist = MallocOrDie(sizeof(char *) * nslaves);  acclist  = MallocOrDie(sizeof(char *) * nslaves);  desclist = MallocOrDie(sizeof(char *) * nslaves);  dsqlist  = MallocOrDie(sizeof(char *) * nslaves);  lenlist  = MallocOrDie(sizeof(int) * nslaves);  /* Load the slaves.   * Give them all a sequence number and a digitized sequence   * to work on.   * A side effect of the seq number is that we assign each slave

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -