⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmpfam.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
		       NULL);	                   /* alignment info*/	  evalue = pvalue * (double) thresh.Z;	  /* safedesc is a workaround for an apparent Linux printf()	   * bug with the *.*s format. dbmalloc crashes with a memchr() ptr out of bounds	   * flaw if the malloc'ed space for desc is short. The workaround           * is to make sure the ptr for *.* has a big malloc space.	   */	  if (desc != NULL && strlen(desc) < 80) 	    {	      safedesc = MallocOrDie(sizeof(char) * 80);	      strcpy(safedesc, desc);	    }	  else safedesc = Strdup(desc);	  /* sneaky trick warning:	   * if we're using dynamic Pfam score cutoffs (GA, TC, NC),	   * then the list of hits is already correct and does not	   * need any score cutoffs. Unset the thresholds. They'll           * be reset in the main_loop if we still have sequences           * to process.	   */	  if (thresh.autocut != CUT_NONE) {	    thresh.globE = thresh.domE = FLT_MAX;	    thresh.globT = thresh.domT = -FLT_MAX;	  }	  if (evalue <= thresh.globE && sc >= thresh.globT) 	    {	      printf("%-*s %-*.*s %7.1f %10.2g %3d\n", 		     namewidth, 		     (show_acc && acc != NULL) ?  acc : name,		     descwidth, descwidth, safedesc != NULL ? safedesc : "",		     sc, evalue, ndom);	      nreported++;	    }	  free(safedesc);	}      if (nreported == 0) printf("\t[no hits above thresholds]\n");      /* 3. Report domain hits (sorted on sqto coordinate)       */      FullSortTophits(dhit);      namewidth = MAX(8, TophitsMaxName(dhit)); /* must print whole name, no truncation  */      printf("\nParsed for domains:\n");      printf("%-*s %7s %5s %5s    %5s %5s    %7s %8s\n",	     namewidth, "Model", "Domain ", "seq-f", "seq-t", "hmm-f", "hmm-t", "score", "E-value");      printf("%-*s %7s %5s %5s    %5s %5s    %7s %8s\n",	     namewidth, "--------", "-------", "-----", "-----", "-----", "-----", "-----", "-------");            for (i = 0, nreported = 0; i < dhit->num; i++)	{	  GetRankedHit(dhit, i, 		       &pvalue, &sc, &motherp, &mothersc,		       &name, &acc, NULL,		       &sqfrom, &sqto, NULL, 		       &hmmfrom, &hmmto, &hmmlen, 		       &domidx, &ndom,		       NULL);	  evalue = pvalue * (double) thresh.Z;	  				/* Does the "mother" (complete) sequence satisfy global thresholds? */	  if (motherp * (double)thresh. Z > thresh.globE || mothersc < thresh.globT) 	    continue;	  else if (evalue <= thresh.domE && sc >= thresh.domT) {	    printf("%-*s %3d/%-3d %5d %5d %c%c %5d %5d %c%c %7.1f %8.2g\n",		   namewidth, 		   (show_acc && acc != NULL) ?  acc : name,		   domidx, ndom,		   sqfrom, sqto, 		   sqfrom == 1 ? '[' : '.', sqto == sqinfo.len ? ']' : '.',		   hmmfrom, hmmto,		   hmmfrom == 1 ? '[':'.',  hmmto == hmmlen ? ']' : '.',		   sc, evalue);	    nreported++;	  }	}      if (nreported == 0) printf("\t[no hits above thresholds]\n");            /* 3. Alignment output, also by domain.       *    dhits is already sorted and namewidth is set, from above code.       *    Number of displayed alignments is limited by Alimit parameter;       *    also by domE (evalue threshold), domT (score theshold).       */      if (Alimit != 0)	{	  printf("\nAlignments of top-scoring domains:\n");	  for (i = 0, nreported = 0; i < dhit->num; i++)	    {	      if (nreported == Alimit) break; /* limit to Alimit output alignments */	      GetRankedHit(dhit, i, 			   &pvalue, &sc, &motherp, &mothersc,			   &name, &acc, NULL,			   &sqfrom, &sqto, NULL,              /* seq position info  */			   &hmmfrom, &hmmto, &hmmlen,         /* HMM position info  */			   &domidx, &ndom,                    /* domain info        */			   &ali);	                      /* alignment info     */	      evalue = pvalue * (double) thresh.Z;	      if (motherp * (double) thresh.Z > thresh.globE || mothersc < thresh.globT) 		continue;	      else if (evalue <= thresh.domE && sc >= thresh.domT) 		{		  printf("%s: domain %d of %d, from %d to %d: score %.1f, E = %.2g\n", 			 (show_acc && acc != NULL) ?  acc : name,			 domidx, ndom, sqfrom, sqto, sc, evalue);		  PrintFancyAli(stdout, ali);		  nreported++;		}	    }	  if (nreported == 0)      printf("\t[no hits above thresholds]\n");	  if (nreported == Alimit) printf("\t[output cut off at A = %d top alignments]\n", Alimit);	}      printf("//\n");      FreeSequence(seq, &sqinfo);       FreeTophits(ghit);      FreeTophits(dhit);      HMMFileRewind(hmmfp);    }  /***********************************************    * Clean-up and exit.   ***********************************************/  SeqfileClose(sqfp);  HMMFileClose(hmmfp);  SqdClean();  return 0;}/* Function: main_loop_serial() * Date:     SRE, Fri Aug  7 13:46:48 1998 [St. Louis] * * Purpose:  Search a sequence against an HMM database; *           main loop for the serial (non-PVM, non-threads) *           version.  *            *           On return, ghit and dhit contain info for all hits *           that satisfy the set thresholds. If an evalue *           cutoff is used at all, the lists will be overestimated -- *           because the evalue will be underestimated until *           we know the final Z. (Thus the main program must recheck *           thresholds before printing any results.) If only *           score cutoffs are used, then the lists are correct, *           and may be printed exactly as they come (after  *           appropriate sorting, anyway). This is especially *           important for dynamic thresholding using Pfam *           score cutoffs -- the main caller cannot afford to *           rescan the HMM file just to get the GA/TC/NC cutoffs *           back out for each HMM, and neither do I want to *           burn the space to store them as I make a pass thru *           Pfam. * * Args:     hmmfile - name of HMM file *           hmmfp   - open HMM file (and at start of file) *           dsq     - digitized sequence *           sqinfo  - ptr to SQINFO optional info for dsq *           thresh  - score/evalue threshold information *           do_xnu     - TRUE to apply XNU filter to sequence *           do_forward - TRUE to use Forward() scores *           do_null2   - TRUE to adjust scores w/ ad hoc null2 model *           num_threads- number of threads, if threaded *           ghit    - global hits list *           dhit    - domain hits list *           ret_nhmm    - number of HMMs searched. * * Returns:  (void) */static voidmain_loop_serial(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, 		 struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2,		 int num_threads,		 struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm) {  char              *dsq;       /* digitized sequence                      */  int     nhmm;			/* number of HMMs searched                 */#ifdef HMMER_THREADS  struct workpool_s *wpool;     /* pool of worker threads  */#endif  struct plan7_s    *hmm;       /* current HMM to search with              */   struct p7trace_s  *tr;	/* traceback of alignment                  */  float   sc;                   /* an alignment score                      */   double  pvalue;		/* pvalue of an HMM score                  */  double  evalue;		/* evalue of an HMM score                  */  /* Prepare sequence.   */  dsq = DigitizeSequence(seq, sqinfo->len);  if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo->len);#ifdef HMMER_THREADS  if (num_threads > 0) {    wpool = workpool_start(hmmfile, hmmfp, dsq, sqinfo->name, sqinfo->len, 			   do_forward, do_null2, thresh,			   ghit, dhit, num_threads);    workpool_stop(wpool);    nhmm = wpool->nhmm;    workpool_free(wpool);    free(dsq);    *ret_nhmm = nhmm;    return;  } #endif				/* unthreaded code: */  nhmm = 0;  while (HMMFileRead(hmmfp, &hmm)) {    if (hmm == NULL)       Die("HMM file %s may be corrupt or in incorrect format; parse failed", hmmfile);    P7Logoddsify(hmm, !(do_forward));    if (! SetAutocuts(thresh, hmm))       Die("HMM %s did not contain the GA, TC, or NC cutoffs you needed",	  hmm->name);    /* Score sequence, do alignment (Viterbi), recover trace     */    if (P7ViterbiSize(sqinfo->len, hmm->M) <= RAMLIMIT)      sc = P7Viterbi(dsq, sqinfo->len, hmm, &tr);    else      sc = P7SmallViterbi(dsq, sqinfo->len, hmm, &tr);    /* Implement do_forward; we'll override the whole_sc with a P7Forward()     * calculation.     * HMMER is so trace- (alignment-) dependent that this gets a bit hacky.     * Some important implications:      *   1) if --do_forward is selected, the domain (Viterbi) scores do not     *      necessarily add up to the whole sequence (Forward) score.     *   2) The implementation of null2 for a Forward score is undefined,     *      since the null2 correction is trace-dependent. As a total hack,      *      we use a null2 correction derived from the whole trace     *      (which was the behavior of HMMER 2.1.1 and earlier, anyway).     *      This could put the sum of domain scores and whole seq score even     *      further in disagreement.      *      * Note that you can't move the Forward calculation into      * PostprocessSignificantHit(). The Forward score will exceed the     * Viterbi score, so you can't apply thresholds until you     * know the Forward score. Also, since PostprocessSignificantHit()     * is wrapped by a mutex in the threaded implementation,     * you'd destroy all useful parallelism if PostprocessSignificantHit()     * did anything compute intensive.      */    if (do_forward) {      sc = P7Forward(dsq, sqinfo->len, hmm, NULL);      if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq);    }	    /* Store scores/pvalue for each HMM aligned to this sequence, overall     */    pvalue = PValue(hmm, sc);    evalue = thresh->Z ? (double) thresh->Z * pvalue : (double) nhmm * pvalue;    if (sc >= thresh->globT && evalue <= thresh->globE) {      PostprocessSignificantHit(ghit, dhit, 				tr, hmm, dsq, sqinfo->len, 				sqinfo->name, NULL, NULL, /* won't need acc or desc even if we have 'em */				do_forward, sc,				do_null2,				thresh,				TRUE); /* TRUE -> hmmpfam mode */    }    P7FreeTrace(tr);    FreePlan7(hmm);    nhmm++;  }  free(dsq);  *ret_nhmm = nhmm;  return;}#ifdef HMMER_PVM/***************************************************************** * PVM specific functions ****************************************************************/ /* Function: main_loop_pvm() * Date:     SRE, Fri Aug  7 13:58:34 1998 [St. Louis] * * Purpose:  Search a sequence against an HMM database; *           main loop for the PVM version. * * Args:     hmmfile - name of HMM file *           hmmfp   - open HMM file (and at start of file) *           seq     - sequence to search against *           sqinfo  - ptr to SQINFO optional info for dsq *           thresh  - score/evalue threshold settings *           do_xnu     - TRUE to apply XNU filter to sequence *           do_forward - TRUE to use Forward() scores *           do_null2   - TRUE to adjust scores w/ ad hoc null2 model *           ghit    - global hits list *           dhit    - domain hits list *           nhmm    - number of HMMs searched. * * Returns:  (void) */static voidmain_loop_pvm(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, 	      struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2,	      struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm){  struct plan7_s   *hmm;        /* HMM that was searched with */  struct p7trace_s *tr;         /* a traceback structure */  char  *dsq;                   /* digitized sequence */   float  sc;			/* score of an HMM match */  int    master_tid;		/* master's ID */  int   *slave_tid;             /* array of slave IDs */  int   *hmmlist;               /* array of hmm indexes being worked on by slaves */  int    nslaves;	       	/* number of slaves in virtual machine   */  int    nhmm;			/* number of HMMs searched */  int    slaveidx;		/* index of a slave wanting work         */  int    slave, msg;  int    sent_trace;		/* TRUE if slave sent us a trace */  char   slavename[32];		/* name of HMM that slave actually did */  double pvalue;		/* pvalue of HMM score */  int    arglen;  /* Sanity checks.   */  if (hmmfp->ssi == NULL)    Die("HMM file %s needs an SSI index to use PVM. See: hmmindex.", hmmfile);  /* Prepare sequence.   */  dsq = DigitizeSequence(seq, sqinfo->len);  if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo->len);  /* Initialize PVM   */  master_tid = pvm_mytid();#if DEBUGLEVEL >= 1    pvm_catchout(stderr);		/* catch output for debugging */#endif  SQD_DPRINTF1(("Spawning slaves...\n"));  PVMSpawnSlaves("hmmpfam-pvm", &slave_tid, &nslaves);  hmmlist   = MallocOrDie(sizeof(int) * nslaves);  SQD_DPRINTF1(("Spawned a total of %d slaves...\n", nslaves));  /* Initialize the slaves   */  SQD_DPRINTF1(("Broadcasting to %d slaves...\n", nslaves));  pvm_initsend(PvmDataDefault);  arglen = strlen(hmmfile);  pvm_pkint(&arglen, 1, 1);  pvm_pkstr(hmmfile);  pvm_pkint(&(sqinfo->len), 1, 1);  pvm_pkstr(seq);  pvm_pkfloat(&(thresh->globT), 1, 1);  pvm_pkdouble(&(thresh->globE), 1, 1);  pvm_pkint(&(thresh->Z), 1, 1);  pvm_pkint((int *)&(thresh->autocut), 1, 1);  pvm_pkint(&do_xnu, 1, 1);  pvm_pkint(&do_forward, 1, 1);  pvm_pkint(&do_null2, 1, 1);  pvm_pkint(&Alphabet_type, 1, 1);  pvm_mcast(slave_tid, nslaves, HMMPVM_INIT);  SQD_DPRINTF1(("Slaves should be ready...\n"));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -