📄 hmmpfam.c
字号:
NULL); /* alignment info*/ evalue = pvalue * (double) thresh.Z; /* safedesc is a workaround for an apparent Linux printf() * bug with the *.*s format. dbmalloc crashes with a memchr() ptr out of bounds * flaw if the malloc'ed space for desc is short. The workaround * is to make sure the ptr for *.* has a big malloc space. */ if (desc != NULL && strlen(desc) < 80) { safedesc = MallocOrDie(sizeof(char) * 80); strcpy(safedesc, desc); } else safedesc = Strdup(desc); /* sneaky trick warning: * if we're using dynamic Pfam score cutoffs (GA, TC, NC), * then the list of hits is already correct and does not * need any score cutoffs. Unset the thresholds. They'll * be reset in the main_loop if we still have sequences * to process. */ if (thresh.autocut != CUT_NONE) { thresh.globE = thresh.domE = FLT_MAX; thresh.globT = thresh.domT = -FLT_MAX; } if (evalue <= thresh.globE && sc >= thresh.globT) { printf("%-*s %-*.*s %7.1f %10.2g %3d\n", namewidth, (show_acc && acc != NULL) ? acc : name, descwidth, descwidth, safedesc != NULL ? safedesc : "", sc, evalue, ndom); nreported++; } free(safedesc); } if (nreported == 0) printf("\t[no hits above thresholds]\n"); /* 3. Report domain hits (sorted on sqto coordinate) */ FullSortTophits(dhit); namewidth = MAX(8, TophitsMaxName(dhit)); /* must print whole name, no truncation */ printf("\nParsed for domains:\n"); printf("%-*s %7s %5s %5s %5s %5s %7s %8s\n", namewidth, "Model", "Domain ", "seq-f", "seq-t", "hmm-f", "hmm-t", "score", "E-value"); printf("%-*s %7s %5s %5s %5s %5s %7s %8s\n", namewidth, "--------", "-------", "-----", "-----", "-----", "-----", "-----", "-------"); for (i = 0, nreported = 0; i < dhit->num; i++) { GetRankedHit(dhit, i, &pvalue, &sc, &motherp, &mothersc, &name, &acc, NULL, &sqfrom, &sqto, NULL, &hmmfrom, &hmmto, &hmmlen, &domidx, &ndom, NULL); evalue = pvalue * (double) thresh.Z; /* Does the "mother" (complete) sequence satisfy global thresholds? */ if (motherp * (double)thresh. Z > thresh.globE || mothersc < thresh.globT) continue; else if (evalue <= thresh.domE && sc >= thresh.domT) { printf("%-*s %3d/%-3d %5d %5d %c%c %5d %5d %c%c %7.1f %8.2g\n", namewidth, (show_acc && acc != NULL) ? acc : name, domidx, ndom, sqfrom, sqto, sqfrom == 1 ? '[' : '.', sqto == sqinfo.len ? ']' : '.', hmmfrom, hmmto, hmmfrom == 1 ? '[':'.', hmmto == hmmlen ? ']' : '.', sc, evalue); nreported++; } } if (nreported == 0) printf("\t[no hits above thresholds]\n"); /* 3. Alignment output, also by domain. * dhits is already sorted and namewidth is set, from above code. * Number of displayed alignments is limited by Alimit parameter; * also by domE (evalue threshold), domT (score theshold). */ if (Alimit != 0) { printf("\nAlignments of top-scoring domains:\n"); for (i = 0, nreported = 0; i < dhit->num; i++) { if (nreported == Alimit) break; /* limit to Alimit output alignments */ GetRankedHit(dhit, i, &pvalue, &sc, &motherp, &mothersc, &name, &acc, NULL, &sqfrom, &sqto, NULL, /* seq position info */ &hmmfrom, &hmmto, &hmmlen, /* HMM position info */ &domidx, &ndom, /* domain info */ &ali); /* alignment info */ evalue = pvalue * (double) thresh.Z; if (motherp * (double) thresh.Z > thresh.globE || mothersc < thresh.globT) continue; else if (evalue <= thresh.domE && sc >= thresh.domT) { printf("%s: domain %d of %d, from %d to %d: score %.1f, E = %.2g\n", (show_acc && acc != NULL) ? acc : name, domidx, ndom, sqfrom, sqto, sc, evalue); PrintFancyAli(stdout, ali); nreported++; } } if (nreported == 0) printf("\t[no hits above thresholds]\n"); if (nreported == Alimit) printf("\t[output cut off at A = %d top alignments]\n", Alimit); } printf("//\n"); FreeSequence(seq, &sqinfo); FreeTophits(ghit); FreeTophits(dhit); HMMFileRewind(hmmfp); } /*********************************************** * Clean-up and exit. ***********************************************/ SeqfileClose(sqfp); HMMFileClose(hmmfp); SqdClean(); return 0;}/* Function: main_loop_serial() * Date: SRE, Fri Aug 7 13:46:48 1998 [St. Louis] * * Purpose: Search a sequence against an HMM database; * main loop for the serial (non-PVM, non-threads) * version. * * On return, ghit and dhit contain info for all hits * that satisfy the set thresholds. If an evalue * cutoff is used at all, the lists will be overestimated -- * because the evalue will be underestimated until * we know the final Z. (Thus the main program must recheck * thresholds before printing any results.) If only * score cutoffs are used, then the lists are correct, * and may be printed exactly as they come (after * appropriate sorting, anyway). This is especially * important for dynamic thresholding using Pfam * score cutoffs -- the main caller cannot afford to * rescan the HMM file just to get the GA/TC/NC cutoffs * back out for each HMM, and neither do I want to * burn the space to store them as I make a pass thru * Pfam. * * Args: hmmfile - name of HMM file * hmmfp - open HMM file (and at start of file) * dsq - digitized sequence * sqinfo - ptr to SQINFO optional info for dsq * thresh - score/evalue threshold information * do_xnu - TRUE to apply XNU filter to sequence * do_forward - TRUE to use Forward() scores * do_null2 - TRUE to adjust scores w/ ad hoc null2 model * num_threads- number of threads, if threaded * ghit - global hits list * dhit - domain hits list * ret_nhmm - number of HMMs searched. * * Returns: (void) */static voidmain_loop_serial(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2, int num_threads, struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm) { char *dsq; /* digitized sequence */ int nhmm; /* number of HMMs searched */#ifdef HMMER_THREADS struct workpool_s *wpool; /* pool of worker threads */#endif struct plan7_s *hmm; /* current HMM to search with */ struct p7trace_s *tr; /* traceback of alignment */ float sc; /* an alignment score */ double pvalue; /* pvalue of an HMM score */ double evalue; /* evalue of an HMM score */ /* Prepare sequence. */ dsq = DigitizeSequence(seq, sqinfo->len); if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo->len);#ifdef HMMER_THREADS if (num_threads > 0) { wpool = workpool_start(hmmfile, hmmfp, dsq, sqinfo->name, sqinfo->len, do_forward, do_null2, thresh, ghit, dhit, num_threads); workpool_stop(wpool); nhmm = wpool->nhmm; workpool_free(wpool); free(dsq); *ret_nhmm = nhmm; return; } #endif /* unthreaded code: */ nhmm = 0; while (HMMFileRead(hmmfp, &hmm)) { if (hmm == NULL) Die("HMM file %s may be corrupt or in incorrect format; parse failed", hmmfile); P7Logoddsify(hmm, !(do_forward)); if (! SetAutocuts(thresh, hmm)) Die("HMM %s did not contain the GA, TC, or NC cutoffs you needed", hmm->name); /* Score sequence, do alignment (Viterbi), recover trace */ if (P7ViterbiSize(sqinfo->len, hmm->M) <= RAMLIMIT) sc = P7Viterbi(dsq, sqinfo->len, hmm, &tr); else sc = P7SmallViterbi(dsq, sqinfo->len, hmm, &tr); /* Implement do_forward; we'll override the whole_sc with a P7Forward() * calculation. * HMMER is so trace- (alignment-) dependent that this gets a bit hacky. * Some important implications: * 1) if --do_forward is selected, the domain (Viterbi) scores do not * necessarily add up to the whole sequence (Forward) score. * 2) The implementation of null2 for a Forward score is undefined, * since the null2 correction is trace-dependent. As a total hack, * we use a null2 correction derived from the whole trace * (which was the behavior of HMMER 2.1.1 and earlier, anyway). * This could put the sum of domain scores and whole seq score even * further in disagreement. * * Note that you can't move the Forward calculation into * PostprocessSignificantHit(). The Forward score will exceed the * Viterbi score, so you can't apply thresholds until you * know the Forward score. Also, since PostprocessSignificantHit() * is wrapped by a mutex in the threaded implementation, * you'd destroy all useful parallelism if PostprocessSignificantHit() * did anything compute intensive. */ if (do_forward) { sc = P7Forward(dsq, sqinfo->len, hmm, NULL); if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); } /* Store scores/pvalue for each HMM aligned to this sequence, overall */ pvalue = PValue(hmm, sc); evalue = thresh->Z ? (double) thresh->Z * pvalue : (double) nhmm * pvalue; if (sc >= thresh->globT && evalue <= thresh->globE) { PostprocessSignificantHit(ghit, dhit, tr, hmm, dsq, sqinfo->len, sqinfo->name, NULL, NULL, /* won't need acc or desc even if we have 'em */ do_forward, sc, do_null2, thresh, TRUE); /* TRUE -> hmmpfam mode */ } P7FreeTrace(tr); FreePlan7(hmm); nhmm++; } free(dsq); *ret_nhmm = nhmm; return;}#ifdef HMMER_PVM/***************************************************************** * PVM specific functions ****************************************************************/ /* Function: main_loop_pvm() * Date: SRE, Fri Aug 7 13:58:34 1998 [St. Louis] * * Purpose: Search a sequence against an HMM database; * main loop for the PVM version. * * Args: hmmfile - name of HMM file * hmmfp - open HMM file (and at start of file) * seq - sequence to search against * sqinfo - ptr to SQINFO optional info for dsq * thresh - score/evalue threshold settings * do_xnu - TRUE to apply XNU filter to sequence * do_forward - TRUE to use Forward() scores * do_null2 - TRUE to adjust scores w/ ad hoc null2 model * ghit - global hits list * dhit - domain hits list * nhmm - number of HMMs searched. * * Returns: (void) */static voidmain_loop_pvm(char *hmmfile, HMMFILE *hmmfp, char *seq, SQINFO *sqinfo, struct threshold_s *thresh, int do_xnu, int do_forward, int do_null2, struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nhmm){ struct plan7_s *hmm; /* HMM that was searched with */ struct p7trace_s *tr; /* a traceback structure */ char *dsq; /* digitized sequence */ float sc; /* score of an HMM match */ int master_tid; /* master's ID */ int *slave_tid; /* array of slave IDs */ int *hmmlist; /* array of hmm indexes being worked on by slaves */ int nslaves; /* number of slaves in virtual machine */ int nhmm; /* number of HMMs searched */ int slaveidx; /* index of a slave wanting work */ int slave, msg; int sent_trace; /* TRUE if slave sent us a trace */ char slavename[32]; /* name of HMM that slave actually did */ double pvalue; /* pvalue of HMM score */ int arglen; /* Sanity checks. */ if (hmmfp->ssi == NULL) Die("HMM file %s needs an SSI index to use PVM. See: hmmindex.", hmmfile); /* Prepare sequence. */ dsq = DigitizeSequence(seq, sqinfo->len); if (do_xnu && Alphabet_type == hmmAMINO) XNU(dsq, sqinfo->len); /* Initialize PVM */ master_tid = pvm_mytid();#if DEBUGLEVEL >= 1 pvm_catchout(stderr); /* catch output for debugging */#endif SQD_DPRINTF1(("Spawning slaves...\n")); PVMSpawnSlaves("hmmpfam-pvm", &slave_tid, &nslaves); hmmlist = MallocOrDie(sizeof(int) * nslaves); SQD_DPRINTF1(("Spawned a total of %d slaves...\n", nslaves)); /* Initialize the slaves */ SQD_DPRINTF1(("Broadcasting to %d slaves...\n", nslaves)); pvm_initsend(PvmDataDefault); arglen = strlen(hmmfile); pvm_pkint(&arglen, 1, 1); pvm_pkstr(hmmfile); pvm_pkint(&(sqinfo->len), 1, 1); pvm_pkstr(seq); pvm_pkfloat(&(thresh->globT), 1, 1); pvm_pkdouble(&(thresh->globE), 1, 1); pvm_pkint(&(thresh->Z), 1, 1); pvm_pkint((int *)&(thresh->autocut), 1, 1); pvm_pkint(&do_xnu, 1, 1); pvm_pkint(&do_forward, 1, 1); pvm_pkint(&do_null2, 1, 1); pvm_pkint(&Alphabet_type, 1, 1); pvm_mcast(slave_tid, nslaves, HMMPVM_INIT); SQD_DPRINTF1(("Slaves should be ready...\n"));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -