📄 hmmpostal.c
字号:
printf("done. [%.0f]\n", eff_nseq); } /* Weight the sequences (optional), */ /* Weight the sequences (optional), */ if (w_strategy == WGT_GSC || w_strategy == WGT_BLOSUM || w_strategy == WGT_VORONOI) { printf("%-40s ... ", "Weighting sequences heuristically"); fflush(stdout); if (w_strategy == WGT_GSC) GSCWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); else if (w_strategy == WGT_BLOSUM) BlosumWeights(msa->aseq, msa->nseq, msa->alen, blosumlevel, msa->wgt); else if (w_strategy == WGT_VORONOI) VoronoiWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt); printf("done.\n"); } /* Set the effective sequence number (if do_eff is FALSE, eff_nseq * was set to nseq). */ FNorm(msa->wgt, msa->nseq); FScale(msa->wgt, msa->nseq, eff_nseq); /* Build a model architecture. * If we're not doing MD or ME, that's all we need to do. * We get an allocated, counts-based HMM back. */ printf("%-40s ... ", "Constructing model architecture"); fflush(stdout); checksum = GCGMultchecksum(msa->aseq, msa->nseq); if (c_strategy == P7_FAST_CONSTRUCTION) P7Fastmodelmaker(msa, dsq, gapmax, &hmm, &tr); else if (c_strategy == P7_HAND_CONSTRUCTION) P7Handmodelmaker(msa, dsq, &hmm, &tr); else P7Maxmodelmaker(msa, dsq, gapmax, pri, randomseq, p1, archpri, &hmm, &tr); hmm->checksum = checksum; printf("done.\n"); /* Save the count vectors if asked. Used primarily for * making the data files for training priors. */ if (cfile != NULL) { save_countvectors(cfile, hmm); } /* Record the null model in the HMM; * add prior contributions in pseudocounts and renormalize. */ Plan7SetNullModel(hmm, randomseq, p1); P7PriorifyHMM(hmm, pri); /* Model configuration, temporary. * hmmbuild assumes that it's given an alignment of single domains, * and the alignment may contain fragments. So, for the purpose of * scoring the sequences (or, optionally, MD/ME weighting), * configure the model into hmmsw mode. Later we'll * configure the model according to how the user wants to * use it. */ Plan7SWConfig(hmm, 0.5, 0.5); /* Do model-dependent "weighting" strategies. */ /* if (w_strategy == WGT_ME) { maximum_entropy(hmm, dsq, &ainfo, ainfo.nseq, eff_nseq, pri, tr); } */ /* Give the model a name; by default, the name of the alignment file * without any filename extension (i.e. "globins.slx" becomes "globins" */ if (name == NULL) name = FileTail(seqfile, TRUE); Plan7SetName(hmm, name); Plan7ComlogAppend(hmm, argc, argv); Plan7SetCtime(hmm); hmm->nseq = msa->nseq; free(name); /* Configure the model for chosen algorithm */ switch (cfg_strategy) { case P7_BASE_CONFIG: Plan7GlobalConfig(hmm); break; case P7_SW_CONFIG: Plan7SWConfig(hmm, swentry, swexit); break; case P7_LS_CONFIG: Plan7LSConfig(hmm); break; case P7_FS_CONFIG: Plan7FSConfig(hmm, swentry, swexit); break; default: Die("bogus configuration choice"); } } /* Optionally save new HMM to disk: open a file for appending or writing. */ P7Logoddsify(hmm, TRUE); if (hmmfile) save_model(hmm, hmmfile, do_append, do_binary); /* Display posterior probabilities for each sequence, re-aligning them to the model if user requested that */ for (idx = 0; idx < msa->nseq; idx++) { printf ("#\n# Sequence %d: %s\n#\n", idx + 1, msa->sqname[idx]); len = DealignedLength(msa->aseq[idx]); if (P7ViterbiSize(len, hmm->M) * 2 > RAMLIMIT) Die("insufficient memory"); (void) P7Forward (dsq[idx], len, hmm, &forward_mx); (void) P7Backward (dsq[idx],len, hmm, &backward_mx); if (r_strategy == REALIGN_VITERBI) { if (tr[idx]) P7FreeTrace (tr[idx]); if (P7ViterbiSize(len, hmm->M) * 3 <= RAMLIMIT) (void) P7Viterbi(dsq[idx], len, hmm, &(tr[idx])); else (void) P7SmallViterbi(dsq[idx], len, hmm, &(tr[idx])); } else if (r_strategy == REALIGN_OPTACC) { if (tr[idx]) P7FreeTrace (tr[idx]); if (P7ViterbiSize(len, hmm->M) * 4 > RAMLIMIT) Die("insufficient memory"); posterior_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0); P7EmitterPosterior (len, hmm, forward_mx, backward_mx, posterior_mx); optacc_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0); (void) P7FillOptimalAccuracy (len, hmm->M, posterior_mx, optacc_mx, &(tr[idx])); FreePlan7Matrix (posterior_mx); FreePlan7Matrix (optacc_mx); } posterior_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0); P7EmitterPosterior (len, hmm, forward_mx, backward_mx, posterior_mx); Postcode(len, posterior_mx, tr[idx]); /* DisplayPlan7Matrix(dsq[idx], len, hmm, posterior_mx); */ /* DisplayPlan7PostAlign (len, hmm, forward_mx, backward_mx, &(tr[idx]), 1); */ FreePlan7Matrix (backward_mx); FreePlan7Matrix (forward_mx); } /* the annotated alignment may be resaved */ if (align_ofile != NULL) { MSA *new_msa; SQINFO *sqinfo; sqinfo = MSAToSqinfo(msa); new_msa = P7Traces2Alignment(dsq, sqinfo, msa->wgt, msa->nseq, hmm->M, tr, FALSE); if ((fp = fopen(align_ofile, "w")) == NULL) { Warn("Failed to open alignment resave file %s; using stdout instead", align_ofile); fp = stdout; } WriteStockholm(fp, new_msa); MSAFree(new_msa); for (idx = 0; idx < msa->nseq; idx++) FreeSequence(NULL, &(sqinfo[idx])); free(sqinfo); if (fp != stdout) fclose(fp); } /* Verbose output; show scores for each sequence */ /* if (verbose) print_all_scores(stdout, hmm, dsq, msq, tr); */ /* Clean up and exit */ for (idx = 0; idx < msa->nseq; idx++) P7FreeTrace(tr[idx]); free(tr); FreePlan7(hmm); P7FreePrior(pri); Free2DArray((void **) dsq, msa->nseq); MSAFree(msa); SqdClean(); return 0;}/* Function: save_model() * * Purpose: Save the new model to a file. * * Args: hmm - model to save * hmmfile - file to save to (if NULL, use stdout) * do_append - TRUE to append to file * do_binary - TRUE to write a binary file * * Return: (void) */ static voidsave_model(struct plan7_s *hmm, char *hmmfile, int do_append, int do_binary){ FILE *fp; if (hmmfile == NULL) fp = stdout; else if (do_append) { /* check that it looks like an HMM file */#ifdef REMOVED /* This code induces an unresolved Linux/SGI NFS bug! */ if (FileExists(hmmfile)) { HMMFILE *hmmfp; hmmfp = HMMFileOpen(hmmfile, NULL); if (hmmfp == NULL) { Warn("%s not an HMM file; can't append to it; using stdout instead", hmmfile); fp = stdout; puts(""); /* do a newline before stdout HMM starts */ } else { HMMFileClose(hmmfp); } }#endif if ((fp = fopen(hmmfile, "a")) == NULL) { Warn("hey, where'd your HMM file go? Using stdout instead."); fp = stdout; puts(""); /* do a newline before stdout HMM starts */ } } else { if ((fp = fopen(hmmfile, "w")) == NULL) { Warn("Failed to open HMM save file %s; using stdout instead", hmmfile); fp = stdout; puts(""); /* do a newline before stdout HMM starts */ } } if (do_binary) WriteBinHMM(fp, hmm); else WriteAscHMM(fp, hmm); if (fp != stdout) fclose(fp); return;}/* Function: print_all_scores() * * Purpose: For each training sequence, print its score under * the final model. * * Args: fp - where to print the output (usu. stdout) * hmm - newly constructed HMM, with prob's. * ainfo- info with aseq * dsq - digitized unaligned training sequences. * nseq - number of training sequences * tr - array of tracebacks * * Return: (void) */static voidprint_all_scores(FILE *fp, struct plan7_s *hmm, AINFO *ainfo, char **dsq, int nseq, struct p7trace_s **tr){ int idx; /* counter for sequences */ /* make sure model scores are ready */ P7Logoddsify(hmm, TRUE); /* header */ fputs("**\n", fp); fputs("Individual training sequence scores:\n", fp); /* score for each sequence */ for (idx = 0; idx < nseq; idx++) { fprintf(fp, "%7.2f %-12s %s\n", P7TraceScore(hmm, dsq[idx], tr[idx]), ainfo->sqinfo[idx].name, (ainfo->sqinfo[idx].flags & SQINFO_DESC) ? ainfo->sqinfo[idx].desc : ""); P7PrintTrace(fp, tr[idx], hmm, dsq[idx]); } fputs("\n", fp);}/* Function: save_countvectors() * * Purpose: Save emission/transition count vectors to a file. * Used for gathering the data on which to train a * prior (e.g. mixture Dirichlet, etc.) * * The format of the file is one vector per line: * M <f> <f> ...: 20 match emission counts in order AC..WY. * I <f> <f> ...: 20 insert emission counts in order AC..WY. * T <f> <f> ...: 7 transition counts in order TMM, TMI, TMD, * TIM, TII, TDM, TDD. (see structs.h) * * Args: cfile - counts file to make * hmm - counts-based HMM */static voidsave_countvectors(char *cfile, struct plan7_s *hmm){ FILE *fp; int k, x; if ((fp = fopen(cfile, "w")) == NULL) Die("failed to open count vector file %s for writing", cfile); /* match emission vectors */ for (k = 1; k <= hmm->M; k++) { fputs("M ", fp); for (x = 0; x < Alphabet_size; x++) fprintf(fp, "%.2f ", hmm->mat[k][x]); fputs("\n", fp); } /* insert emission vectors */ for (k = 1; k < hmm->M; k++) { fputs("I ", fp); for (x = 0; x < Alphabet_size; x++) fprintf(fp, "%.2f ", hmm->ins[k][x]); fputs("\n", fp); } /* transition vectors */ for (k = 1; k < hmm->M; k++) { fputs("T ", fp); for (x = 0; x < 7; x++) fprintf(fp, "%.2f ", hmm->t[k][x]); fputs("\n", fp); } fclose(fp);}/* Function: position_average_score()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -