📄 plan7.c
字号:
/************************************************************ * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved * * This source code is distributed under the terms of the * GNU General Public License. See the files COPYING and LICENSE * for details. ************************************************************//* plan7.c * SRE, Sat Nov 16 14:19:56 1996 * * Support for Plan 7 HMM data structure, plan7_s. */#include <stdio.h>#include <string.h>#include <ctype.h>#include <time.h>#include "funcs.h"#include "config.h"#include "structs.h"#include "squid.h"/* Functions: AllocPlan7(), AllocPlan7Shell(), AllocPlan7Body(), FreePlan7() * * Purpose: Allocate or free a Plan7 HMM structure. * Can either allocate all at one (AllocPlan7()) or * in two steps (AllocPlan7Shell(), AllocPlan7Body()). * The two step method is used in hmmio.c where we start * parsing the header of an HMM file but don't * see the size of the model 'til partway thru the header. */struct plan7_s *AllocPlan7(int M) { struct plan7_s *hmm; hmm = AllocPlan7Shell(); AllocPlan7Body(hmm, M); return hmm;} struct plan7_s *AllocPlan7Shell(void) { struct plan7_s *hmm; hmm = (struct plan7_s *) MallocOrDie (sizeof(struct plan7_s)); hmm->M = 0; hmm->name = NULL; hmm->acc = NULL; hmm->desc = NULL; hmm->rf = NULL; hmm->cs = NULL; hmm->ca = NULL; hmm->comlog = NULL; hmm->nseq = 0; hmm->ctime = NULL; hmm->map = NULL; hmm->checksum = 0; hmm->tpri = NULL; hmm->mpri = NULL; hmm->ipri = NULL; hmm->ga1 = hmm->ga2 = 0.0; hmm->tc1 = hmm->tc2 = 0.0; hmm->nc1 = hmm->nc2 = 0.0; hmm->t = NULL; hmm->tsc = NULL; hmm->mat = NULL; hmm->ins = NULL; hmm->msc = NULL; hmm->isc = NULL; hmm->begin = NULL; hmm->bsc = NULL; hmm->end = NULL; hmm->esc = NULL; /* DNA translation is not enabled by default */ hmm->dnam = NULL; hmm->dnai = NULL; hmm->dna2 = -INFTY; hmm->dna4 = -INFTY; /* statistical parameters set to innocuous empty values */ hmm->mu = 0.; hmm->lambda = 0.; hmm->flags = 0; return hmm;} voidAllocPlan7Body(struct plan7_s *hmm, int M) { int k, x; hmm->M = M; hmm->rf = MallocOrDie ((M+2) * sizeof(char)); hmm->cs = MallocOrDie ((M+2) * sizeof(char)); hmm->ca = MallocOrDie ((M+2) * sizeof(char)); hmm->map = MallocOrDie ((M+1) * sizeof(int)); hmm->t = MallocOrDie (M * sizeof(float *)); hmm->tsc = MallocOrDie (M * sizeof(int *)); hmm->mat = MallocOrDie ((M+1) * sizeof(float *)); hmm->ins = MallocOrDie (M * sizeof(float *)); hmm->msc = MallocOrDie (MAXCODE * sizeof(int *)); hmm->isc = MallocOrDie (MAXCODE * sizeof(int *)); hmm->t[0] = MallocOrDie ((7*M) * sizeof(float)); hmm->tsc[0] = MallocOrDie ((7*M) * sizeof(int)); hmm->mat[0] = MallocOrDie ((MAXABET*(M+1)) * sizeof(float)); hmm->ins[0] = MallocOrDie ((MAXABET*M) * sizeof(float)); hmm->msc[0] = MallocOrDie ((MAXCODE*(M+1)) * sizeof(int)); hmm->isc[0] = MallocOrDie ((MAXCODE*M) * sizeof(int)); /* note allocation strategy for important 2D arrays -- trying * to keep locality as much as possible, cache efficiency etc. */ for (k = 1; k <= M; k++) { hmm->mat[k] = hmm->mat[0] + k * MAXABET; if (k < M) { hmm->ins[k] = hmm->ins[0] + k * MAXABET; hmm->t[k] = hmm->t[0] + k * 7; hmm->tsc[k] = hmm->tsc[0] + k * 7; } } for (x = 1; x < MAXCODE; x++) { hmm->msc[x] = hmm->msc[0] + x * (M+1); hmm->isc[x] = hmm->isc[0] + x * M; } /* tsc[0] is used as a boundary condition sometimes [Viterbi()], * so set to -inf always. */ for (x = 0; x < 7; x++) hmm->tsc[0][x] = -INFTY; hmm->begin = MallocOrDie ((M+1) * sizeof(float)); hmm->bsc = MallocOrDie ((M+1) * sizeof(int)); hmm->end = MallocOrDie ((M+1) * sizeof(float)); hmm->esc = MallocOrDie ((M+1) * sizeof(int)); return;} voidFreePlan7(struct plan7_s *hmm){ if (hmm->name != NULL) free(hmm->name); if (hmm->desc != NULL) free(hmm->desc); if (hmm->rf != NULL) free(hmm->rf); if (hmm->cs != NULL) free(hmm->cs); if (hmm->ca != NULL) free(hmm->ca); if (hmm->comlog != NULL) free(hmm->comlog); if (hmm->ctime != NULL) free(hmm->ctime); if (hmm->map != NULL) free(hmm->map); if (hmm->tpri != NULL) free(hmm->tpri); if (hmm->mpri != NULL) free(hmm->mpri); if (hmm->ipri != NULL) free(hmm->ipri); if (hmm->bsc != NULL) free(hmm->bsc); if (hmm->begin != NULL) free(hmm->begin); if (hmm->esc != NULL) free(hmm->esc); if (hmm->end != NULL) free(hmm->end); if (hmm->msc != NULL) free(hmm->msc[0]); if (hmm->mat != NULL) free(hmm->mat[0]); if (hmm->isc != NULL) free(hmm->isc[0]); if (hmm->ins != NULL) free(hmm->ins[0]); if (hmm->tsc != NULL) free(hmm->tsc[0]); if (hmm->t != NULL) free(hmm->t[0]); if (hmm->msc != NULL) free(hmm->msc); if (hmm->mat != NULL) free(hmm->mat); if (hmm->isc != NULL) free(hmm->isc); if (hmm->ins != NULL) free(hmm->ins); if (hmm->tsc != NULL) free(hmm->tsc); if (hmm->t != NULL) free(hmm->t); if (hmm->dnam != NULL) free(hmm->dnam); if (hmm->dnai != NULL) free(hmm->dnai); free(hmm);}/* Function: ZeroPlan7() * * Purpose: Zeros the counts/probabilities fields in a model. * Leaves null model untouched. */voidZeroPlan7(struct plan7_s *hmm){ int k; for (k = 1; k < hmm->M; k++) { FSet(hmm->t[k], 7, 0.); FSet(hmm->mat[k], Alphabet_size, 0.); FSet(hmm->ins[k], Alphabet_size, 0.); } FSet(hmm->mat[hmm->M], Alphabet_size, 0.); hmm->tbd1 = 0.; FSet(hmm->begin+1, hmm->M, 0.); FSet(hmm->end+1, hmm->M, 0.); for (k = 0; k < 4; k++) FSet(hmm->xt[k], 2, 0.); hmm->flags &= ~PLAN7_HASBITS; /* invalidates scores */ hmm->flags &= ~PLAN7_HASPROB; /* invalidates probabilities */}/* Function: Plan7SetName() * * Purpose: Change the name of a Plan7 HMM. Convenience function. * * Note: Trailing whitespace and \n's are chopped. */voidPlan7SetName(struct plan7_s *hmm, char *name){ if (hmm->name != NULL) free(hmm->name); hmm->name = Strdup(name); StringChop(hmm->name);}/* Function: Plan7SetAccession() * * Purpose: Change the accession number of a Plan7 HMM. Convenience function. * * Note: Trailing whitespace and \n's are chopped. */voidPlan7SetAccession(struct plan7_s *hmm, char *acc){ if (hmm->acc != NULL) free(hmm->acc); hmm->acc = Strdup(acc); StringChop(hmm->acc); hmm->flags |= PLAN7_ACC;}/* Function: Plan7SetDescription() * * Purpose: Change the description line of a Plan7 HMM. Convenience function. * * Note: Trailing whitespace and \n's are chopped. */voidPlan7SetDescription(struct plan7_s *hmm, char *desc){ if (hmm->desc != NULL) free(hmm->desc); hmm->desc = Strdup(desc); StringChop(hmm->desc); hmm->flags |= PLAN7_DESC;}/* Function: Plan7ComlogAppend() * Date: SRE, Wed Oct 29 09:57:30 1997 [TWA 721 over Greenland] * * Purpose: Concatenate command line options and append to the * command line log. */voidPlan7ComlogAppend(struct plan7_s *hmm, int argc, char **argv){ int len; int i; /* figure out length of command line, w/ spaces and \n */ len = argc; for (i = 0; i < argc; i++) len += strlen(argv[i]); /* allocate */ if (hmm->comlog != NULL) { len += strlen(hmm->comlog); hmm->comlog = ReallocOrDie(hmm->comlog, sizeof(char)* (len+1)); } else { hmm->comlog = MallocOrDie(sizeof(char)* (len+1)); *(hmm->comlog) = '\0'; /* need this to make strcat work */ } /* append */ strcat(hmm->comlog, "\n"); for (i = 0; i < argc; i++) { strcat(hmm->comlog, argv[i]); if (i < argc-1) strcat(hmm->comlog, " "); }}/* Function: Plan7SetCtime() * Date: SRE, Wed Oct 29 11:53:19 1997 [TWA 721 over the Atlantic] * * Purpose: Set the ctime field in a new HMM to the current time. */voidPlan7SetCtime(struct plan7_s *hmm){ time_t date = time(NULL); if (hmm->ctime != NULL) free(hmm->ctime); hmm->ctime = Strdup(ctime(&date)); StringChop(hmm->ctime);}/* Function: Plan7SetNullModel() * * Purpose: Set the null model section of an HMM. * Convenience function. */voidPlan7SetNullModel(struct plan7_s *hmm, float null[MAXABET], float p1){ int x; for (x = 0; x < Alphabet_size; x++) hmm->null[x] = null[x]; hmm->p1 = p1;}/* Function: P7Logoddsify() * * Purpose: Take an HMM with valid probabilities, and * fill in the integer log-odds score section of the model. * * Notes on log-odds scores: * type of parameter probability score * ----------------- ----------- ------ * any emission p_x log_2 p_x/null_x * N,J,C /assume/ p_x = null_x so /always/ score zero. * transition to emitters t_x log_2 t_x/p1 * (M,I; N,C; J) * NN and CC loops are often equal to p1, so usu. score zero. * C->T transition t_x log_2 t_x/p2 * often zero, usu. C->T = p2. * all other transitions t_x log_2 t_x * (no null model counterpart, so null prob is 1) * * Notes on entry/exit scores, B->M and M->E: * The probability form model includes delete states 1 and M. * these states are removed from a search form model to * prevent B->D...D->E->J->B mute cycles, which would complicate * dynamic programming algorithms. The data-independent
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -