📄 hmmio.c
字号:
int x; char byte; for (x = 0; x < nbytes / 2; x++) { byte = swap[nbytes - x - 1]; swap[nbytes - x - 1] = swap[x]; swap[x] = byte; }}/* Function: write_bin_string() * Date: SRE, Wed Oct 29 13:49:27 1997 [TWA 721 over Canada] * * Purpose: Write a string in binary save format: an integer * for the string length (including \0), followed by * the string. */static voidwrite_bin_string(FILE *fp, char *s){ int len; if (s != NULL) { len = strlen(s) + 1; fwrite((char *) &len, sizeof(int), 1, fp); fwrite((char *) s, sizeof(char), len, fp); } else { len = 0; fwrite((char *) &len, sizeof(int), 1, fp); }}/* Function: read_bin_string() * Date: SRE, Wed Oct 29 14:03:23 1997 [TWA 721] * * Purpose: Read in a string from a binary file, where * the first integer is the length (including '\0'). * * Args: fp - FILE to read from * doswap - TRUE to byteswap * ret_s - string to read into * * Return: 0 on failure. ret_s is malloc'ed here. */ static intread_bin_string(FILE *fp, int doswap, char **ret_s){ char *s; int len; if (! fread((char *) &len, sizeof(int), 1, fp)) return 0; if (doswap) byteswap((char *)&len, sizeof(int)); s = MallocOrDie (sizeof(char) * (len)); if (! fread((char *) s, sizeof(char), len, fp)) { free(s); return 0; } *ret_s = s; return 1;}/* Function: multiline() * Date: Mon Jan 5 14:57:50 1998 [StL] * * Purpose: Given a record (like the comlog) that contains * multiple lines, print it as multiple lines with * a given prefix. e.g.: * * given: "COM ", "foo\nbar\nbaz" * print: COM foo * COM bar * COM baz * * * Used to print the command log to ASCII save files. * * Args: fp: FILE to print to * pfx: prefix for each line * s: line to break up and print; tolerates a NULL * * Return: (void) */static voidmultiline(FILE *fp, char *pfx, char *s){ char *buf; char *sptr; if (s == NULL) return; buf = Strdup(s); sptr = strtok(buf, "\n"); while (sptr != NULL) { fprintf(fp, "%s%s\n", pfx, sptr); sptr = strtok(NULL, "\n"); } free(buf);}/***************************************************************** * HMMER 1.x save file reading functions, modified from the * corpse of 1.9m. *****************************************************************//* Function: read_plan9_binhmm() * * Read old (Plan9) binary HMM save files from HMMER 1.9 and earlier. * V1.0 saved regularizer and sympvec info, which V1.1 ignores. * V1.7 and later may include optional ref, cs annotation lines. * V1.9 added name, null model. * * Returns pointer to the HMM on success; NULL * on failure. Sets global alphabet information based on * whether it reads 4 or 20 as alphabet size (don't rely * on ancient HMMER macro definitions). */static struct plan9_s *read_plan9_binhmm(FILE *fp, int version, int swapped){ struct plan9_s *hmm; int M; /* length of model */ int k; /* state number */ int x; /* symbol or transition number */ int len; /* length of variable length string */ int asize; /* alphabet size */ int atype; /* alphabet type (read but ignored) */ char abet[20]; /* alphabet (read but ignored) */ /* read M and alphabet size */ if (! fread((char *) &(M), sizeof(int), 1, fp)) return NULL; if (! fread((char *) &asize, sizeof(int), 1, fp)) return NULL; if (swapped) { byteswap((char *) &M, sizeof(int)); byteswap((char *) &asize, sizeof(int)); } /* Set global alphabet information */ if (asize == 4) atype = hmmNUCLEIC; else if (asize == 20) atype = hmmAMINO; else Die("A nonbiological alphabet size of %d; so I can't convert plan9 to plan7", asize); if (Alphabet_type == hmmNOTSETYET) SetAlphabet(atype); else if (atype != Alphabet_type) Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(atype)); /* now, create space for hmm */ if ((hmm = P9AllocHMM(M)) == NULL) Die("malloc failed for reading hmm in\n"); /* version 1.9+ files have a name */ if (version == HMMER1_9B) { if (! fread((char *) &len, sizeof(int), 1, fp)) return NULL; if (swapped) byteswap((char *) &len, sizeof(int)); hmm->name = (char *) ReallocOrDie (hmm->name, sizeof(char) * (len+1)); if (! fread((char *) hmm->name, sizeof(char), len, fp)) return NULL; hmm->name[len] = '\0'; } /* read alphabet_type and alphabet, but ignore: we've already set them */ if (! fread((char *) &atype, sizeof(int), 1, fp)) return NULL; if (! fread((char *) abet, sizeof(char), Alphabet_size, fp)) return NULL; /* skip the random symbol frequencies in V1.0 */ if (version == HMMER1_0B) fseek(fp, (long) (sizeof(float) * Alphabet_size), SEEK_CUR); /* Get optional info in V1.7 and later */ if (version == HMMER1_7B || version == HMMER1_9B) { if (! fread((char *) &(hmm->flags), sizeof(int), 1, fp)) return NULL; if (swapped) byteswap((char *) &hmm->flags, sizeof(int)); if ((hmm->flags & HMM_REF) && ! fread((char *) hmm->ref, sizeof(char), hmm->M+1, fp)) return NULL; hmm->ref[hmm->M+1] = '\0'; if ((hmm->flags & HMM_CS) && ! fread((char *) hmm->cs, sizeof(char), hmm->M+1, fp)) return NULL; hmm->cs[hmm->M+1] = '\0'; } /* Get the null model in V1.9 and later */ if (version == HMMER1_9B) { if (! fread((char *) hmm->null, sizeof(float), Alphabet_size, fp)) return NULL; if (swapped) for (x = 0; x < Alphabet_size; x++) byteswap((char *) &(hmm->null[x]), sizeof(float)); } else P9DefaultNullModel(hmm->null); /* everything else is states */ for (k = 0; k <= hmm->M; k++) { /* get match state info */ if (! fread((char *) &(hmm->mat[k].t[MATCH]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) &(hmm->mat[k].t[DELETE]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) &(hmm->mat[k].t[INSERT]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) hmm->mat[k].p, sizeof(float), Alphabet_size, fp)) return NULL; if (swapped) { byteswap((char *) &(hmm->mat[k].t[MATCH]), sizeof(float)); byteswap((char *) &(hmm->mat[k].t[DELETE]), sizeof(float)); byteswap((char *) &(hmm->mat[k].t[INSERT]), sizeof(float)); for (x = 0; x < Alphabet_size; x++) byteswap((char *) &(hmm->mat[k].p[x]), sizeof(float)); } /* skip the regularizer info in V1.0 */ if (version == HMMER1_0B) fseek(fp, (long)(sizeof(float) * (3 + Alphabet_size)), SEEK_CUR); /* get delete state info */ if (! fread((char *) &(hmm->del[k].t[MATCH]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) &(hmm->del[k].t[DELETE]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) &(hmm->del[k].t[INSERT]), sizeof(float), 1, fp)) return NULL; if (swapped) { byteswap((char *) &(hmm->del[k].t[MATCH]), sizeof(float)); byteswap((char *) &(hmm->del[k].t[DELETE]), sizeof(float)); byteswap((char *) &(hmm->del[k].t[INSERT]), sizeof(float)); } /* skip the regularizer info in V1.0 */ if (version == HMMER1_0B) fseek(fp, (long)(sizeof(float) * 3), SEEK_CUR); /* get insert state info */ if (! fread((char *) &(hmm->ins[k].t[MATCH]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) &(hmm->ins[k].t[DELETE]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) &(hmm->ins[k].t[INSERT]), sizeof(float), 1, fp)) return NULL; if (! fread((char *) hmm->ins[k].p, sizeof(float), Alphabet_size, fp)) return NULL; if (swapped) { byteswap((char *) &(hmm->ins[k].t[MATCH]), sizeof(float)); byteswap((char *) &(hmm->ins[k].t[DELETE]), sizeof(float)); byteswap((char *) &(hmm->ins[k].t[INSERT]), sizeof(float)); for (x = 0; x < Alphabet_size; x++) byteswap((char *) &(hmm->ins[k].p[x]), sizeof(float)); } /* skip the regularizer info in V1.0 */ if (version == HMMER1_0B) fseek(fp, (long)(sizeof(float) * (3 + Alphabet_size)), SEEK_CUR); } P9Renormalize(hmm); return hmm;}/* Function: read_plan9_aschmm() * * Purpose: Read ASCII-format save files from 1.8.4 and earlier. * V1.0 contained sympvec and regularizers; these are ignored * in V1.1 and later * V1.7 and later contain ref and cs annotation. * * Args: fp - open save file, header has been read already * version - HMMER1_7F, for instance * * Returns ptr to the (allocated) new HMM on success, * or NULL on failure. */static struct plan9_s *read_plan9_aschmm(FILE *fp, int version){ struct plan9_s *hmm; int M; /* length of model */ char buffer[512]; char *statetype; char *s; int k; /* state number */ int i; /* symbol number */ int asize; /* Alphabet size */ int atype; /* Alphabet type */ /* read M from first line */ if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; if (!isdigit((int) (*s))) return NULL; M = atoi(s); /* read alphabet_length */ if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; if (!isdigit((int) (*s))) return NULL; asize = atoi(s); /* Set global alphabet information */ if (asize == 4) atype = hmmNUCLEIC; else if (asize == 20) atype = hmmAMINO; else Die("A nonbiological alphabet size of %d; so I can't convert plan9 to plan7", asize); if (Alphabet_type == hmmNOTSETYET) SetAlphabet(atype); else if (atype != Alphabet_type) Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(atype)); /* now, create space for hmm */ if ((hmm = P9AllocHMM(M)) == NULL) Die("malloc failed for reading hmm in\n"); /* read alphabet_type but ignore */ if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; if (!isdigit((int) (*s))) return NULL; /* read alphabet but ignore */ if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; /* skip the random symbol frequencies in V1.0 files. now unused */ if (version == HMMER1_0F) for (i = 0; i < Alphabet_size; i++) if (fgets(buffer, 512, fp) == NULL) return NULL; /* V1.7 has lines for whether we have valid ref, cs info */ if (version == HMMER1_7F) { if (fgets(buffer, 512, fp) == NULL) return NULL; if (strncmp(buffer, "yes", 3) == 0) hmm->flags |= HMM_REF; if (fgets(buffer, 512, fp) == NULL) return NULL; if (strncmp(buffer, "yes", 3) == 0) hmm->flags |= HMM_CS; } /* everything else is states */ while (fgets(buffer, 512, fp) != NULL) { /* get state type and index info */ if ((statetype = strtok(buffer, " \t\n")) == NULL) return NULL; if ((s = strtok((char *) NULL, " \t\n")) == NULL) return NULL; if (!isdigit((int) (*s))) return NULL; k = atoi(s); if (k < 0 || k > hmm->M+1) return NULL; if (strcmp(statetype, "###MATCH_STATE") == 0) { /* V1.7: get ref, cs info: */ /* ###MATCH_STATE 16 (x) (H) */ if (version == HMMER1_7F) { s = strtok(NULL, "\n"); while (*s != '(' && *s != '\0') s++; if (*s != '(') return NULL; hmm->ref[k] = *(s+1); while (*s != '(' && *s != '\0') s++; if (*s != '(') return NULL; hmm->cs[k] = *(s+1); } if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->mat[k].t[MATCH] = (float) atof(s); if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->mat[k].t[DELETE] = (float) atof(s); if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->mat[k].t[INSERT] = (float) atof(s); for (i = 0; i < Alphabet_size; i++) { if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->mat[k].p[i] = (float) atof(s); } /* Skip all regularizer info for V1.0 */ if (version == HMMER1_0F) for (i = 0; i < Alphabet_size + 3; i++) if (fgets(buffer, 512, fp) == NULL) return NULL; } else if (strcmp(statetype, "###INSERT_STATE") == 0) { if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->ins[k].t[MATCH] = (float) atof(s); if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->ins[k].t[DELETE] = (float) atof(s); if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->ins[k].t[INSERT] = (float) atof(s); for (i = 0; i < Alphabet_size; i++) { if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->ins[k].p[i] = (float) atof(s); } /* Skip all regularizer info in V1.0 files */ if (version == HMMER1_0F) for (i = 0; i < Alphabet_size + 3; i++) if (fgets(buffer, 512, fp) == NULL) return NULL; } else if (strcmp(statetype, "###DELETE_STATE") == 0) { if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->del[k].t[MATCH] = (float) atof(s); if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->del[k].t[DELETE] = (float) atof(s); if (fgets(buffer, 512, fp) == NULL) return NULL; if ((s = strtok(buffer, " \t\n")) == NULL) return NULL; hmm->del[k].t[INSERT] = (float) atof(s); /* Skip all regularizer info in V1.0 files*/ if (version == HMMER1_0F) for (i = 0; i < 3; i++) if (fgets(buffer, 512, fp) == NULL) return NULL; } else return NULL; } P9DefaultNullModel(hmm->null); P9Renormalize(hmm); return hmm;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -