📄 hmmio.c
字号:
/* Print header */ fprintf(fp, "HMM "); for (x = 0; x < Alphabet_size; x++) fprintf(fp, " %c ", Alphabet[x]); fprintf(fp, "\n"); fprintf(fp, " %6s %6s %6s %6s %6s %6s %6s %6s %6s\n", "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d", "b->m", "m->e"); /* Print HMM parameters (main section of the save file) */ fprintf(fp, " %6s %6s ", prob2ascii(1-hmm->tbd1, 1.0), "*"); fprintf(fp, "%6s\n", prob2ascii(hmm->tbd1, 1.0)); for (k = 1; k <= hmm->M; k++) { /* Line 1: k, match emissions, map */ fprintf(fp, " %5d ", k); for (x = 0; x < Alphabet_size; x++) fprintf(fp, "%6s ", prob2ascii(hmm->mat[k][x], hmm->null[x])); if (hmm->flags & PLAN7_MAP) fprintf(fp, "%5d", hmm->map[k]); fputs("\n", fp); /* Line 2: RF and insert emissions */ fprintf(fp, " %5c ", hmm->flags & PLAN7_RF ? hmm->rf[k] : '-'); for (x = 0; x < Alphabet_size; x++) fprintf(fp, "%6s ", (k < hmm->M) ? prob2ascii(hmm->ins[k][x], hmm->null[x]) : "*"); fputs("\n", fp); /* Line 3: CS and transition probs */ fprintf(fp, " %5c ", hmm->flags & PLAN7_CS ? hmm->cs[k] : '-'); for (ts = 0; ts < 7; ts++) fprintf(fp, "%6s ", (k < hmm->M) ? prob2ascii(hmm->t[k][ts], 1.0) : "*"); fprintf(fp, "%6s ", prob2ascii(hmm->begin[k], 1.0)); fprintf(fp, "%6s ", prob2ascii(hmm->end[k], 1.0)); fputs("\n", fp); } fputs("//\n", fp);}/* Function: WriteBinHMM() * * Purpose: Write an HMM in binary format. */voidWriteBinHMM(FILE *fp, struct plan7_s *hmm){ int k; /* ye olde magic number */ fwrite((char *) &(v20magic), sizeof(unsigned int), 1, fp); /* header section */ fwrite((char *) &(hmm->flags), sizeof(int), 1, fp); write_bin_string(fp, hmm->name); if (hmm->flags & PLAN7_ACC) write_bin_string(fp, hmm->acc); if (hmm->flags & PLAN7_DESC) write_bin_string(fp, hmm->desc); fwrite((char *) &(hmm->M), sizeof(int), 1, fp); fwrite((char *) &(Alphabet_type), sizeof(int), 1, fp); if (hmm->flags & PLAN7_RF) fwrite((char *) hmm->rf, sizeof(char), hmm->M+1, fp); if (hmm->flags & PLAN7_CS) fwrite((char *) hmm->cs, sizeof(char), hmm->M+1, fp); if (hmm->flags & PLAN7_MAP) fwrite((char *) hmm->map, sizeof(int), hmm->M+1, fp); write_bin_string(fp, hmm->comlog); fwrite((char *) &(hmm->nseq), sizeof(int), 1, fp); write_bin_string(fp, hmm->ctime); fwrite((char *) &(hmm->checksum), sizeof(int), 1, fp); if (hmm->flags & PLAN7_GA) { fwrite((char *) &(hmm->ga1), sizeof(float), 1, fp); fwrite((char *) &(hmm->ga2), sizeof(float), 1, fp); } if (hmm->flags & PLAN7_TC) { fwrite((char *) &(hmm->tc1), sizeof(float), 1, fp); fwrite((char *) &(hmm->tc2), sizeof(float), 1, fp); } if (hmm->flags & PLAN7_NC) { fwrite((char *) &(hmm->nc1), sizeof(float), 1, fp); fwrite((char *) &(hmm->nc2), sizeof(float), 1, fp); } /* Specials */ for (k = 0; k < 4; k++) fwrite((char *) hmm->xt[k], sizeof(float), 2, fp); /* Null model */ fwrite((char *)&(hmm->p1), sizeof(float), 1, fp); fwrite((char *) hmm->null, sizeof(float), Alphabet_size, fp); /* EVD stats */ if (hmm->flags & PLAN7_STATS) { fwrite((char *) &(hmm->mu), sizeof(float), 1, fp); fwrite((char *) &(hmm->lambda), sizeof(float), 1, fp); } /* entry/exit probabilities */ fwrite((char *)&(hmm->tbd1),sizeof(float), 1, fp); fwrite((char *) hmm->begin, sizeof(float), hmm->M+1, fp); fwrite((char *) hmm->end, sizeof(float), hmm->M+1, fp); /* main model */ for (k = 1; k <= hmm->M; k++) fwrite((char *) hmm->mat[k], sizeof(float), Alphabet_size, fp); for (k = 1; k < hmm->M; k++) fwrite((char *) hmm->ins[k], sizeof(float), Alphabet_size, fp); for (k = 1; k < hmm->M; k++) fwrite((char *) hmm->t[k], sizeof(float), 7, fp);}/***************************************************************** * * Internal: HMM file parsers for various releases of HMMER. * * read_{asc,bin}xxhmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) * * Upon return, *ret_hmm is an allocated Plan7 HMM. * Return 0 if no more HMMs in the file (normal). * Return 1 and *ret_hmm = something if we got an HMM (normal) * Return 1 if an error occurs (meaning "I tried to * read something...") and *ret_hmm == NULL (meaning * "...but it wasn't an HMM"). I know, this is a funny * way to handle errors. * *****************************************************************/static intread_asc20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm) { struct plan7_s *hmm; char buffer[512]; char *s; int M; float p; int k, x; int atype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ hmm = NULL; if (feof(hmmfp->f) || fgets(buffer, 512, hmmfp->f) == NULL) return 0; if (strncmp(buffer, "HMMER2.0", 8) != 0) goto FAILURE; /* Get the header information: tag/value pairs in any order, * ignore unknown tags, stop when "HMM" is reached (signaling * start of main model) */ hmm = AllocPlan7Shell(); M = -1; while (fgets(buffer, 512, hmmfp->f) != NULL) { if (strncmp(buffer, "NAME ", 5) == 0) Plan7SetName(hmm, buffer+6); else if (strncmp(buffer, "ACC ", 5) == 0) Plan7SetAccession(hmm, buffer+6); else if (strncmp(buffer, "DESC ", 5) == 0) Plan7SetDescription(hmm, buffer+6); else if (strncmp(buffer, "LENG ", 5) == 0) M = atoi(buffer+6); else if (strncmp(buffer, "NSEQ ", 5) == 0) hmm->nseq = atoi(buffer+6); else if (strncmp(buffer, "ALPH ", 5) == 0) { /* Alphabet type */ s2upper(buffer+6); if (strncmp(buffer+6, "AMINO", 5) == 0) atype = hmmAMINO; else if (strncmp(buffer+6, "NUCLEIC", 7) == 0) atype = hmmNUCLEIC; else goto FAILURE; if (Alphabet_type == hmmNOTSETYET) SetAlphabet(atype); else if (atype != Alphabet_type) Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(atype)); } else if (strncmp(buffer, "RF ", 5) == 0) { /* Reference annotation present? */ if (sre_toupper(*(buffer+6)) == 'Y') hmm->flags |= PLAN7_RF; } else if (strncmp(buffer, "CS ", 5) == 0) { /* Consensus annotation present? */ if (sre_toupper(*(buffer+6)) == 'Y') hmm->flags |= PLAN7_CS; } else if (strncmp(buffer, "MAP ", 5) == 0) { /* Map annotation present? */ if (sre_toupper(*(buffer+6)) == 'Y') hmm->flags |= PLAN7_MAP; } else if (strncmp(buffer, "COM ", 5) == 0) { /* Command line log */ StringChop(buffer+6); if (hmm->comlog == NULL) hmm->comlog = Strdup(buffer+6); else { hmm->comlog = ReallocOrDie(hmm->comlog, sizeof(char *) * (strlen(hmm->comlog) + 1 + strlen(buffer+6))); strcat(hmm->comlog, "\n"); strcat(hmm->comlog, buffer+6); } } else if (strncmp(buffer, "DATE ", 5) == 0) { /* Date file created */ StringChop(buffer+6); hmm->ctime= Strdup(buffer+6); } else if (strncmp(buffer, "GA ", 5) == 0) { if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; hmm->ga1 = atof(s); if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->ga2 = atof(s); hmm->flags |= PLAN7_GA; } else if (strncmp(buffer, "TC ", 5) == 0) { if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; hmm->tc1 = atof(s); if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->tc2 = atof(s); hmm->flags |= PLAN7_TC; } else if (strncmp(buffer, "NC ", 5) == 0) { if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; hmm->nc1 = atof(s); if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->nc2 = atof(s); hmm->flags |= PLAN7_NC; } else if (strncmp(buffer, "XT ", 5) == 0) { /* Special transition section */ if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; for (k = 0; k < 4; k++) for (x = 0; x < 2; x++) { if (s == NULL) goto FAILURE; hmm->xt[k][x] = ascii2prob(s, 1.0); s = strtok(NULL, " \t\n"); } } else if (strncmp(buffer, "NULT ", 5) == 0) { /* Null model transitions */ if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; hmm->p1 = ascii2prob(s, 1.); if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->p1 = hmm->p1 / (hmm->p1 + ascii2prob(s, 1.0)); } else if (strncmp(buffer, "NULE ", 5) == 0) { /* Null model emissions */ if (Alphabet_type == hmmNOTSETYET) Die("ALPH must precede NULE in HMM save files"); s = strtok(buffer+6, " \t\n"); for (x = 0; x < Alphabet_size; x++) { if (s == NULL) goto FAILURE; hmm->null[x] = ascii2prob(s, 1./(float)Alphabet_size); s = strtok(NULL, " \t\n"); } } else if (strncmp(buffer, "EVD ", 5) == 0) { /* EVD parameters */ hmm->flags |= PLAN7_STATS; if ((s = strtok(buffer+6, " \t\n")) == NULL) goto FAILURE; hmm->mu = atof(s); if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->lambda = atof(s); } else if (strncmp(buffer, "CKSUM", 5) == 0) hmm->checksum = atoi(buffer+6); else if (strncmp(buffer, "HMM ", 5) == 0) break; } /* partial check for mandatory fields */ if (feof(hmmfp->f)) goto FAILURE; if (M < 1) goto FAILURE; if (hmm->name == NULL) goto FAILURE; if (Alphabet_type == hmmNOTSETYET) goto FAILURE; /* Main model section. Read as integer log odds, convert * to probabilities */ AllocPlan7Body(hmm, M); /* skip an annotation line */ if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; /* parse tbd1 line */ if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; if ((s = strtok(buffer, " \t\n")) == NULL) goto FAILURE; p = ascii2prob(s, 1.0); if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->tbd1 = ascii2prob(s, 1.0); hmm->tbd1 = hmm->tbd1 / (p + hmm->tbd1); /* main model */ for (k = 1; k <= hmm->M; k++) { /* Line 1: k, match emissions, map */ if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; if ((s = strtok(buffer, " \t\n")) == NULL) goto FAILURE; if (atoi(s) != k) goto FAILURE; for (x = 0; x < Alphabet_size; x++) { if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->mat[k][x] = ascii2prob(s, hmm->null[x]); } if (hmm->flags & PLAN7_MAP) { if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->map[k] = atoi(s); } /* Line 2: RF and insert emissions */ if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; if ((s = strtok(buffer, " \t\n")) == NULL) goto FAILURE; if (hmm->flags & PLAN7_RF) hmm->rf[k] = *s; if (k < hmm->M) { for (x = 0; x < Alphabet_size; x++) { if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->ins[k][x] = ascii2prob(s, hmm->null[x]); } } /* Line 3: CS and transitions */ if (fgets(buffer, 512, hmmfp->f) == NULL) goto FAILURE; if ((s = strtok(buffer, " \t\n")) == NULL) goto FAILURE; if (hmm->flags & PLAN7_CS) hmm->cs[k] = *s; for (x = 0; x < 7; x++) { if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; if (k < hmm->M) hmm->t[k][x] = ascii2prob(s, 1.0); } if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->begin[k] = ascii2prob(s, 1.0); if ((s = strtok(NULL, " \t\n")) == NULL) goto FAILURE; hmm->end[k] = ascii2prob(s, 1.0); } /* end loop over main model */ /* Advance to record separator */ while (fgets(buffer, 512, hmmfp->f) != NULL) if (strncmp(buffer, "//", 2) == 0) break; Plan7Renormalize(hmm); /* Paracel reported bug 6/11/99 */ /* Set flags and return */ hmm->flags |= PLAN7_HASPROB; /* probabilities are valid */ hmm->flags &= ~PLAN7_HASBITS; /* scores are not valid */ *ret_hmm = hmm; return 1;FAILURE: if (hmm != NULL) FreePlan7(hmm); *ret_hmm = NULL; return 1;}static intread_bin20hmm(HMMFILE *hmmfp, struct plan7_s **ret_hmm){ struct plan7_s *hmm; int k,x; int type; unsigned int magic; hmm = NULL; /* Header section */ if (feof(hmmfp->f)) return 0; if (! fread((char *) &magic, sizeof(unsigned int), 1, hmmfp->f)) return 0; if (hmmfp->byteswap) byteswap((char *)&magic, sizeof(unsigned int)); if (magic != v20magic) goto FAILURE; /* allocate HMM shell for header info */ hmm = AllocPlan7Shell(); /* flags */ if (! fread((char *) &(hmm->flags), sizeof(int), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) byteswap((char *)&(hmm->flags), sizeof(int)); /* name */ if (! read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->name))) goto FAILURE; /* optional accession */ if ((hmm->flags & PLAN7_ACC) && ! read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->acc))) goto FAILURE; /* optional description */ if ((hmm->flags & PLAN7_DESC) && ! read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->desc))) goto FAILURE; /* length of model */ if (! fread((char *) &hmm->M, sizeof(int), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) byteswap((char *)&(hmm->M), sizeof(int)); /* alphabet type */ if (! fread((char *) &type, sizeof(int), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) byteswap((char *)&type, sizeof(int)); if (Alphabet_type == hmmNOTSETYET) SetAlphabet(type); else if (type != Alphabet_type) Die("Alphabet mismatch error.\nI thought we were working with %s, but tried to read a %s HMM.\n", AlphabetType2String(Alphabet_type), AlphabetType2String(type)); /* now allocate for rest of model */ AllocPlan7Body(hmm, hmm->M); /* optional #=RF alignment annotation */ if ((hmm->flags & PLAN7_RF) && !fread((char *) hmm->rf, sizeof(char), hmm->M+1, hmmfp->f)) goto FAILURE; hmm->rf[hmm->M+1] = '\0'; /* optional #=CS alignment annotation */ if ((hmm->flags & PLAN7_CS) && !fread((char *) hmm->cs, sizeof(char), hmm->M+1, hmmfp->f)) goto FAILURE; hmm->cs[hmm->M+1] = '\0'; /* optional alignment map annotation */ if ((hmm->flags & PLAN7_MAP) && !fread((char *) hmm->map, sizeof(int), hmm->M+1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) for (k = 1; k <= hmm->M; k++) byteswap((char*)&(hmm->map[k]), sizeof(int)); /* command line log */ if (!read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->comlog))) goto FAILURE; /* nseq */ if (!fread((char *) &(hmm->nseq),sizeof(int), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) byteswap((char *)&(hmm->nseq), sizeof(int)); /* creation time */ if (!read_bin_string(hmmfp->f, hmmfp->byteswap, &(hmm->ctime))) goto FAILURE; /* checksum */ if (!fread((char *) &(hmm->checksum),sizeof(int), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) byteswap((char *)&(hmm->checksum), sizeof(int)); /* Pfam gathering thresholds */ if (hmm->flags & PLAN7_GA) { if (! fread((char *) &(hmm->ga1), sizeof(float), 1, hmmfp->f)) goto FAILURE; if (! fread((char *) &(hmm->ga2), sizeof(float), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) { byteswap((char *) &(hmm->ga1), sizeof(float)); byteswap((char *) &(hmm->ga2), sizeof(float)); } } /* Pfam trusted cutoffs */ if (hmm->flags & PLAN7_TC) { if (! fread((char *) &(hmm->tc1), sizeof(float), 1, hmmfp->f)) goto FAILURE; if (! fread((char *) &(hmm->tc2), sizeof(float), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) { byteswap((char *) &(hmm->tc1), sizeof(float)); byteswap((char *) &(hmm->tc2), sizeof(float)); } } /* Pfam noise cutoffs */ if (hmm->flags & PLAN7_NC) { if (! fread((char *) &(hmm->nc1), sizeof(float), 1, hmmfp->f)) goto FAILURE; if (! fread((char *) &(hmm->nc2), sizeof(float), 1, hmmfp->f)) goto FAILURE; if (hmmfp->byteswap) { byteswap((char *) &(hmm->nc1), sizeof(float)); byteswap((char *) &(hmm->nc2), sizeof(float)); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -