⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lm_3g.c

📁 WinCE平台上的语音识别程序
💻 C
📖 第 1 页 / 共 5 页
字号:
 * -1 otherwise. */int32lm_set_current(char const *name){    int32 i;    if ((i = lmname_to_id(name)) < 0)        return (-1);    lmp = lmset[i].lm;    return (0);}static int32lmname_to_id(char const *name){    int32 i;    for (i = 0; (i < n_lm) && (strcmp(lmset[i].name, name) != 0); i++);    return ((i < n_lm) ? i : -1);}lm_t *lm_name2lm(char const *name){    int32 i;    i = lmname_to_id(name);    return ((i >= 0) ? lmset[i].lm : NULL);}char *get_current_lmname(){    int32 i;    for (i = 0; (i < n_lm) && (lmset[i].lm != lmp); i++);    return ((i < n_lm) ? lmset[i].name : NULL);}lm_t *lm_get_current(){    return (lmp);}int32get_n_lm(){    return (n_lm);}int32lm3g_n_lm(void){    return n_lm;}char *lm3g_index2name(int k){    if ((k >= 0) && (k < n_lm))        return (lmset[k].name);    else        return NULL;}/* * dict base wid; check if present in LM.   * return TRUE if present, FALSE otherwise. */int32dictwd_in_lm(wid)int32 wid;{    return (lmp->dictwid_map[wid] >= 0);}/* * Load pre-compiled trigram LM file, if it exists, into model.  If * file does not exist, or is not a dump file, return 0.  Otherwise, * if successful, return 1. */static int32lm3g_load(char const *file, char const *lmname,          lm_t **out_model, char const *lmfile){    int32 i, j, k, vn, ts, err;    int32 n_unigram;    int32 n_bigram;    int32 n_trigram;    int32 dict_size;    FILE *fp;    char str[1024];    unigram_t *ugptr;    bigram_t *bgptr;    trigram_t *tgptr;    char *tmp_word_str;    int do_mmap, do_swap, fd = -1;    char *map_base = NULL;    size_t offset = 0, filesize;    lm_t *model;    err = 0;    E_INFO("Trying to read %s as precompiled dump file\n", file);    if ((fp = fopen(file, "rb")) == NULL) {        E_INFO("Precompiled file not found\n");        return -1;    }    do_swap = 0;    fread(&k, sizeof(k), 1, fp);    if (k != strlen(darpa_hdr)+1) {        SWAP_INT32(&k);        if (k != strlen(darpa_hdr)+1) {            E_ERROR("Wrong magic header size number %x: %s is not a dump file\n", k);            fclose(fp);            return -1;        }        do_swap = 1;    }    if (fread(str, sizeof(char), k, fp) != (size_t) k)        E_FATAL("Cannot read header\n");    if (strncmp(str, darpa_hdr, k) != 0) {        E_ERROR("Wrong header %s: %s is not a dump file\n", darpa_hdr);        fclose(fp);        return -1;    }    E_INFO("%s\n", str);    do_mmap = cmd_ln_boolean("-mmap");    if (do_mmap) {        if (do_swap) {            E_INFO                ("Byteswapping required, will not use memory-mapped I/O for LM file\n");            do_mmap = 0;        }        else {            E_INFO("Will use memory-mapped I/O for LM file\n");            fd = fileno(fp);        }    }    fread(&k, sizeof(k), 1, fp);    if (do_swap) SWAP_INT32(&k);    if (fread(str, sizeof(char), k, fp) != (size_t) k)        E_FATAL("Cannot read LM filename in header\n");    /* read version#, if present (must be <= 0) */    fread(&vn, sizeof(vn), 1, fp);    if (do_swap) SWAP_INT32(&vn);    if (vn <= 0) {        /* read and don't compare timestamps (we don't care) */        fread(&ts, sizeof(ts), 1, fp);        if (do_swap) SWAP_INT32(&ts);        /* read and skip format description */        for (;;) {            fread(&k, sizeof(k), 1, fp);            if (do_swap) SWAP_INT32(&k);            if (k == 0)                break;            if (fread(str, sizeof(char), k, fp) != (size_t) k)                E_FATAL("fread(word) failed\n");        }        /* read model->ucount */        fread(&n_unigram, sizeof(n_unigram), 1, fp);        if (do_swap) SWAP_INT32(&n_unigram);    }    else {        n_unigram = vn;    }    /* read model->bcount, tcount */    fread(&n_bigram, sizeof(n_bigram), 1, fp);    if (do_swap) SWAP_INT32(&n_bigram);    fread(&n_trigram, sizeof(n_trigram), 1, fp);    if (do_swap) SWAP_INT32(&n_trigram);    E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", n_unigram, n_bigram, n_trigram);    /* Determine dictionary size (for dict-wid -> LM-wid map) */    dict_size = get_dict_size(&n_unigram, lmname);    /* Allocate space for word strings. */    word_str = ckd_calloc(n_unigram, sizeof(char *));    /* Allocate space for LM, including initial OOVs and placeholders; initialize it */    model = NewModel(n_unigram, n_bigram, n_trigram, dict_size);    /* read unigrams (always in memory, as they contain dictionary     * mappings that can't be precomputed, and also could have OOVs added) */    if (fread(model->unigrams, sizeof(unigram_t), model->ucount + 1, fp)        != (size_t) model->ucount + 1)        E_FATAL("fread(unigrams) failed\n");    if (do_swap) {        for (i = 0, ugptr = model->unigrams; i <= model->ucount;             i++, ugptr++) {            SWAP_INT32(&ugptr->mapid);            SWAP_INT32(&ugptr->prob1.l);            SWAP_INT32(&ugptr->bo_wt1.l);            SWAP_INT32(&ugptr->bigrams);        }    }    for (i = 0, ugptr = model->unigrams; i < model->ucount; i++, ugptr++) {        if (ugptr->mapid != i)            err = 1;        ugptr->mapid = i;    }    if (err)        E_WARN("Corrected corrupted dump file created by buggy fbs8\n");    E_INFO("%8d = LM.unigrams(+trailer) read\n", model->ucount);    /* Now mmap() the file and read in the rest of the (read-only) stuff. */    if (do_mmap) {        offset = ftell(fp);        fseek(fp, 0, SEEK_END);        filesize = ftell(fp);        fseek(fp, offset, SEEK_SET);        map_base = s2_mmap(file);    }    /* read bigrams */    if (do_mmap) {        model->bigrams = (bigram_t *) (map_base + offset);        offset += (model->bcount + 1) * sizeof(bigram_t);    }    else {        model->bigrams =            ckd_calloc(model->bcount + 1, sizeof(bigram_t));        if (fread(model->bigrams, sizeof(bigram_t), model->bcount + 1, fp)            != (size_t) model->bcount + 1)            E_FATAL("fread(bigrams) failed\n");        if (do_swap) {            for (i = 0, bgptr = model->bigrams; i <= model->bcount;                 i++, bgptr++) {                SWAP_INT16(&bgptr->wid);                SWAP_INT16(&bgptr->prob2);                SWAP_INT16(&bgptr->bo_wt2);                SWAP_INT16(&bgptr->trigrams);            }        }    }    E_INFO("%8d = LM.bigrams(+trailer) read\n", model->bcount);    /* read trigrams */    if (model->tcount > 0) {        if (do_mmap) {            model->trigrams = (trigram_t *) (map_base + offset);            offset += model->tcount * sizeof(trigram_t);        }        else {            model->trigrams =                ckd_calloc(model->tcount, sizeof(trigram_t));            if (fread                (model->trigrams, sizeof(trigram_t), model->tcount, fp)                != (size_t) model->tcount)                E_FATAL("fread(trigrams) failed\n");            if (do_swap) {                for (i = 0, tgptr = model->trigrams; i < model->tcount;                     i++, tgptr++) {                    SWAP_INT16(&tgptr->wid);                    SWAP_INT16(&tgptr->prob3);                }            }        }        E_INFO("%8d = LM.trigrams read\n", model->tcount);    }    /* read n_prob2 and prob2 array (in memory, should be pre-scaled on disk) */    if (do_mmap)        fseek(fp, offset, SEEK_SET);    fread(&k, sizeof(k), 1, fp);    if (do_swap) SWAP_INT32(&k);    model->n_prob2 = k;    model->prob2 = ckd_calloc(k, sizeof(log_t));    if (fread(model->prob2, sizeof(log_t), k, fp) != (size_t) k)        E_FATAL("fread(prob2) failed\n");    if (do_swap)        for (i = 0; i < k; i++)            SWAP_INT32(&model->prob2[i].l);    E_INFO("%8d = LM.prob2 entries read\n", k);    /* read n_bo_wt2 and bo_wt2 array (in memory) */    if (model->tcount > 0) {        fread(&k, sizeof(k), 1, fp);        if (do_swap) SWAP_INT32(&k);        model->n_bo_wt2 = k;        model->bo_wt2 = ckd_calloc(k, sizeof(log_t));        if (fread(model->bo_wt2, sizeof(log_t), k, fp) != (size_t) k)            E_FATAL("fread(bo_wt2) failed\n");        if (do_swap)            for (i = 0; i < k; i++)                SWAP_INT32(&model->bo_wt2[i].l);        E_INFO("%8d = LM.bo_wt2 entries read\n", k);    }    /* read n_prob3 and prob3 array (in memory) */    if (model->tcount > 0) {        fread(&k, sizeof(k), 1, fp);        if (do_swap) SWAP_INT32(&k);        model->n_prob3 = k;        model->prob3 = ckd_calloc(k, sizeof(log_t));        if (fread(model->prob3, sizeof(log_t), k, fp) != (size_t) k)            E_FATAL("fread(prob3) failed\n");        if (do_swap)            for (i = 0; i < k; i++)                SWAP_INT32(&model->prob3[i].l);        E_INFO("%8d = LM.prob3 entries read\n", k);    }    /* read tseg_base size and tseg_base */    /* FIXME: There could be alignment issues here. */    if (do_mmap)        offset = ftell(fp);    if (model->tcount > 0) {        if (do_mmap) {            k = *(int32 *) (map_base + offset);            offset += sizeof(int32);            model->tseg_base = (int32 *) (map_base + offset);            offset += k * sizeof(int32);        }        else {            k = (model->bcount + 1) / BG_SEG_SZ + 1;            fread(&k, sizeof(k), 1, fp);            if (do_swap) SWAP_INT32(&k);            model->tseg_base = ckd_calloc(k, sizeof(int32));            if (fread(model->tseg_base, sizeof(int32), k, fp) !=                (size_t) k)                E_FATAL("fread(tseg_base) failed\n");            if (do_swap)                for (i = 0; i < k; i++)                    SWAP_INT32(&model->tseg_base[i]);        }        E_INFO("%8d = LM.tseg_base entries read\n", k);    }    /* read ascii word strings */    if (do_mmap) {        k = *(int32 *) (map_base + offset);        offset += sizeof(int32);        tmp_word_str = (char *) (map_base + offset);        offset += k;    }    else {        fread(&k, sizeof(k), 1, fp);        if (do_swap) SWAP_INT32(&k);        tmp_word_str = ckd_calloc(k, sizeof(char));        if (fread(tmp_word_str, sizeof(char), k, fp) != (size_t) k)            E_FATAL("fread(word-string) failed\n");    }    /* First make sure string just read contains ucount words (PARANOIA!!) */    for (i = 0, j = 0; i < k; i++)        if (tmp_word_str[i] == '\0')            j++;    if (j != model->ucount)        E_FATAL("Error reading word strings\n");    /* Break up string just read into words */    if (do_mmap) {        j = 0;        for (i = 0; i < model->ucount; i++) {            word_str[i] = tmp_word_str + j;            j += strlen(word_str[i]) + 1;        }    }    else {        j = 0;        for (i = 0; i < model->ucount; i++) {            word_str[i] = ckd_salloc(tmp_word_str + j);            j += strlen(word_str[i]) + 1;        }        free(tmp_word_str);    }    E_INFO("%8d = ascii word strings read\n", i);    *out_model = model;    return 0;}static char const *fmtdesc[] = {    "BEGIN FILE FORMAT DESCRIPTION",    "Header string length (int32) and string (including trailing 0)",    "Original LM filename string-length (int32) and filename (including trailing 0)",    "(int32) version number (present iff value <= 0)",    "(int32) original LM file modification timestamp (iff version# present)",    "(int32) string-length and string (including trailing 0) (iff version# present)",    "... previous entry continued any number of times (iff version# present)",    "(int32) 0 (terminating sequence of strings) (iff version# present)",    "(int32) lm_t.ucount (must be > 0)",    "(int32) lm_t.bcount",    "(int32) lm_t.tcount",    "lm_t.ucount+1 unigrams (including sentinel)",    "lm_t.bcount+1 bigrams (including sentinel)",    "lm_t.tcount trigrams (present iff lm_t.tcount > 0)",    "(int32) lm_t.n_prob2",    "(int32) lm_t.prob2[]",    "(int32) lm_t.n_bo_wt2 (present iff lm_t.tcount > 0)",    "(int32) lm_t.bo_wt2[] (present iff lm_t.tcount > 0)",    "(int32) lm_t.n_prob3 (present iff lm_t.tcount > 0)",    "(int32) lm_t.prob3[] (present iff lm_t.tcount > 0)",    "(int32) (lm_t.bcount+1)/BG_SEG_SZ+1 (present iff lm_t.tcount > 0)",    "(int32) lm_t.tseg_base[] (present iff lm_t.tcount > 0)",    "(int32) Sum(all word string-lengths, including trailing 0 for each)",    "All word strings (including trailing 0 for each)",    "END FILE FORMAT DESCRIPTION",    NULL,};/* * Dump internal LM to file.  Format described above * We don't swap bytes because it could be mmap()ed */static int32lm3g_dump(char const *file, lm_t * model, char const *lmfile){    int32 i, k, zero = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -