⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lmclass.c

📁 WinCE平台上的语音识别程序
💻 C
字号:
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * lmclass.c -- Class-of-words objects in language models. *  * HISTORY *  *  * 24-Mar-1998	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Started. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <math.h>#include "s2types.h"#include "ckd_alloc.h"#include "pio.h"#include "str2words.h"#include "strfuncs.h"#include "err.h"#include "log.h"#include "lmclass.h"#define LMCLASS_UNDEFINED_PROB		32001   /* Some large +ve non-logprob value */voidlmclass_dump(lmclass_t cl, FILE * fp){    lmclass_word_t w;    assert(cl != NULL);    fprintf(fp, "LMCLASS %s\n", cl->name);    for (w = cl->wordlist; w; w = w->next)        fprintf(fp, "    %s\t%d\n", w->word, w->LOGprob);    fprintf(fp, "END %s\n", cl->name);    fflush(fp);}voidlmclass_set_dump(lmclass_set_t set, FILE * fp){    lmclass_t cl;    assert(set != NULL);    for (cl = set->lmclass_list; cl; cl = cl->next)        lmclass_dump(cl, fp);}lmclass_set_tlmclass_newset(void){    lmclass_set_t set;    set = ckd_calloc(1, sizeof(struct lmclass_set_s));    set->lmclass_list = NULL;    return set;}static lmclass_set_tlmclass_add(lmclass_set_t set, lmclass_t new){    lmclass_t cl, prev;    assert(set != NULL);    assert(new != NULL);    prev = NULL;    for (cl = set->lmclass_list;         cl && (strcmp(cl->name, new->name) != 0); cl = cl->next) {        prev = cl;    }    if (cl)        return NULL;            /* Duplicate */    if (prev)        prev->next = new;    else        set->lmclass_list = new;    new->next = NULL;    return set;}static lmclass_tlmclass_addword(lmclass_t class, lmclass_word_t new){    lmclass_word_t w, prev;    assert(class != NULL);    assert(new != NULL);    prev = NULL;    for (w = class->wordlist; w && (strcmp(w->word, new->word) != 0);         w = w->next)        prev = w;    if (w)        return NULL;    if (prev)        prev->next = new;    else        class->wordlist = new;    new->next = NULL;    return class;}lmclass_set_tlmclass_loadfile(lmclass_set_t lmclass_set, char *file){    FILE *fp;    char line[16384], *word[4096], *_eof;    int32 lineno, nwd;    lmclass_t lmclass;    lmclass_word_t lmclass_word;    float SUMp, p;    int32 n_implicit_prob, n_word;    assert(lmclass_set != NULL);    E_INFO("Reading LM Class file '%s'\n", file);    fp = (FILE *) myfopen(file, "r");    lineno = 0;    for (;;) {                  /* Read successive LM classes in this file */        /* Read a non-empty line, presumably the beginning of a new LM class */        while (((_eof = fgets(line, sizeof(line), fp)) != NULL) &&               ((line[0] == '#')                || ((nwd = str2words(line, word, 4096)) == 0))) {            lineno++;        }        if (!_eof)            break;              /* EOF */        lineno++;        if (nwd < 0)            E_FATAL("Line %d: Line too long:\n\t%s\n", lineno, line);        if ((nwd != 2) || (strcmp(word[0], "LMCLASS") != 0))            E_FATAL("Line %d: Expecting LMCLASS <classname>\n", lineno);        if ((word[1][0] != '[') || (word[1][strlen(word[1]) - 1] != ']'))            E_WARN("Line %d: LM class name(%s) not enclosed in []\n",                   lineno, word[1]);        /* Initialize a new LM class object */        lmclass = ckd_calloc(1, sizeof(struct lmclass_s));        lmclass->name = ckd_salloc(word[1]);        lmclass->wordlist = NULL;        /* Add the new class to the existing set */        if ((lmclass_set = lmclass_add(lmclass_set, lmclass)) == NULL)            E_FATAL("Line %d: lmclass_add(%s) failed (duplicate?)\n",                    lineno, word[1]);        /* Read in the LMclass body */        SUMp = 0.0;        n_implicit_prob = 0;        n_word = 0;        for (;;) {            int32 LOGp = 0;     /* No, GCC, it won't be used uninitialized */            while (((_eof = fgets(line, sizeof(line), fp)) != NULL) &&                   ((line[0] == '#')                    || ((nwd = str2words(line, word, 4096)) == 0))) {                lineno++;            }            if (!_eof)                E_FATAL("Premature EOF(%s)\n", file);            lineno++;            if ((nwd != 1) && (nwd != 2))                E_FATAL("Line %d: Syntax error\n", lineno);            if ((nwd == 2) &&                (strcmp(word[0], "END") == 0) &&                (strcmp(word[1], lmclass->name) == 0))                break;            if (nwd == 2) {                if (sscanf(word[1], "%e", &p) == 1) {   /* Not perfect parsing */                    if ((p <= 0.0) || (p >= 1.0))                        E_FATAL("Line %d: Prob(%s) out of range (0,1)\n",                                lineno, word[1]);                    LOGp = LOG(p);                    SUMp += p;                }                else                    E_FATAL("Line %d: Syntax error\n", lineno);            }            else {                LOGp = LMCLASS_UNDEFINED_PROB;  /* Some large +ve quantity */                n_implicit_prob++;            }            /* Create a new word object */            lmclass_word =                ckd_calloc(1, sizeof(struct lmclass_word_s));            lmclass_word->word = ckd_salloc(word[0]);            lmclass_word->dictwid = -1; /* To be filled in by application */            lmclass_word->LOGprob = LOGp;            /* Add the new word to the class */            if ((lmclass = lmclass_addword(lmclass, lmclass_word)) == NULL)                E_FATAL                    ("Line %d: lmclass_addword(%s) failed (duplicate?)\n",                     lineno, word[0]);            n_word++;        }        if (n_implicit_prob > 0) {            int32 LOGp;            if (SUMp >= 1.0)                E_FATAL("Sum(prob)(LMClass %s) = %e\n", lmclass->name,                        SUMp);            p = (1.0 - SUMp) / (float) n_implicit_prob;            LOGp = LOG(p);            for (lmclass_word = lmclass->wordlist;                 lmclass_word; lmclass_word = lmclass_word->next) {                if (lmclass_word->LOGprob == LMCLASS_UNDEFINED_PROB)                    lmclass_word->LOGprob = LOGp;            }        }        else {            /* A generous slack of 0.1 for numerical errors */            if ((SUMp >= 1.1) || (SUMp <= 0.9))                E_WARN("Sum(prob)(LMClass %s) = %e\n", lmclass->name,                       SUMp);        }        E_INFO("Loaded LM Class '%s'; %d words\n", lmclass->name, n_word);    }    fclose(fp);    return lmclass_set;}voidlmclass_set_dictwid(lmclass_word_t w, int32 dictwid){    assert(w != NULL);    w->dictwid = dictwid;}lmclass_tlmclass_get_lmclass(lmclass_set_t set, char *name){    lmclass_t cl;    for (cl = set->lmclass_list;         cl && (strcmp(cl->name, name) != 0); cl = cl->next);    return cl;}int32lmclass_get_nclass(lmclass_set_t set){    lmclass_t cl;    int32 n;    for (n = 0, cl = set->lmclass_list; cl; cl = cl->next, n++);    return n;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -