/*
 * lmclass.c
 * From "CMU's renowned SPHINX-3 large-vocabulary continuous speech
 * recognition system" · C source code · 290 lines in total
 * C
 * 290 lines
 */
/* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * lmclass.c -- Class-of-words objects in language models. * * HISTORY * * 19-Feb-2004 A Chan (archan@cs.cmu.edu) at Carnegie Mellon University * copied from sphinx 2 code base. * 24-Mar-1998 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Started. 
*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <math.h>#include "lmclass.h"#include "logs3.h"#define LMCLASS_UNDEFINED_PROB 32001 /* Some large +ve non-logprob value */void lmclass_dump (lmclass_t cl, FILE *fp){ lmclass_word_t w; assert (cl != NULL); fprintf (fp, "LMCLASS %s\n", cl->name); for (w = cl->wordlist; w; w = w->next) fprintf (fp, " %s\t%d\n", w->word, w->LOGprob); fprintf (fp, "END %s\n", cl->name); fflush (fp);}void lmclass_set_dump (lmclass_set_t set, FILE *fp){ lmclass_t cl; assert (set != NULL); for (cl = set->lmclass_list; cl; cl = cl->next) lmclass_dump (cl, fp);}lmclass_set_t lmclass_newset ( void ){ lmclass_set_t set; set = (lmclass_set_t) ckd_calloc (1, sizeof(struct lmclass_set_s)); set->lmclass_list = NULL; return set;}static lmclass_set_t lmclass_add (lmclass_set_t set, lmclass_t new){ lmclass_t cl, prev; assert (set != NULL); assert (new != NULL); prev = NULL; for (cl = set->lmclass_list; cl && (strcmp (cl->name, new->name) != 0); cl = cl->next) { prev = cl; } if (cl) return NULL; /* Duplicate */ if (prev) prev->next = new; else set->lmclass_list = new; new->next = NULL; return set;}static lmclass_t lmclass_addword (lmclass_t class, lmclass_word_t new){ lmclass_word_t w, prev; assert (class != NULL); assert (new != NULL); prev = NULL; for (w = class->wordlist; w && (strcmp (w->word, new->word) != 0); w = w->next) prev = w; if (w) return NULL; if (prev) prev->next = new; else class->wordlist = new; new->next = NULL; return class;}lmclass_set_t lmclass_loadfile (lmclass_set_t lmclass_set, char *file){ FILE *fp; char line[16384], *word[4096], *_eof; int32 lineno, nwd; lmclass_t lmclass; lmclass_word_t lmclass_word; float SUMp, p; int32 n_implicit_prob, n_word; assert (lmclass_set != NULL); E_INFO("Reading LM Class file '%s'\n", file); fp = myfopen(file,"r"); /*(FILE *) CM_fopen(file, "r");*/ lineno = 0; for (;;) { /* Read successive LM classes in this file */ /* Read a non-empty line, presumably the 
beginning of a new LM class */ while (((_eof = fgets (line, sizeof(line), fp)) != NULL) && ((line[0] == '#') || ((nwd = str2words(line, word, 4096)) == 0))) { lineno++; } if (! _eof) break; /* EOF */ lineno++; if (nwd < 0) E_FATAL("Line %d: Line too long:\n\t%s\n", lineno, line); if ((nwd != 2) || (strcmp (word[0], "LMCLASS") != 0)) E_FATAL("Line %d: Expecting LMCLASS <classname>\n", lineno); if ((word[1][0] != '[') || (word[1][strlen(word[1])-1] != ']')) E_WARN("Line %d: LM class name(%s) not enclosed in []\n", lineno, word[1]); /* Initialize a new LM class object */ lmclass = (lmclass_t) ckd_calloc (1, sizeof(struct lmclass_s)); lmclass->name = ckd_salloc(word[1]); lmclass->wordlist = NULL; /* Add the new class to the existing set */ if ((lmclass_set = lmclass_add (lmclass_set, lmclass)) == NULL) E_FATAL("Line %d: lmclass_add(%s) failed (duplicate?)\n", lineno, word[1]); /* Read in the LMclass body */ SUMp = 0.0; n_implicit_prob = 0; n_word = 0; for (;;) { int32 LOGp = 0; /* No, GCC, it won't be used uninitialized */ while (((_eof = fgets (line, sizeof(line), fp)) != NULL) && ((line[0] == '#') || ((nwd = str2words(line, word, 4096)) == 0))) { lineno++; } if (! 
_eof) E_FATAL("Premature EOF(%s)\n", file); lineno++; if ((nwd != 1) && (nwd != 2)) E_FATAL("Line %d: Syntax error\n", lineno); if ((nwd == 2) && (strcmp (word[0], "END") == 0) && (strcmp (word[1], lmclass->name) == 0)) break; if (nwd == 2) { if (sscanf (word[1], "%e", &p) == 1) { /* Not perfect parsing */ if ((p <= 0.0) || (p >= 1.0)) E_FATAL("Line %d: Prob(%s) out of range (0,1)\n", lineno, word[1]); /*fix me!*/ LOGp = logs3(p); SUMp += p; } else E_FATAL("Line %d: Syntax error\n", lineno); } else { LOGp = LMCLASS_UNDEFINED_PROB; /* Some large +ve quantity */ n_implicit_prob++; } /* Create a new word object */ lmclass_word = (lmclass_word_t) ckd_calloc (1, sizeof(struct lmclass_word_s)); lmclass_word->word = ckd_salloc(word[0]); lmclass_word->dictwid = -1; /* To be filled in by application */ lmclass_word->LOGprob = LOGp; /* Add the new word to the class */ if ((lmclass = lmclass_addword (lmclass, lmclass_word)) == NULL) E_FATAL("Line %d: lmclass_addword(%s) failed (duplicate?)\n", lineno, word[0]); n_word++; } if (n_implicit_prob > 0) { int32 LOGp; if (SUMp >= 1.0) E_FATAL("Sum(prob)(LMClass %s) = %e\n", lmclass->name, SUMp); p = ((float32)1.0 - SUMp) / (float32)n_implicit_prob; LOGp = logs3(p); for (lmclass_word = lmclass->wordlist; lmclass_word; lmclass_word = lmclass_word->next) { if (lmclass_word->LOGprob == LMCLASS_UNDEFINED_PROB) lmclass_word->LOGprob = LOGp; } } else { /* A generous slack of 0.1 for numerical errors */ if ((SUMp >= 1.1) || (SUMp <= 0.9)) E_INFO("Sum(prob)(LMClass %s) = %e\n", lmclass->name, SUMp); } E_INFO("Loaded LM Class '%s'; %d words\n", lmclass->name, n_word); } fclose (fp); return lmclass_set;}void lmclass_set_dictwid (lmclass_word_t w, int32 dictwid){ assert (w != NULL); w->dictwid = dictwid;}lmclass_t lmclass_get_lmclass (lmclass_set_t set, char *name){ lmclass_t cl; for (cl = set->lmclass_list; cl && (strcmp (cl->name, name) != 0); cl = cl->next); return cl;}int32 lmclass_get_nclass (lmclass_set_t set){ lmclass_t cl; int32 n; for 
(n = 0, cl = set->lmclass_list; cl; cl = cl->next, n++); return n;}
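/*
 * Keyboard shortcut help (apparently left over from the code-viewer page)
 * Copy code: Ctrl + C
 * Search code: Ctrl + F
 * Full-screen mode: F11
 * Increase font size: Ctrl + =
 * Decrease font size: Ctrl + -
 * Show shortcuts: ?
 */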