📄 dict.c
字号:
/* ====================================================================
 * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */
/*
 * dict.c -- Pronunciation dictionary.
 *
 * **********************************************
 * CMU ARPA Speech Project
 *
 * Copyright (c) 1997 Carnegie Mellon University.
 * ALL RIGHTS RESERVED.
 * **********************************************
 *
 * HISTORY
 *
 * 19-Apr-01    Ricky Houghton, added code for freeing memory that is allocated internally.
 *
 * 23-Apr-98	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
 * 		Made usage of mdef optional.  If no mdef is specified while loading
 * 		a dictionary, it maintains the needed CI phone information internally.
 * 		Added dict_ciphone_str().
 *
 * 02-Jul-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
 * 		Added startwid, finishwid, silwid to dict_t.  Modified dict_filler_word
 * 		to check for start and finishwid.
 *
 * 07-Feb-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
 * 		Created from previous Sphinx-3 version.
 */


#include "dict.h"


#define DELIM	" \t\n"		/* Set of field separator characters */
#define DEFAULT_NUM_PHONE	(MAX_S3CIPID+1)


/*
 * Map a CI phone name to its ID.
 *
 * If a model definition is attached to the dictionary (d->mdef), the lookup is
 * delegated to mdef_ciphone_id().  Otherwise the dictionary's own phone table
 * (d->pht, d->ciphone_str) is consulted; a name that is not yet in the table is
 * added to it and given the next unused ID (d->n_ciphone).  Exceeding
 * MAX_S3CIPID, or a failure to enter the new name in the table, is reported
 * through E_FATAL.
 */
static s3cipid_t dict_ciphone_id (dict_t *d, char *str)
{
    int32 id;

    if (d->mdef)
        return mdef_ciphone_id (d->mdef, str);

    if (hash_lookup (d->pht, str, &id) < 0) {
        /* First time this phone name is seen: give it the next free ID */
        id = (d->n_ciphone)++;

        if (id >= MAX_S3CIPID)
            E_FATAL("Too many CIphones in dictionary; increase MAX_S3CIPID\n");

        d->ciphone_str[id] = (char *) ckd_salloc(str);	/* Freed in dict_free() */

        /* The table keeps a pointer to our private copy, not to the caller's string */
        if (hash_enter (d->pht, d->ciphone_str[id], id) != id)
            E_FATAL("hash_enter(local-phonetable, %s) failed\n", str);
    }

    return id;
}


/*
 * Return the name of the CI phone at position pos in the pronunciation of word
 * wid.  The name comes from the model definition when one is attached, and from
 * the dictionary's own phone-name table otherwise.  wid and pos are range
 * checked with assert() only.
 */
const char *dict_ciphone_str (dict_t *d, s3wid_t wid, int32 pos)
{
    assert (d != NULL);
    assert ((wid >= 0) && (wid < d->n_word));
    assert ((pos >= 0) && (pos < d->word[wid].pronlen));

    if (d->mdef)
        return mdef_ciphone_str (d->mdef, d->word[wid].ciphone[pos]);
    else
        return (d->ciphone_str[(int)d->word[wid].ciphone[pos]]);
}


/*
 * Append a word and its pronunciation to the dictionary.
 *
 *   d     Dictionary to extend.
 *   word  Word string.  It is briefly modified in place (and restored) while
 *         the base word of an alternative pronunciation is looked up.
 *   p     CI phone IDs of the pronunciation; may be NULL.
 *   np    Number of entries in p.  With p == NULL or np <= 0 the word is
 *         stored with an empty pronunciation.
 *
 * Returns the ID given to the new word, or BAD_S3WID if the dictionary is full
 * or the word could not be entered in the word hash table.  A word for which
 * dict_word2basestr() reports a base word that is not in the dictionary is
 * reported through E_FATAL.
 */
s3wid_t dict_add_word (dict_t *d, char *word, s3cipid_t *p, int32 np)
{
    int32 w, len, found;
    dictword_t *wordp;
    s3wid_t newwid;

    if (d->n_word >= d->max_words) {
        E_ERROR("Dictionary full; add(%s) failed\n", word);
        return (BAD_S3WID);
    }

    wordp = d->word + d->n_word;
    wordp->word = (char *) ckd_salloc (word);	/* Freed in dict_free */

    /* Associate word string with d->n_word in hash table */
    if (hash_enter (d->ht, wordp->word, d->n_word) != d->n_word) {
        ckd_free (wordp->word);
        wordp->word = NULL;	/* Do not leave a dangling pointer in the unused slot */
        return (BAD_S3WID);
    }

    /* Fill in word entry, and set defaults */
    if (p && (np > 0)) {
        wordp->ciphone = (s3cipid_t *) ckd_malloc (np * sizeof(s3cipid_t));	/* Freed in dict_free */
        memcpy (wordp->ciphone, p, np * sizeof(s3cipid_t));
        wordp->pronlen = np;
    } else {
        wordp->ciphone = NULL;
        wordp->pronlen = 0;
    }
    wordp->alt = BAD_S3WID;
    wordp->basewid = d->n_word;
    wordp->n_comp = 0;
    wordp->comp = NULL;

    /* Determine base/alt wids */
    if ((len = dict_word2basestr (word)) > 0) {
        /* Truncated to a baseword string; find its ID */
        found = hash_lookup (d->ht, word, &w);

        word[len] = '(';	/* Get back the original word */

        if (found < 0)
            E_FATAL("Missing base word for: %s\n", word);

        /* Link into alt list, directly behind the base word */
        wordp->basewid = w;
        wordp->alt = d->word[w].alt;
        d->word[w].alt = d->n_word;
    }

    newwid = d->n_word++;

    return (newwid);
}


/*
 * Read dictionary entries from fp and add them to d.
 *
 * Each line holds a word followed by its pronunciation as a sequence of CI
 * phone names.  Lines starting with '#' and empty lines are skipped.  A line
 * without a pronunciation, with a phone that cannot be mapped to an ID, or
 * with a word that dict_add_word() rejects is reported with E_ERROR and
 * skipped.  Always returns 0.
 *
 * NOTE(review): a line longer than the line buffer is returned by fgets() in
 * pieces, and each piece is then parsed as a separate entry.
 */
static int32 dict_read (FILE *fp, dict_t *d)
{
    char line[16384], **wptr;
    s3cipid_t p[4096];		/* Must hold at least maxwd-1 phone IDs */
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;

    maxwd = 4092;
    wptr = (char **) ckd_calloc (maxwd, sizeof(char *));	/* Freed below */

    lineno = 0;
    while (fgets (line, sizeof(line), fp) != NULL) {
        lineno++;
        if (line[0] == '#')	/* Comment line */
            continue;

        if ((nwd = str2words (line, wptr, maxwd)) < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line, maxwd);

        if (nwd == 0)		/* Empty line */
            continue;

        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n", lineno, wptr[0]);
            continue;
        }

        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd; i++) {
            p[i-1] = dict_ciphone_id (d, wptr[i]);
            if (NOT_S3CIPID(p[i-1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, wptr[i], wptr[0]);
                break;
            }
        }

        if (i == nwd) {	/* All CI-phones successfully converted to IDs */
            w = dict_add_word (d, wptr[0], p, nwd-1);
            if (NOT_S3WID(w))
                E_ERROR("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                        lineno, wptr[0]);
        }
    }

    ckd_free (wptr);

    return 0;
}


/*
 * Allocate and fill the comp_head table (one entry per word, initially
 * BAD_S3WID).  Every word with components (n_comp > 0) is pushed onto the
 * front of the chain that starts at the entry of its first component: the
 * word's own entry receives the previous head of that chain, and the first
 * component's entry is set to the word.
 */
static s3wid_t *dict_comp_head (dict_t *d)
{
    int32 w;
    s3wid_t *comp_head;

    comp_head = (s3wid_t *) ckd_calloc (d->n_word, sizeof(s3wid_t));	/* freed in dict_free */

    for (w = 0; w < d->n_word; w++)
        comp_head[w] = BAD_S3WID;

    for (w = 0; w < d->n_word; w++) {
        if (d->word[w].n_comp > 0) {
            comp_head[w] = comp_head[d->word[w].comp[0]];
            comp_head[d->word[w].comp[0]] = w;
        }
    }

    return comp_head;
}


/*
 * Scan the dictionary for compound words.  This function should be called just after
 * loading the dictionary.  For the moment, compound words in a compound word are
 * assumed to be separated by the given sep character, (underscore in the CMU dict).
 * Return value: #compound words found in dictionary.
 *
 * Every word ID is resolved to its base word, so the component list is stored on
 * the base word entry and a base word is visited once for itself and once for each
 * of its alternative pronunciations (each visit is counted in the return value).
 * Malformed compound words, compound special/filler words and components that are
 * not in the dictionary are reported through E_FATAL.
 */
static int32 dict_build_comp (dict_t *d,
                              char sep)	/* Separator character */
{
    char *wd;		/* Scratch copy of a compound word, split in place at separators */
    int32 w, cwid;
    dictword_t *wordp;
    int32 nc;		/* # compound words in dictionary */
    int32 i, j, l, n;

    nc = 0;
    for (w = 0; w < d->n_word; w++) {
        wordp = d->word + dict_basewid(d, w);

        l = (int32) strlen (wordp->word);
        if (l == 0)	/* Nothing to split; also keeps the [l-1] access below in bounds */
            continue;

        if ((wordp->word[0] == sep) || (wordp->word[l-1] == sep))
            E_FATAL("Bad compound word %s: leading or trailing separator\n", wordp->word);

        /* Count no. of components in this word */
        n = 1;
        for (i = 1; i < l-1; i++)	/* 0 and l-1 already checked above */
            if (wordp->word[i] == sep)
                n++;
        if (n == 1)
            continue;	/* Not a compound word */

        nc++;

        if ((w == d->startwid) || (w == d->finishwid) || dict_filler_word (d, w))
            E_FATAL("Compound special/filler word (%s) not allowed\n", wordp->word);

        /*
         * Allocate and fill in component word info.  The base word may already have
         * been processed (it is reached again through each alternative pronunciation);
         * release the earlier list instead of leaking it.
         */
        if (wordp->comp != NULL)
            ckd_free (wordp->comp);
        wordp->n_comp = n;
        wordp->comp = (s3wid_t *) ckd_calloc (n, sizeof(s3wid_t));	/* freed in dict_free */

        /*
         * Parse word string into components.  The copy is sized to the word, so
         * words of any length are handled without a fixed-size stack buffer.
         */
        wd = (char *) ckd_salloc (wordp->word);	/* Freed at the end of this iteration */
        n = 0;
        for (i = 0; i < l; i++) {
            /* Advance i to the next separator or to the end of the word */
            for (j = i; (i < l) && (wd[i] != sep); i++);
            if (j == i)
                E_FATAL("Bad compound word %s: successive separators\n", wordp->word);

            wd[i] = '\0';	/* Terminate the component that starts at wd+j */
            cwid = dict_wordid (d, wd+j);
            if (NOT_S3WID(cwid))
                E_FATAL("Component word %s of %s not in dictionary\n", wd+j, wordp->word);
            wordp->comp[n] = cwid;
            n++;
        }
        ckd_free (wd);
    }

    if (nc > 0)
        d->comp_head = dict_comp_head (d);

    return nc;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -