📄 dict.c

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* ====================================================================
 * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */
/*
 * dict.c -- Pronunciation dictionary.
 *
 * **********************************************
 * CMU ARPA Speech Project
 *
 * Copyright (c) 1997 Carnegie Mellon University.
 * ALL RIGHTS RESERVED.
 * **********************************************
 *
 * HISTORY
 * 19-Apr-01    Ricky Houghton, added code for freeing memory that is allocated internally.
 *
 * 23-Apr-98	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
 * 		Made usage of mdef optional.  If no mdef is specified while loading
 *		a dictionary, it maintains the needed CI phone information internally.
 * 		Added dict_ciphone_str().
 *
 * 02-Jul-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
 * 		Added startwid, finishwid, silwid to dict_t.  Modified dict_filler_word
 * 		to check for start and finishwid.
 *
 * 07-Feb-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
 * 		Created from previous Sphinx-3 version.
 */

#include "dict.h"

#define DELIM	" \t\n"		/* Set of field separator characters */
#define DEFAULT_NUM_PHONE	(MAX_S3CIPID+1)


/*
 * Map a CI phone name to its id.
 *
 * If d->mdef is set, the lookup is delegated to mdef_ciphone_id().  Otherwise
 * the dictionary's own phone table (d->pht, d->ciphone_str) is consulted; a
 * name that is not yet in that table is given the next unused id
 * (d->n_ciphone is post-incremented) and entered into the table.
 *
 * Terminates via E_FATAL if the new id reaches MAX_S3CIPID or if hash_enter()
 * does not return the id just assigned.
 */
static s3cipid_t dict_ciphone_id (dict_t *d, char *str)
{
    int32 id;

    if (d->mdef)
        return mdef_ciphone_id (d->mdef, str);

    if (hash_lookup (d->pht, str, &id) < 0) {
        /* Unknown phone name: allocate a fresh id for it */
        id = (d->n_ciphone)++;

        if (id >= MAX_S3CIPID)
            E_FATAL("Too many CIphones in dictionary; increase MAX_S3CIPID\n");
        d->ciphone_str[id] = (char *) ckd_salloc(str); /* Freed in dict_free()*/

        /* The table is keyed by our private copy of the string, not the caller's buffer */
        if (hash_enter (d->pht, d->ciphone_str[id], id) != id)
            E_FATAL("hash_enter(local-phonetable, %s) failed\n", str);
    }

    return id;
}


/*
 * Return the name of the CI phone at position pos in the pronunciation of
 * word wid.  The name comes from d->mdef when one is attached, otherwise from
 * the dictionary's internal d->ciphone_str table.
 *
 * d must be non-NULL, wid must be in [0, d->n_word) and pos in
 * [0, pronlen of that word); these are checked with assert() only.
 */
const char *dict_ciphone_str (dict_t *d, s3wid_t wid, int32 pos)
{
    assert (d != NULL);
    assert ((wid >= 0) && (wid < d->n_word));
    assert ((pos >= 0) && (pos < d->word[wid].pronlen));

    if (d->mdef)
        return mdef_ciphone_str (d->mdef, d->word[wid].ciphone[pos]);
    else
        return (d->ciphone_str[(int)d->word[wid].ciphone[pos]]);
}


/*
 * Append a word to the dictionary.
 *
 *   d    : dictionary being extended
 *   word : word string; it is temporarily modified in place by
 *          dict_word2basestr() and restored before returning
 *   p    : pronunciation as an array of np CI phone ids (may be NULL)
 *   np   : number of entries in p; if p is NULL or np <= 0 the word gets an
 *          empty pronunciation
 *
 * Returns the id of the new word, or BAD_S3WID if the dictionary already
 * holds d->max_words entries or if hash_enter() does not return the expected
 * id for the word string.  Terminates via E_FATAL if dict_word2basestr()
 * reports a base form (return value > 0) that is not in the dictionary.
 */
s3wid_t dict_add_word (dict_t *d, char *word, s3cipid_t *p, int32 np)
{
    int32 w, len;
    dictword_t *wordp;
    s3wid_t newwid;

    if (d->n_word >= d->max_words) {
        E_ERROR("Dictionary full; add(%s) failed\n", word);
        return (BAD_S3WID);
    }

    wordp = d->word + d->n_word;
    wordp->word = (char *) ckd_salloc (word); /* Freed in dict_free */

    /* Associate word string with d->n_word in hash table */
    if (hash_enter (d->ht, wordp->word, d->n_word) != d->n_word) {
        ckd_free (wordp->word);
        wordp->word = NULL;	/* Do not leave a dangling pointer in the unused slot */
        return (BAD_S3WID);
    }

    /* Fill in word entry, and set defaults */
    if (p && (np > 0)) {
        wordp->ciphone = (s3cipid_t *) ckd_malloc (np * sizeof(s3cipid_t)); /* Freed in dict_free */
        memcpy (wordp->ciphone, p, np*sizeof(s3cipid_t));
        wordp->pronlen = np;
    } else {
        wordp->ciphone = NULL;
        wordp->pronlen = 0;
    }
    wordp->alt = BAD_S3WID;
    wordp->basewid = d->n_word;
    wordp->n_comp = 0;
    wordp->comp = NULL;

    /* Determine base/alt wids */
    if ((len = dict_word2basestr (word)) > 0) {
        /* Truncated to a baseword string; find its ID */
        if (hash_lookup (d->ht, word, &w) < 0) {
            word[len] = '(';	/* Get back the original word */
            E_FATAL("Missing base word for: %s\n", word);
        } else
            word[len] = '(';	/* Get back the original word */

        /* Link into alt list: the new word goes right after the base word */
        wordp->basewid = w;
        wordp->alt = d->word[w].alt;
        d->word[w].alt = d->n_word;
    }

    newwid = d->n_word++;

    return (newwid);
}


/*
 * Read dictionary entries from fp and add them to d.
 *
 * Each line is split by str2words(); the first field is the word string and
 * the remaining fields are CI phone names.  Lines starting with '#' and empty
 * lines are skipped.  A line with a word but no phones, a line containing a
 * phone that dict_ciphone_id() cannot map, and a word rejected by
 * dict_add_word() are each reported with E_ERROR and ignored.
 *
 * Lines are read with fgets() into a 16384-byte buffer and at most 4092
 * fields per line are accepted; str2words() returning < 0 is fatal.
 *
 * Always returns 0.
 */
static int32 dict_read (FILE *fp, dict_t *d)
{
    char line[16384], **wptr;
    s3cipid_t p[4096];
    int32 lineno, nwd;
    s3wid_t w;
    int32 i, maxwd;

    maxwd = 4092;
    wptr = (char **) ckd_calloc (maxwd, sizeof(char *)); /* Freed below */

    lineno = 0;
    while (fgets (line, sizeof(line), fp) != NULL) {
        lineno++;
        if (line[0] == '#')	/* Comment line */
            continue;

        if ((nwd = str2words (line, wptr, maxwd)) < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line, maxwd);

        if (nwd == 0)	    /* Empty line */
            continue;

        /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */
        if (nwd == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n", lineno, wptr[0]);
            continue;
        }

        /* Convert pronunciation string to CI-phone-ids */
        for (i = 1; i < nwd; i++) {
            p[i-1] = dict_ciphone_id (d, wptr[i]);
            if (NOT_S3CIPID(p[i-1])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, wptr[i], wptr[0]);
                break;
            }
        }

        if (i == nwd) {	/* All CI-phones successfully converted to IDs */
            w = dict_add_word (d, wptr[0], p, nwd-1);
            if (NOT_S3WID(w))
                E_ERROR("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                        lineno, wptr[0]);
        }
    }

    ckd_free (wptr);

    return 0;
}


/*
 * Build and return an array of d->n_word word ids that chains together the
 * compound words sharing the same first component.
 *
 * Every entry starts as BAD_S3WID.  Then, for each word w with n_comp > 0 and
 * first component c = comp[0], w is pushed onto the front of c's chain:
 * comp_head[w] takes the previous value of comp_head[c], and comp_head[c]
 * becomes w.
 */
static s3wid_t *dict_comp_head (dict_t *d)
{
    int32 w;
    s3wid_t *comp_head;

    comp_head = (s3wid_t *) ckd_calloc (d->n_word, sizeof(s3wid_t)); /* freed in dict_free */

    for (w = 0; w < d->n_word; w++)
        comp_head[w] = BAD_S3WID;

    for (w = 0; w < d->n_word; w++) {
        if (d->word[w].n_comp > 0) {
            comp_head[w] = comp_head[d->word[w].comp[0]];
            comp_head[d->word[w].comp[0]] = w;
        }
    }

    return comp_head;
}


/*
 * Scan the dictionary for compound words.  This function should be called just after
 * loading the dictionary.  For the moment, compound words in a compound word are
 * assumed to be separated by the given sep character, (underscore in the CMU dict).
 * Return value: #compound words found in dictionary.
 *
 * For every word id the string of its base word (dict_basewid) is examined,
 * so a base word is visited once for itself and once more for each of its
 * alternative pronunciations, and is counted each time.
 *
 * The component split is done on a heap copy of the word string, so word
 * length is not limited by any fixed-size buffer.
 *
 * Terminates via E_FATAL on a leading, trailing or doubled separator, on a
 * compound start/finish/filler word, and on a component that is not itself a
 * dictionary word.
 */
static int32 dict_build_comp (dict_t *d,
			      char sep)		/* Separator character */
{
    char *wd;		/* Scratch copy of a compound word, split in place */
    int32 w, cwid;
    dictword_t *wordp;
    int32 nc;		/* # compound words in dictionary */
    int32 i, j, l, n;

    nc = 0;
    for (w = 0; w < d->n_word; w++) {
        wordp = d->word + dict_basewid(d, w);
        l = (int32) strlen(wordp->word);

        /* An empty string has no components; also keeps word[l-1] below in bounds */
        if (l == 0)
            continue;

        if ((wordp->word[0] == sep) || (wordp->word[l-1] == sep))
            E_FATAL("Bad compound word %s: leading or trailing separator\n", wordp->word);

        /* Count no. of components in this word */
        n = 1;
        for (i = 1; i < l-1; i++)	/* 0 and l-1 already checked above */
            if (wordp->word[i] == sep)
                n++;
        if (n == 1)
            continue;		/* Not a compound word */
        nc++;

        if ((w == d->startwid) || (w == d->finishwid) || dict_filler_word (d, w))
            E_FATAL("Compound special/filler word (%s) not allowed\n", wordp->word);

        /*
         * The same base entry is reached again through each alternative
         * pronunciation; release the component array from the earlier visit
         * so it is not leaked when a new one is allocated below.
         */
        if (wordp->comp != NULL)
            ckd_free ((void *) wordp->comp);

        /* Allocate and fill in component word info */
        wordp->n_comp = n;
        wordp->comp = (s3wid_t *) ckd_calloc (n, sizeof(s3wid_t)); /* freed in dict_free */

        /* Parse word string into components, working on a private copy */
        wd = (char *) ckd_salloc (wordp->word); /* Freed below */
        n = 0;
        for (i = 0; i < l; i++) {
            /* Advance i to the next separator (or end of string); j marks the component start */
            for (j = i; (i < l) && (wd[i] != sep); i++);
            if (j == i)
                E_FATAL("Bad compound word %s: successive separators\n", wordp->word);

            wd[i] = '\0';
            cwid = dict_wordid (d, wd+j);
            if (NOT_S3WID(cwid))
                E_FATAL("Component word %s of %s not in dictionary\n", wd+j, wordp->word);
            wordp->comp[n] = cwid;
            n++;
        }
        ckd_free ((void *) wd);
    }

    if (nc > 0)
        d->comp_head = dict_comp_head (d);

    return nc;
}
12 下一页
💿 文件大小 710 K
👤 上传用户 wanghao891207
📂 所属分类多媒体处理
🏷️ 相关标签

#SPHINX #CMU #词汇 #语音识别系统
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -