📄 dict.h

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 H
字号:
/* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * dict.h -- Pronunciation dictionary structures * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1997 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** *  * HISTORY * 19-Apr-01    Ricky Houghton, added code for freeing memory that is allocated internally. *  * 23-Apr-98	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Made usage of mdef optional.  If no mdef is specified while loading *		a dictionary, it maintains the needed CI phone information internally. * 		Added dict_ciphone_str(). *  * 02-Jul-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Added startwid, finishwid, silwid to dict_t structure. *  * 07-Feb-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Created from previous Sphinx-3 version. */#ifndef _S3_DICT_H_#define _S3_DICT_H_/** \file dict.h * \brief Operations on dictionary.  */#include <s3types.h>#include "mdef.h"	/* This is still a sore point; dict should be independent of mdef */#ifdef __cplusplusextern "C" {#endiftypedef struct {    char *word;		/** Ascii word string */    s3cipid_t *ciphone;	/** Pronunciation */    int32 pronlen;	/** Pronunciation length */    s3wid_t alt;	/** Next alternative pronunciation id, NOT_S3WID if none */    s3wid_t basewid;	/** Base pronunciation id */    int32 n_comp;	/** If this is a compound word, no. of component words; else 0.			   (Compound words are concatenations of other words, such as			   GOING_TO, whose pronunciation is quite different from the			   mere concatenation of their components.) */    s3wid_t *comp;	/** If n_comp > 0, its components */} dictword_t;typedef struct {    mdef_t *mdef;	/** Model definition used for phone IDs; NULL if none used */    hash_table_t *pht;	/** Used only if CI phones handled internally (mdef == NULL) */    char **ciphone_str;	/** Used only if CI phones handled internally (mdef == NULL) */    int32 n_ciphone;	/** Used only if CI phones handled internally (mdef == NULL) */    dictword_t *word;	/** Array of entries in dictionary */    hash_table_t *ht;	/** Hash table for mapping word strings to word ids */    int32 max_words;	/** #Entries allocated in dict, including empty slots */    int32 n_word;	/** #Occupied entries in dict; ie, excluding empty slots */    int32 filler_start;	/** First filler word id (read from filler dict) */    int32 filler_end;	/** Last filler word id (read from filler dict) */    s3wid_t *comp_head;	/** comp_head[w] = wid of a compound word with 1st component = w;			   comp_head[comp_head[w]] = next such compound word, and so on,			   until we hit BAD_S3WID.  NULL if no compound word in dict. */    s3wid_t startwid;	/** FOR INTERNAL-USE ONLY */    s3wid_t finishwid;	/** FOR INTERNAL-USE ONLY */    s3wid_t silwid;	/** FOR INTERNAL-USE ONLY */} dict_t;  /** * Initialize with given main and filler dictionary files.  fillerfile can be NULL * (but external modules might impose their own requirements). * Return ptr to dict_t if successful, NULL otherwise. */dict_t *dict_init (mdef_t *mdef,	/** For looking up CI phone IDs; NULL if none,					   in which case CI phones kept internally */		   char *dictfile,	/** Main dictionary file */		   char *fillerfile,	/** Filler dictionary file */		   char comp_sep);	/** Compound word separator character, or 0 if					   no compound words */  /** Return word id for given word string if present.  Otherwise return BAD_S3WID */s3wid_t dict_wordid (dict_t *d, char *word);  /** * Return 1 if w is a filler word, 0 if not.  A filler word is one that was read in from the * filler dictionary; however, sentence START and FINISH words are not filler words. */int32 dict_filler_word (dict_t *d, s3wid_t w);  /** * Add a word with the given ciphone pronunciation list to the dictionary. * Return value: Result word id if successful, BAD_S3WID otherwise */s3wid_t dict_add_word (dict_t *d, char *word, s3cipid_t *p, int32 np);  /** * Look for a compound word that matches the given word-id sequence. * Return value: Base ID of compound word if found, else BAD_S3WID. */s3wid_t dict_wids2compwid (dict_t *d,		/** In: Dictionary to look up */			   s3wid_t *wid,	/** In: Component words to look for */			   int32 len);		/** In: No. of component words */  /** * Return value: CI phone string for the given word, phone position. */const char *dict_ciphone_str (dict_t *d,	/** In: Dictionary to look up */			      s3wid_t wid,	/** In: Component word being looked up */			      int32 pos);	/** In: Pronunciation phone position */  /** Packaged macro access to dictionary members */#define dict_size(d)		((d)->n_word)#define dict_basewid(d,w)	((d)->word[w].basewid)#define dict_wordstr(d,w)	((d)->word[w].word)#define dict_nextalt(d,w)	((d)->word[w].alt)#define dict_pronlen(d,w)	((d)->word[w].pronlen)#define dict_pron(d,w,p)	((d)->word[w].ciphone[p])#define dict_filler_start(d)	((d)->filler_start)#define dict_filler_end(d)	((d)->filler_end)#define dict_startwid(d)	((d)->startwid)#define dict_finishwid(d)	((d)->finishwid)#define dict_silwid(d)		((d)->silwid)#define dict_first_phone(d,w)	((d)->word[w].ciphone[0])#define dict_last_phone(d,w)	((d)->word[w].ciphone[(d)->word[w].pronlen - 1])/* Function versions of some of the above macros; note the leading underscore. */  /** * Return base word id for given word id w (which may be itself).  w must be valid. */s3wid_t _dict_basewid (dict_t *d, s3wid_t w);  /** * Return word string for given word id, which must be valid. */char *_dict_wordstr (dict_t *d, s3wid_t wid);  /** * Return the next alternative word id for the given word id, which must be valid. * The returned id may be BAD_S3WID if there is none. */s3wid_t _dict_nextalt (dict_t *d, s3wid_t wid);  /** * If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative * pronunciation specification), strip that trailing portion from it.  Note that the given * string is modified. * Return value: If string was modified, the character position at which the original string * was truncated; otherwise -1. */int32 dict_word2basestr (char *word);  /* RAH, free memory allocated for the dictionary */  /** Free memory allocated for the dictionary */void dict_free (dict_t *d);#ifdef __cplusplus}#endif#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -