📄 dict.c

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
dict_t *dict_init (mdef_t *mdef, char *dictfile, char *fillerfile, char comp_sep){    FILE *fp, *fp2;    int32 n ;    char line[1024];    dict_t *d;        if (! dictfile)	E_FATAL("No dictionary file\n");    /*     * First obtain #words in dictionary (for hash table allocation).     * Reason: The PC NT system doesn't like to grow memory gradually.  Better to allocate     * all the required memory in one go.     */    if ((fp = fopen(dictfile, "r")) == NULL)	E_FATAL_SYSTEM("fopen(%s,r) failed\n", dictfile);    n = 0;    while (fgets (line, sizeof(line), fp) != NULL) {	if (line[0] != '#')	    n++;    }    rewind (fp);    fp2 = NULL;    if (fillerfile) {	if ((fp2 = fopen(fillerfile, "r")) == NULL)	    E_FATAL_SYSTEM("fopen(%s,r) failed\n", fillerfile);	while (fgets (line, sizeof(line), fp2) != NULL) {	    if (line[0] != '#')		n++;	}	rewind (fp2);    }        /*     * Allocate dict entries.  HACK!!  Allow some extra entries for words not in file.     * Also check for type size restrictions.     */    d = (dict_t *) ckd_calloc (1, sizeof(dict_t)); /* freed in dict_free() */    d->max_words = (n+1024 < MAX_S3WID) ? n+1024 : MAX_S3WID;    if (n >= MAX_S3WID)	E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n, MAX_S3WID);        d->word = (dictword_t *) ckd_calloc (d->max_words, sizeof(dictword_t)); /* freed in dict_free() */    d->n_word = 0;    d->mdef = mdef;    if (mdef) {	d->pht = NULL;	d->ciphone_str = NULL;    } else {	d->pht = hash_new (DEFAULT_NUM_PHONE, 1 /* No case */);	d->ciphone_str = (char **) ckd_calloc (DEFAULT_NUM_PHONE, sizeof(char *)); /* freed in dict_free() */    }    d->n_ciphone = 0;        /* Create new hash table for word strings; case-insensitive word strings */    d->ht = hash_new (d->max_words, 1 /* no-case */);    /* Initialize with no compound words */    d->comp_head = NULL;        /* Digest main dictionary file */    E_INFO("Reading main dictionary: %s\n", dictfile);    dict_read (fp, d);    fclose (fp);    E_INFO("%d words read\n", d->n_word);    /* Now the filler dictionary file, if it exists */    d->filler_start = d->n_word;    if (fillerfile) {        E_INFO("Reading filler dictionary: %s\n", fillerfile);	dict_read (fp2, d);	fclose (fp2);	E_INFO("%d words read\n", d->n_word - d->filler_start);    }    d->filler_end = d->n_word-1;    /* Initialize distinguished word-ids */    d->startwid = dict_wordid (d, S3_START_WORD);    d->finishwid = dict_wordid (d, S3_FINISH_WORD);    d->silwid = dict_wordid (d, S3_SILENCE_WORD);#if 0    if (NOT_S3WID(d->startwid))	E_WARN("%s not in dictionary\n", S3_START_WORD);    if (NOT_S3WID(d->finishwid))	E_WARN("%s not in dictionary\n", S3_FINISH_WORD);    if (NOT_S3WID(d->silwid))	E_WARN("%s not in dictionary\n", S3_SILENCE_WORD);#endif    /* Identify compound words if indicated */    if (comp_sep) {	E_INFO("Building compound words (separator = '%c')\n", comp_sep);	n = dict_build_comp (d, comp_sep);	E_INFO("%d compound words\n", n);    }        return d;}s3wid_t dict_wordid (dict_t *d, char *word){    int32 w;        assert (d);    assert (word);        if (hash_lookup (d->ht, word, &w) < 0)	return (BAD_S3WID);    return ((s3wid_t) w);}s3wid_t _dict_basewid (dict_t *d, s3wid_t w){    assert (d);    assert ((w >= 0) && (w < d->n_word));        return (d->word[w].basewid);}char *_dict_wordstr (dict_t *d, s3wid_t wid){    assert (d);    assert (IS_S3WID(wid) && (wid < d->n_word));        return (d->word[wid].word);}s3wid_t _dict_nextalt (dict_t *d, s3wid_t wid){    assert (d);    assert (IS_S3WID(wid) && (wid < d->n_word));        return (d->word[wid].alt);}int32 dict_filler_word (dict_t *d, s3wid_t w){    assert (d);    assert ((w >= 0) && (w < d->n_word));        w = dict_basewid(d, w);    if ((w == d->startwid) || (w == d->finishwid))	return 0;    if ((w >= d->filler_start) && (w <= d->filler_end))	return 1;    return 0;}s3wid_t dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len){    s3wid_t w;    int32 i;        if (! d->comp_head)	return BAD_S3WID;        assert (len > 1);        for (w = d->comp_head[wid[0]]; IS_S3WID(w); w = d->comp_head[w]) {	/* w is a compound word beginning with wid[0]; check if rest matches */	assert (d->word[w].n_comp > 1);	assert (d->word[w].comp[0] == wid[0]);		if (d->word[w].n_comp == len) {	    for (i = 0; (i < len) && (d->word[w].comp[i] == wid[i]); i++);	    if (i == len)		return (dict_basewid(d, w));	}    }    return BAD_S3WID;}int32 dict_word2basestr (char *word){    int32 i, len;        len = strlen(word);    if (word[len-1] == ')') {	for (i = len-2; (i > 0) && (word[i] != '('); --i);		if (i > 0) {	    /* The word is of the form <baseword>(...); strip from left-paren */	    word[i] = '\0';	    return i;	}    }    return -1;}/* RAH 4.19.01, try to free memory allocated by the calls above.   All testing I've done shows that this gets all the memory, however I've    likely not tested all cases.  */void dict_free (dict_t *d){  int i;  dictword_t *word;  if (d) { /* Clean up the dictionary stuff*/    /* First Step, free all memory allocated for each word */    for (i=0;i<d->n_word;i++) {      word = (dictword_t *) &(d->word[i]);      if (word->word) 	ckd_free ((void *)word->word);      if (word->ciphone)       ckd_free ((void *)word->ciphone);      if (word->comp) 	ckd_free ((void *)word->comp);    }        if (d->word)       ckd_free ((void *)d->word);    for (i=0;i<d->n_ciphone;i++) {      if (d->ciphone_str[i]) 	ckd_free ((void *)d->ciphone_str[i]);    }    if (d->comp_head)       ckd_free ((void *)d->comp_head);    if (d->ciphone_str)      ckd_free ((void *)d->ciphone_str);    if (d->pht)      hash_free (d->pht);    if (d->ht)      hash_free (d->ht);    ckd_free ((void *)d);  }}#if (_DICT_TEST_)main (int32 argc, char *argv[]){    mdef_t *m;    dict_t *d;    char wd[1024];    s3wid_t wid;    int32 p;        if (argc < 3)	E_FATAL("Usage: %s {mdeffile | NULL} dict [fillerdict]\n", argv[0]);        m = (strcmp (argv[1], "NULL") != 0) ? mdef_init (argv[1]) : NULL;    /*  d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), '_'); */ /*  */  d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), ' '); */ /* RAH, remove compound word separator */    #define _DICT_MEM_LEAK_TEST_ 0#if (_DICT_MEM_LEAK_TEST_)  if (0) { /* RAH For now, just exit so we can check for memory leaks */    strcpy (wd,"empty");    while ((strcmp(wd,"q") !=  0)) {	/* RAH, changed this from: for (;;) */#else    for (;;) {#endif	printf ("word> ");	scanf ("%s", wd);		wid = dict_wordid (d, wd);	if (NOT_S3WID(wid))	    E_ERROR("Unknown word\n");	else {	    for (wid = dict_basewid(d, wid); IS_S3WID(wid); wid = d->word[wid].alt) {		printf ("%s\t", dict_wordstr(d, wid));		for (p = 0; p < d->word[wid].pronlen; p++)		    printf (" %s", dict_ciphone_str (d, wid, p));		printf ("\n");	    }	}    }}#if (_DICT_MEM_LEAK_TEST_)  mdef_free (m);		/* RAH, added freeing of memory */  dict_free (d);		/* RAH, added freeing of the memory*/  exit (0);#endif}#endif
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -