📄 dict.c
字号:
dict_t *dict_init (mdef_t *mdef, char *dictfile, char *fillerfile, char comp_sep){ FILE *fp, *fp2; int32 n ; char line[1024]; dict_t *d; if (! dictfile) E_FATAL("No dictionary file\n"); /* * First obtain #words in dictionary (for hash table allocation). * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate * all the required memory in one go. */ if ((fp = fopen(dictfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", dictfile); n = 0; while (fgets (line, sizeof(line), fp) != NULL) { if (line[0] != '#') n++; } rewind (fp); fp2 = NULL; if (fillerfile) { if ((fp2 = fopen(fillerfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", fillerfile); while (fgets (line, sizeof(line), fp2) != NULL) { if (line[0] != '#') n++; } rewind (fp2); } /* * Allocate dict entries. HACK!! Allow some extra entries for words not in file. * Also check for type size restrictions. */ d = (dict_t *) ckd_calloc (1, sizeof(dict_t)); /* freed in dict_free() */ d->max_words = (n+1024 < MAX_S3WID) ? n+1024 : MAX_S3WID; if (n >= MAX_S3WID) E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n, MAX_S3WID); d->word = (dictword_t *) ckd_calloc (d->max_words, sizeof(dictword_t)); /* freed in dict_free() */ d->n_word = 0; d->mdef = mdef; if (mdef) { d->pht = NULL; d->ciphone_str = NULL; } else { d->pht = hash_new (DEFAULT_NUM_PHONE, 1 /* No case */); d->ciphone_str = (char **) ckd_calloc (DEFAULT_NUM_PHONE, sizeof(char *)); /* freed in dict_free() */ } d->n_ciphone = 0; /* Create new hash table for word strings; case-insensitive word strings */ d->ht = hash_new (d->max_words, 1 /* no-case */); /* Initialize with no compound words */ d->comp_head = NULL; /* Digest main dictionary file */ E_INFO("Reading main dictionary: %s\n", dictfile); dict_read (fp, d); fclose (fp); E_INFO("%d words read\n", d->n_word); /* Now the filler dictionary file, if it exists */ d->filler_start = d->n_word; if (fillerfile) { E_INFO("Reading filler dictionary: %s\n", fillerfile); dict_read (fp2, d); fclose (fp2); E_INFO("%d words read\n", d->n_word - d->filler_start); } d->filler_end = d->n_word-1; /* Initialize distinguished word-ids */ d->startwid = dict_wordid (d, S3_START_WORD); d->finishwid = dict_wordid (d, S3_FINISH_WORD); d->silwid = dict_wordid (d, S3_SILENCE_WORD);#if 0 if (NOT_S3WID(d->startwid)) E_WARN("%s not in dictionary\n", S3_START_WORD); if (NOT_S3WID(d->finishwid)) E_WARN("%s not in dictionary\n", S3_FINISH_WORD); if (NOT_S3WID(d->silwid)) E_WARN("%s not in dictionary\n", S3_SILENCE_WORD);#endif /* Identify compound words if indicated */ if (comp_sep) { E_INFO("Building compound words (separator = '%c')\n", comp_sep); n = dict_build_comp (d, comp_sep); E_INFO("%d compound words\n", n); } return d;}s3wid_t dict_wordid (dict_t *d, char *word){ int32 w; assert (d); assert (word); if (hash_lookup (d->ht, word, &w) < 0) return (BAD_S3WID); return ((s3wid_t) w);}s3wid_t _dict_basewid (dict_t *d, s3wid_t w){ assert (d); assert ((w >= 0) && (w < d->n_word)); return (d->word[w].basewid);}char *_dict_wordstr (dict_t *d, s3wid_t wid){ assert (d); assert (IS_S3WID(wid) && (wid < d->n_word)); return (d->word[wid].word);}s3wid_t _dict_nextalt (dict_t *d, s3wid_t wid){ assert (d); assert (IS_S3WID(wid) && (wid < d->n_word)); return (d->word[wid].alt);}int32 dict_filler_word (dict_t *d, s3wid_t w){ assert (d); assert ((w >= 0) && (w < d->n_word)); w = dict_basewid(d, w); if ((w == d->startwid) || (w == d->finishwid)) return 0; if ((w >= d->filler_start) && (w <= d->filler_end)) return 1; return 0;}s3wid_t dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len){ s3wid_t w; int32 i; if (! d->comp_head) return BAD_S3WID; assert (len > 1); for (w = d->comp_head[wid[0]]; IS_S3WID(w); w = d->comp_head[w]) { /* w is a compound word beginning with wid[0]; check if rest matches */ assert (d->word[w].n_comp > 1); assert (d->word[w].comp[0] == wid[0]); if (d->word[w].n_comp == len) { for (i = 0; (i < len) && (d->word[w].comp[i] == wid[i]); i++); if (i == len) return (dict_basewid(d, w)); } } return BAD_S3WID;}int32 dict_word2basestr (char *word){ int32 i, len; len = strlen(word); if (word[len-1] == ')') { for (i = len-2; (i > 0) && (word[i] != '('); --i); if (i > 0) { /* The word is of the form <baseword>(...); strip from left-paren */ word[i] = '\0'; return i; } } return -1;}/* RAH 4.19.01, try to free memory allocated by the calls above. All testing I've done shows that this gets all the memory, however I've likely not tested all cases. */void dict_free (dict_t *d){ int i; dictword_t *word; if (d) { /* Clean up the dictionary stuff*/ /* First Step, free all memory allocated for each word */ for (i=0;i<d->n_word;i++) { word = (dictword_t *) &(d->word[i]); if (word->word) ckd_free ((void *)word->word); if (word->ciphone) ckd_free ((void *)word->ciphone); if (word->comp) ckd_free ((void *)word->comp); } if (d->word) ckd_free ((void *)d->word); for (i=0;i<d->n_ciphone;i++) { if (d->ciphone_str[i]) ckd_free ((void *)d->ciphone_str[i]); } if (d->comp_head) ckd_free ((void *)d->comp_head); if (d->ciphone_str) ckd_free ((void *)d->ciphone_str); if (d->pht) hash_free (d->pht); if (d->ht) hash_free (d->ht); ckd_free ((void *)d); }}#if (_DICT_TEST_)main (int32 argc, char *argv[]){ mdef_t *m; dict_t *d; char wd[1024]; s3wid_t wid; int32 p; if (argc < 3) E_FATAL("Usage: %s {mdeffile | NULL} dict [fillerdict]\n", argv[0]); m = (strcmp (argv[1], "NULL") != 0) ? mdef_init (argv[1]) : NULL; /* d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), '_'); */ /* */ d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), ' '); */ /* RAH, remove compound word separator */ #define _DICT_MEM_LEAK_TEST_ 0#if (_DICT_MEM_LEAK_TEST_) if (0) { /* RAH For now, just exit so we can check for memory leaks */ strcpy (wd,"empty"); while ((strcmp(wd,"q") != 0)) { /* RAH, changed this from: for (;;) */#else for (;;) {#endif printf ("word> "); scanf ("%s", wd); wid = dict_wordid (d, wd); if (NOT_S3WID(wid)) E_ERROR("Unknown word\n"); else { for (wid = dict_basewid(d, wid); IS_S3WID(wid); wid = d->word[wid].alt) { printf ("%s\t", dict_wordstr(d, wid)); for (p = 0; p < d->word[wid].pronlen; p++) printf (" %s", dict_ciphone_str (d, wid, p)); printf ("\n"); } } }}#if (_DICT_MEM_LEAK_TEST_) mdef_free (m); /* RAH, added freeing of memory */ dict_free (d); /* RAH, added freeing of the memory*/ exit (0);#endif}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -