dict.c
来自「WinCE平台上的语音识别程序」· C语言 代码 · 共 1,292 行 · 第 1/3 页
C
1,292 行
E_ERROR("Dictionary full; cannot add word\n"); return -1; } wid = first_dummy++; new_entry = 1; } entry = dict->dict_list[wid]; if (!replace_dict_entry(dict, entry, word, pron, TRUE, new_entry)) return -1; hash_table_enter(dict->dict, entry->word, (void *) wid); return (wid);}static void_dict_list_add(dictT * dict, dict_entry_t * entry)/*------------------------------------------------------------*/{ if (!dict->dict_list) dict->dict_list = (dict_entry_t **) ckd_calloc(hash_table_size(dict->dict), sizeof(dict_entry_t *)); if (dict->dict_entry_count >= hash_table_size(dict->dict)) { E_FATAL("dict size (%d) exceeded\n", hash_table_size(dict->dict)); dict->dict_list = (dict_entry_t **) ckd_realloc(dict->dict_list, (hash_table_size(dict->dict) + 16) * sizeof(dict_entry_t *)); } dict->dict_list[dict->dict_entry_count++] = entry;}dict_entry_t *dict_get_entry(dictT * dict, int i){ return ((i < dict->dict_entry_count) ? dict->dict_list[i] : (dict_entry_t *) 0);}/* FIXME: could be extern inline */int32dict_count(dictT * dict){ return dict->dict_entry_count;}dictT *dict_new(void){ return ckd_calloc(sizeof(dictT), 1);}static voidrecordMissingTriphone(char *triphoneStr){ void * idx; char *cp; if (-1 == hash_table_lookup(mtpHT, triphoneStr, &idx)) { cp = ckd_salloc(triphoneStr); E_INFO("Missing triphone: %s\n", triphoneStr); hash_table_enter(mtpHT, cp, cp); }}glist_tdict_mtpList(void){ return mtpList;}static int32addToContextTable(char *diphone, hash_table_t * table, list_t * list){ void * idx; char *cp; if (-1 == hash_table_lookup(table, diphone, &idx)) { cp = ckd_salloc(diphone); idx = (void *) table->inuse; list_insert(list, cp); hash_table_enter(table, cp, idx); } return ((int32) idx);}static int32addToLeftContextTable(char *diphone){ return addToContextTable(diphone, lcHT, &lcList);}static int32addToRightContextTable(char *diphone){ return addToContextTable(diphone, rcHT, &rcList);}static intcmp(void const *a, void const *b){ return (*(int32 const *) a - *(int32 const *) b);}int32 *linkTable;static intcmpPT(void const *a, void const *b){ return (linkTable[*(int32 const *) a] - linkTable[*(int32 const *) b]);}static voidbuildEntryTable(list_t * list, int32 *** table_p){ int32 ciCount = phoneCiCount(); int32 i, j; char triphoneStr[128]; int32 silContext = 0; int32 triphoneContext = 0; int32 noContext = 0; int32 **table; *table_p = ckd_calloc(list->in_use, sizeof(int32 *)); table = *table_p; E_INFO("Entry Context table contains\n\t%6d entries\n", list->in_use); E_INFO("\t%6d possible cross word triphones.\n", list->in_use * ciCount); for (i = 0; i < list->in_use; i++) { table[i] = ckd_calloc(ciCount, sizeof(int32)); for (j = 0; j < ciCount; j++) { /* * Look for the triphone */ sprintf(triphoneStr, list->list[i], phone_from_id(j)); table[i][j] = phone_to_id(triphoneStr, FALSE); if (table[i][j] >= 0) triphoneContext++; /* * If we can't find the desired right context use "SIL" */ if (table[i][j] < 0) { sprintf(triphoneStr, list->list[i], "SIL"); table[i][j] = phone_to_id(triphoneStr, FALSE); if (table[i][j] >= 0) silContext++; } /* * If we can't find "SIL" use context indepedent */ if (table[i][j] < 0) { char stmp[32]; char *p; strcpy(stmp, list->list[i]); p = strchr(stmp, '('); *p = '\0'; table[i][j] = phone_to_id(stmp, TRUE); noContext++; } table[i][j] = bin_mdef_pid2ssid(mdef,phone_map(table[i][j])); } } E_INFO("\t%6d triphones\n\t%6d pseudo diphones\n\t%6d uniphones\n", triphoneContext, silContext, noContext);}static voidbuildExitTable(list_t * list, int32 *** table_p, int32 *** permuTab_p, int32 ** sizeTab_p){ int32 ciCount = phoneCiCount(); int32 i, j, k; char triphoneStr[128]; int32 silContext = 0; int32 triphoneContext = 0; int32 noContext = 0; int32 entries = 0; int32 **table; int32 **permuTab; int32 *sizeTab; int32 ptab[128]; *table_p = (int32 **) ckd_calloc_2d(list->in_use, ciCount + 1, sizeof(int32 *)); table = *table_p; *permuTab_p = (int32 **) ckd_calloc_2d(list->in_use, ciCount + 1, sizeof(int32 *)); permuTab = *permuTab_p; *sizeTab_p = ckd_calloc(list->in_use, sizeof(int32 *)); sizeTab = *sizeTab_p; E_INFO("Exit Context table contains\n\t%6d entries\n", list->in_use); E_INFO("\t%6d possible cross word triphones.\n", list->in_use * ciCount); for (i = 0; i < list->in_use; i++) { for (j = 0; j < ciCount; j++) { /* * Look for the triphone */ sprintf(triphoneStr, list->list[i], phone_from_id(j)); table[i][j] = phone_to_id(triphoneStr, FALSE); if (table[i][j] >= 0) triphoneContext++; /* * If we can't find the desired context use "SIL" */ if (table[i][j] < 0) { sprintf(triphoneStr, list->list[i], "SIL"); table[i][j] = phone_to_id(triphoneStr, FALSE); if (table[i][j] >= 0) silContext++; } /* * If we can't find "SIL" use context indepedent */ if (table[i][j] < 0) { char stmp[32]; char *p; strcpy(stmp, list->list[i]); p = strchr(stmp, '('); *p = '\0'; table[i][j] = phone_to_id(stmp, TRUE); noContext++; } table[i][j] = bin_mdef_pid2ssid(mdef,phone_map(table[i][j])); } } /* * Now compress the table to eliminate duplicate entries. */ for (i = 0; i < list->in_use; i++) { /* * Set up the permutation table */ for (k = 0; k < ciCount; k++) { ptab[k] = k; } linkTable = table[i]; qsort(ptab, ciCount, sizeof(int32), cmpPT); qsort(table[i], ciCount, sizeof(int32), cmp); for (k = 0, j = 0; j < ciCount; j++) { if (table[i][k] != table[i][j]) { k = k + 1; table[i][k] = table[i][j]; } /* * Mirror the compression in the permutation table */ permuTab[i][ptab[j]] = k; } table[i][k + 1] = -1; /* End of table Marker */ sizeTab[i] = k + 1; entries += k + 1; } E_INFO("\t%6d triphones\n\t%6d pseudo diphones\n\t%6d uniphones\n", triphoneContext, silContext, noContext); E_INFO("\t%6d right context entries\n", entries); E_INFO("\t%6d ave entries per exit context\n", ((list->in_use == 0) ? 0 : entries / list->in_use));}int32 **dict_right_context_fwd(void){ return rcFwdTable;}int32 **dict_right_context_fwd_perm(void){ return rcFwdPermTable;}int32 *dict_right_context_fwd_size(void){ return rcFwdSizeTable;}int32 **dict_left_context_fwd(void){ return lcFwdTable;}int32 **dict_right_context_bwd(void){ return rcBwdTable;}int32 **dict_left_context_bwd(void){ return lcBwdTable;}int32 **dict_left_context_bwd_perm(void){ return lcBwdPermTable;}int32 *dict_left_context_bwd_size(void){ return lcBwdSizeTable;}int32dict_get_num_main_words(dictT * dict){ return ((int32) dictStrToWordId(dict, cmd_ln_str("-lmendsym"), FALSE));}int32dictid_to_baseid(dictT * dict, int32 wid){ return (dict->dict_list[wid]->wid);}int32dict_get_first_initial_oov(void){ return (first_initial_oov);}int32dict_get_last_initial_oov(void){ return (last_initial_oov);}/* * Return TRUE iff wid is new word dynamically added at run time. */int32dict_is_new_word(int32 wid){ return ((wid >= initial_dummy) && (wid <= last_dummy));}int32dict_pron(dictT * dict, int32 w, int32 ** pron){ *pron = dict->dict_list[w]->ci_phone_ids; return (dict->dict_list[w]->len);}int32dict_next_alt(dictT * dict, int32 w){ return (dict->dict_list[w]->alt);}/* Write OOV words added at run time to the given file and return #words written */int32dict_write_oovdict(dictT * dict, char const *file){ int32 w, p; FILE *fp; /* If no new words added at run time, no need to write a new file */ if (initial_dummy == first_dummy) { E_ERROR("No new word added; no OOV file written\n"); return 0; } if ((fp = fopen(file, "w")) == NULL) { E_ERROR("fopen(%s,w) failed\n", file); return -1; } /* Write OOV words added at run time */ for (w = initial_dummy; w < first_dummy; w++) { fprintf(fp, "%s\t", dict->dict_list[w]->word); for (p = 0; p < dict->dict_list[w]->len; p++) fprintf(fp, " %s", phone_from_id(dict->dict_list[w]->ci_phone_ids[p])); fprintf(fp, "\n"); } fclose(fp); return (first_dummy - initial_dummy);}voiddict_dump(dictT * dict, FILE * out){ int32 w; dict_entry_t *de; int32 i; fprintf(out, "<dict>"); for (w = 0; w < dict->dict_entry_count; w++) { de = dict->dict_list[w]; fprintf(out, " <word index=\"%d\">\n", w); fprintf(out, " <string>%s</string>\n", de->word); fprintf(out, " <len>%d</len>\n", de->len); fprintf(out, " <ci>"); for (i = 0; i < de->len; i++) fprintf(out, " %d", de->ci_phone_ids[i]); fprintf(out, " </ci>\n"); fprintf(out, " <pid>"); for (i = 0; i < de->len; i++) fprintf(out, " %d", de->phone_ids[i]); fprintf(out, " </pid>\n"); fprintf(out, " <wid>%d</wid>\n", de->wid); fprintf(out, " <fwid>%d</fwid>\n", de->fwid); fprintf(out, " <alt>%d</alt>\n", de->alt); fprintf(out, " </word>\n\n"); fflush(out); } fprintf(out, "</dict>");}int32dict_is_filler_word(dictT * dict, int32 wid){ return (wid >= dict->filler_start);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?