📄 voca_load_htkdict.c
字号:
}

/**
 * Append a single entry to the existing word dictionary.
 *
 * The entry is copied into the file-level work buffer and parsed by
 * voca_load_htkdict_line() as word number winfo->num.  On success the word
 * count is incremented and the maximum word statistics are recomputed via
 * set_maxwn() and set_maxwlen(); on failure the collected unknown phones are
 * reported via list_error() when any were recorded.
 *
 * @param entry [in] dictionary entry string to be appended.
 * @param winfo [out] pointer to word dictionary to append the data.
 * @param hmminfo [in] HTK %HMM definition data.  if NULL, phonemes are ignored.
 * @param ignore_tri_conv [in] TRUE if triphone conversion is ignored
 *
 * @return TRUE on success, FALSE on any error word.
 */
boolean
voca_append_htkdict(char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
{
  boolean ok_flag = TRUE;
  boolean do_conv = FALSE;

  /* convert to word-internal triphones only when a triphone model is given
     and the caller did not disable the conversion */
  if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) do_conv = TRUE;

  /* make room for one more word before parsing into slot winfo->num */
  if (winfo->num >= winfo->maxnum) winfo_expand(winfo);

  /* The parser modifies its input, so work on a copy of the entry.
     NOTE(review): buf is declared outside this block and this copy is
     unbounded; presumably callers keep entries shorter than buf -- confirm. */
  strcpy(buf, entry);
  voca_load_htkdict_line(buf, winfo->num, winfo, hmminfo, do_conv, &ok_flag);

  if (ok_flag == TRUE) {
    winfo->num++;
    /* re-compute maxwn */
    set_maxwn(winfo);
    set_maxwlen(winfo);
  } else {
    if (winfo->errph_root != NULL) list_error(winfo);
  }

  return(ok_flag);
}

/**
 * Sub function to add a dictionary entry line to the word dictionary.
 *
 * The line is tokenized in place.  Its expected layout is
 * "name [@classprob wordname] [output]|{output} phone ...".  On any parse
 * error a message is printed, winfo->errnum is incremented, @a ok_flag is set
 * to FALSE and TRUE is returned so that the caller can go on to the next line.
 *
 * @param buf [i/o] buffer to hold the input string, will be modified in this function
 * @param vnum [in] current number of words in @a winfo
 * @param winfo [out] pointer to word dictionary to append the data.
 * @param hmminfo [in] HTK %HMM definition data.  if NULL, phonemes are ignored.
 * @param do_conv [in] TRUE if performing triphone conversion
 * @param ok_flag [out] will be set to FALSE if an error occurred for this input.
 *
 * @return FALSE if buf == "DICEND", else TRUE will be returned.
 */
boolean
voca_load_htkdict_line(char *buf, int vnum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag)
{
  char *ptmp, *lp = NULL, *p;
  static char cbuf[MAX_HMMNAME_LEN];
  static HMM_Logical **tmpwseq = NULL;
  static int tmpmaxlen;
  int len;
  HMM_Logical *tmplg;
  boolean pok;

  if (strmatch(buf, "DICEND")) return FALSE;

  /* allocate temporary work area on the first call */
  if (tmpwseq == NULL) {
    tmpmaxlen = PHONEMELEN_STEP;
    tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen);
  }

  /* backup whole line for debug output */
  /* NOTE(review): bufbak is declared outside this block and this copy is
     unbounded; presumably it is at least as large as any input line -- confirm. */
  strcpy(bufbak, buf);

  /* GrammarEntry */
  if ((ptmp = mystrtok(buf, " \t\n")) == NULL) {
    j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
    winfo->errnum++;
    *ok_flag = FALSE;
    return TRUE;
  }
  winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);

  /* just move pointer to next token */
  if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
    j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
    winfo->errnum++;
    *ok_flag = FALSE;
    return TRUE;
  }
#ifdef CLASS_NGRAM
  winfo->cprob[vnum] = 0.0;     /* prob = 1.0, logprob = 0.0 */
#endif

  if (ptmp[0] == '@') {         /* class N-gram prob */
#ifdef CLASS_NGRAM
    /* word probability within the class (for class N-gram) */
    /* format: classname @classprob wordname [output] phoneseq */
    /* classname equals to wname, and wordname will be omitted */
    /* format: @%f (log scale) */
    /* if "@" not found or "@0", it means class == word */
    if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
      j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
      winfo->errnum++;
      *ok_flag = FALSE;
      return TRUE;
    }
    if (ptmp[1] == '\0') {      /* space between '@' and figures */
      j_printerr("line %d: value after '@' missing, maybe wrong space?\n> %s\n", vnum+1, bufbak);
      winfo->errnum++;
      *ok_flag = FALSE;
      return TRUE;
    }
    winfo->cprob[vnum] = atof(&(ptmp[1]));
    if (winfo->cprob[vnum] != 0.0) winfo->cwnum++;
    /* read next word entry (just skip them) */
    if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
      j_printerr("line %d: corrupted data:\n> %s\n", vnum+1,bufbak);
      winfo->errnum++;
      *ok_flag = FALSE;
      return TRUE;
    }
    /* move to the next word entry */
    if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
      j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
      winfo->errnum++;
      *ok_flag = FALSE;
      return TRUE;
    }
#else  /* ~CLASS_NGRAM */
    /* the format has exactly two conversions: line number and the whole
       original line, as in every other message of this function */
    j_printerr("line %d: cannot handle in-class word probability\n> %s\n", vnum+1, bufbak);
    winfo->errnum++;
    *ok_flag = FALSE;
    return TRUE;
#endif /* CLASS_NGRAM */
  }

  /* OutputString */
  switch(ptmp[0]) {
  case '[':                     /* not transparent word */
    winfo->is_transparent[vnum] = FALSE;
    ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
    break;
  case '{':                     /* transparent word */
    winfo->is_transparent[vnum] = TRUE;
    ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
    break;
  default:
    j_printerr("line %d: missing output string??\n> %s\n", vnum+1, bufbak);
    winfo->errnum++;
    *ok_flag = FALSE;
    return TRUE;
  }
  if (ptmp == NULL) {
    j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
    winfo->errnum++;
    *ok_flag = FALSE;
    return TRUE;
  }
  winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);

  /* phoneme sequence */
  if (hmminfo == NULL) {
    /* don't read */
    winfo->wseq[vnum] = NULL;
    winfo->wlen[vnum] = 0;
  } else {

    /* store converted phone sequence to temporary buffer */
    len = 0;

    if (do_conv) {
      /* convert phoneme to triphone expression (word-internal) */
      cycle_triphone(NULL);
      if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
        j_printerr("line %d: word %s has no phoneme:\n> %s\n", vnum+1, winfo->wname[vnum], bufbak);
        winfo->errnum++;
        *ok_flag = FALSE;
        return TRUE;
      }
      cycle_triphone(lp);
    }

    pok = TRUE;
    for (;;) {
      if (do_conv) {
        /* feed the next phone if any remains, otherwise flush the last one */
        if (lp != NULL) lp = mystrtok(NULL, " \t\n");
        if (lp != NULL) p = cycle_triphone(lp);
        else p = cycle_triphone_flush();
      } else {
        p = mystrtok(NULL, " \t\n");
      }
      if (p == NULL) break;

      /* both defined/pseudo phone is allowed */
      tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
      if (tmplg == NULL) {
        /* not found */
        if (do_conv) {
          /* both defined or pseudo phone are not found */
          if (len == 0 && lp == NULL) {
            j_printerr("line %d: triphone \"*-%s+*\" or monophone \"%s\" not found\n", vnum+1, p, p);
            snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s+* or monophone %s", p, p);
          } else if (len == 0) {
            j_printerr("line %d: triphone \"*-%s\" or biphone \"%s\" not found\n", vnum+1, p, p);
            snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s or biphone %s", p, p);
          } else if (lp == NULL) {
            j_printerr("line %d: triphone \"%s+*\" or biphone \"%s\" not found\n", vnum+1, p, p);
            snprintf(cbuf,MAX_HMMNAME_LEN,"%s+* or biphone %s", p, p);
          } else {
            j_printerr("line %d: triphone \"%s\" not found\n", vnum+1, p);
            snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p);
          }
        } else {
          j_printerr("line %d: phone \"%s\" not found\n", vnum+1, p);
          snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p);
        }
        add_to_error(winfo, cbuf);
        pok = FALSE;
      } else {
        /* found */
        if (len >= tmpmaxlen) {
          /* Unknown phones advance len without growing the buffer, so len
             may already be more than one step past the end: grow until the
             index really fits (assumes PHONEMELEN_STEP is positive). */
          while (len >= tmpmaxlen) tmpmaxlen += PHONEMELEN_STEP;
          tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen);
        }
        /* store to temporary buffer */
        tmpwseq[len] = tmplg;
      }
      len++;
    }
    if (!pok) {                 /* error in phoneme */
      j_printerr("> %s\n", bufbak);
      winfo->errnum++;
      *ok_flag = FALSE;
      return TRUE;
    }
    if (len == 0) {
      j_printerr("line %d: no phone specified:\n> %s\n", vnum+1, bufbak);
      winfo->errnum++;
      *ok_flag = FALSE;
      return TRUE;
    }
    /* store to winfo */
    winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot));
    memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len);
    winfo->wlen[vnum] = len;
  }

  return(TRUE);
}

/**
 * @brief Convert whole words in word dictionary to word-internal triphone.
 *
 * Normally triphone conversion will be performed directly when reading
 * dictionary file.  This function is for post conversion only.
 *
 * Words that have no phone sequence (as stored by voca_load_htkdict_line()
 * when it is called without an %HMM definition) are skipped.  When a
 * converted name is not found in @a hmminfo, an error is printed, the
 * original entry at that position is kept and conversion goes on.
 *
 * @param winfo [i/o] word dictionary information
 * @param hmminfo [in] HTK %HMM definition
 *
 * @return TRUE on success, FALSE on failure.
 */
boolean
voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
{
  WORD_ID w;
  int ph;
  char *p;
  HMM_Logical *tmplg;
  boolean ok_flag = TRUE;

  for (w=0;w<winfo->num;w++) {
    /* nothing to convert, and wseq[w][0] must not be dereferenced */
    if (winfo->wseq[w] == NULL || winfo->wlen[w] == 0) continue;

    cycle_triphone(NULL);
    cycle_triphone(winfo->wseq[w][0]->name);

    for (ph = 0; ph < winfo->wlen[w] ; ph++) {
      /* position ph+1 has not been overwritten yet, so its name is still
         the unconverted one */
      if (ph == winfo->wlen[w] - 1) {
        p = cycle_triphone_flush();
      } else {
        p = cycle_triphone(winfo->wseq[w][ph + 1]->name);
      }
      if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) {
        j_printerr("voca_mono2tri: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p);
        ok_flag = FALSE;
        continue;
      }
      winfo->wseq[w][ph] = tmplg;
    }
  }
  return (ok_flag);
}

/**
 * Append one word dictionary to other, for multiple grammar handling.
 * Assumes that the same %HMM definition is used on both word dictionary.
 *
 * Word name and output strings are duplicated, the phone sequence array is
 * newly allocated but its elements point to the same %HMM entries as the
 * source.  After copying, dstinfo->num is set to the index just past the
 * last copied word and the maximum word statistics are recomputed.
 *
 * @param dstinfo [i/o] word dictionary
 * @param srcinfo [in] word dictionary to be appended to @a dst
 * @param coffset [in] category id offset in @a dst where the new data
 * should be stored
 * @param woffset [in] word id offset in @a dst where the new data
 * should be stored
 */
void
voca_append(WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset)
{
  WORD_ID n, w;
  int i;

  /* NOTE(review): the first word is written at index woffset before any
     capacity check, so woffset is presumably below dstinfo->maxnum on
     entry -- confirm with callers. */
  n = woffset;
  for(w=0;w<srcinfo->num;w++) {
    /* copy data */
    dstinfo->wlen[n] = srcinfo->wlen[w];
    dstinfo->wname[n] = strcpy((char *)mymalloc(strlen(srcinfo->wname[w])+1), srcinfo->wname[w]);
    dstinfo->woutput[n] = strcpy((char *)mymalloc(strlen(srcinfo->woutput[w])+1), srcinfo->woutput[w]);
    dstinfo->wseq[n] = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * srcinfo->wlen[w]);
    for(i=0;i<srcinfo->wlen[w];i++) {
      dstinfo->wseq[n][i] = srcinfo->wseq[w][i];
    }
    dstinfo->is_transparent[n] = srcinfo->is_transparent[w];
    /* offset category ID by coffset */
    dstinfo->wton[n] = srcinfo->wton[w] + coffset;

    n++;
    /* keep one free slot ahead of the next write */
    if (n >= dstinfo->maxnum) winfo_expand(dstinfo);
  }
  dstinfo->num = n;

  /* compute maxwn */
  set_maxwn(dstinfo);
  set_maxwlen(dstinfo);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -