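/* mdef.c -- listing captured from a web code viewer. */
/* (Viewer residue, not source: "Font size:" control label.) */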
else if (strcmp (word, "n/a") == 0) m->ciphone[(int)ci].filler = 0; else E_FATAL("Bad filler attribute field: %s\n", line); triphone_add (m, ci, BAD_S3CIPID, BAD_S3CIPID, WORD_POSN_UNDEFINED, p); /* Parse remainder of line: transition matrix and state->senone mappings */ parse_tmat_senmap (m, line, lp-line, p);}static void parse_tri_line (mdef_t *m, char *line, s3pid_t p){ int32 wlen; char word[1024], *lp; s3cipid_t ci, lc, rc; word_posn_t wpos = WORD_POSN_BEGIN; lp = line; /* Read base phone name */ if (sscanf (lp, "%s%n", word, &wlen) != 1) E_FATAL("Missing base phone name: %s\n", line); lp += wlen; ci = mdef_ciphone_id (m, word); if (NOT_S3CIPID(ci)) E_FATAL("Unknown base phone: %s\n", line); /* Read lc */ if (sscanf (lp, "%s%n", word, &wlen) != 1) E_FATAL("Missing left context: %s\n", line); lp += wlen; lc = mdef_ciphone_id (m, word); if (NOT_S3CIPID(lc)) E_FATAL("Unknown left context: %s\n", line); /* Read rc */ if (sscanf (lp, "%s%n", word, &wlen) != 1) E_FATAL("Missing right context: %s\n", line); lp += wlen; rc = mdef_ciphone_id (m, word); if (NOT_S3CIPID(rc)) E_FATAL("Unknown right context: %s\n", line); /* Read tripone word-position within word */ if ((sscanf (lp, "%s%n", word, &wlen) != 1) || (word[1] != '\0')) E_FATAL("Missing or bad word-position spec: %s\n", line); lp += wlen; switch (word[0]) { case 'b': wpos = WORD_POSN_BEGIN; break; case 'e': wpos = WORD_POSN_END; break; case 's': wpos = WORD_POSN_SINGLE; break; case 'i': wpos = WORD_POSN_INTERNAL; break; default: E_FATAL("Bad word-position spec: %s\n", line); } /* Read filler attribute, if present. Must match base phone attribute */ if (sscanf (lp, "%s%n", word, &wlen) != 1) E_FATAL("Missing filler attribute field: %s\n", line); lp += wlen; if (((strcmp (word, "filler") == 0) && (m->ciphone[(int)ci].filler)) || ((strcmp (word, "n/a") == 0) && (! 
m->ciphone[(int)ci].filler))) { /* Everything is fine */ } else E_FATAL("Bad filler attribute field: %s\n", line); triphone_add (m, ci, lc, rc, wpos, p); /* Parse remainder of line: transition matrix and state->senone mappings */ parse_tmat_senmap (m, line, lp-line, p);}static void sseq_compress (mdef_t *m){ hash_table_t *h; s3senid_t **sseq; int32 n_sseq; int32 p, j, k; glist_t g; gnode_t *gn; hash_entry_t *he; k = m->n_emit_state * sizeof(s3senid_t); h = hash_new (m->n_phone, HASH_CASE_YES); n_sseq = 0; /* Identify unique senone-sequence IDs. BUG: tmat-id not being considered!! */ for (p = 0; p < m->n_phone; p++) { /* Add senone sequence to hash table */ if ((j = hash_enter_bkey (h, (char *)(m->sseq[p]), k, n_sseq)) == n_sseq) n_sseq++; m->phone[p].ssid = j; } /* Generate compacted sseq table */ sseq = (s3senid_t **) ckd_calloc_2d (n_sseq, m->n_emit_state, sizeof(s3senid_t));/* freed in mdef_free() */ g = hash_tolist (h, &j); assert (j == n_sseq); for (gn = g; gn; gn = gnode_next(gn)) { he = (hash_entry_t *) gnode_ptr (gn); j = hash_entry_val(he); memcpy (sseq[j], hash_entry_key(he), k); } glist_free (g); /* Free the old, temporary senone sequence table, replace with compacted one */ ckd_free_2d ((void **) m->sseq); m->sseq = sseq; m->n_sseq = n_sseq; hash_free (h);}static int32 noncomment_line(char *line, int32 size, FILE *fp){ while (fgets (line, size, fp) != NULL) { if (line[0] != '#') return 0; } return -1;}/* * Initialize phones (ci and triphones) and state->senone mappings from .mdef file. */mdef_t *mdef_init (char *mdeffile){ FILE *fp; int32 n_ci, n_tri, n_map, n; char tag[1024], buf[1024]; s3senid_t **senmap; s3senid_t *tempsenmap; s3pid_t p; int32 s, ci, cd; mdef_t *m; int32 *cdsen_start, *cdsen_end; if (! 
mdeffile) E_FATAL("No mdef-file\n"); E_INFO("Reading model definition: %s\n", mdeffile); m = (mdef_t *) ckd_calloc (1, sizeof(mdef_t)); /* freed in mdef_free */ if ((fp = fopen(mdeffile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", mdeffile); if (noncomment_line(buf, sizeof(buf), fp) < 0) E_FATAL("Empty file: %s\n", mdeffile); if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0) E_FATAL("Version error: Expecing %s, but read %s\n", MODEL_DEF_VERSION, buf); /* Read #base phones, #triphones, #senone mappings defined in header */ n_ci = -1; n_tri = -1; n_map = -1; m->n_ci_sen = -1; m->n_sen = -1; m->n_tmat = -1; do { if (noncomment_line(buf, sizeof(buf), fp) < 0) E_FATAL("Incomplete header\n"); if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0)) E_FATAL("Error in header: %s\n", buf); if (strcmp(tag, "n_base") == 0) n_ci = n; else if (strcmp(tag, "n_tri") == 0) n_tri = n; else if (strcmp(tag, "n_state_map") == 0) n_map = n; else if (strcmp(tag, "n_tied_ci_state") == 0) m->n_ci_sen = n; else if (strcmp(tag, "n_tied_state") == 0) m->n_sen = n; else if (strcmp(tag, "n_tied_tmat") == 0) m->n_tmat = n; else E_FATAL("Unknown header line: %s\n", buf); } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) || (m->n_ci_sen < 0) || (m->n_sen < 0) || (m->n_tmat < 0)); if ((n_ci == 0) || (m->n_ci_sen == 0) || (m->n_tmat == 0) || (m->n_ci_sen > m->n_sen)) E_FATAL("%s: Error in header\n", mdeffile); /* Check typesize limits */ if (n_ci >= MAX_S3CIPID) E_FATAL("%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci, MAX_S3CIPID); if (n_ci + n_tri >= MAX_S3PID) E_FATAL("%s: #Phones (%d) exceeds limit (%d)\n", mdeffile, n_ci+n_tri, MAX_S3PID); if (m->n_sen >= MAX_S3SENID) E_FATAL("%s: #senones (%d) exceeds limit (%d)\n", mdeffile, m->n_sen, MAX_S3SENID); if (m->n_tmat >= MAX_S3TMATID) E_FATAL("%s: #tmats (%d) exceeds limit (%d)\n", mdeffile, m->n_tmat, MAX_S3TMATID); m->n_emit_state = (n_map / (n_ci+n_tri)) - 1; if ((m->n_emit_state+1) * (n_ci+n_tri) != n_map) 
E_FATAL("Header error: n_state_map not a multiple of n_ci*n_tri\n"); /* Initialize ciphone info */ m->n_ciphone = n_ci; m->ciphone_ht = hash_new (n_ci, 1); /* With case-insensitive string names */ /* freed in mdef_free */ m->ciphone = (ciphone_t *) ckd_calloc (n_ci, sizeof(ciphone_t)); /* freed in mdef_free */ /* RAH, let's null the pointers so that we can reliably deallocate them */ /* for (i=0;i<m->n_ciphone;i++) { */ /* */ /*m->ciphone[i].name = NULL; */ /* */ /*} */ /* */ /* Initialize phones info (ciphones + triphones) */ m->n_phone = n_ci + n_tri; m->phone = (phone_t *) ckd_calloc (m->n_phone, sizeof(phone_t)); /* freed in mdef_free */ /* Allocate space for state->senone map for each phone */ /* Fast decoder-specific */ senmap = (s3senid_t **) ckd_calloc_2d (m->n_phone, m->n_emit_state, sizeof(s3senid_t));/* freed in mdef_free */ m->sseq = senmap; /* TEMPORARY; until it is compressed into just the unique ones */ /* Flat decoder-specific */ /* Allocate space for state->senone map for each phone */ /* ARCHAN 20040820, this sacrifice readability and may cause pointer problems in future. However, this is a less evil than duplication of code. This is trick point all the state mapping to the global mapping and avoid duplicated memory. */ /* S3 xwdpid_compress will compress the below list phone list. */ /* ARCHAN, this part should not be used when one of the recognizer is used. */ tempsenmap = (s3senid_t *) ckd_calloc (m->n_phone * m->n_emit_state, sizeof(s3senid_t)); for (p = 0; p < m->n_phone; p++) m->phone[p].state = tempsenmap + (p * m->n_emit_state); /* Allocate initial space for <ci,lc,rc,wpos> -> pid mapping */ m->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d (N_WORD_POSN, m->n_ciphone, sizeof(ph_lc_t *)); /* freed in mdef_free */ /* * Read base phones and triphones. They'll simply be assigned a running sequence * number as their "phone-id". If the phone-id < n_ci, it's a ciphone. 
*/ /* Read base phones */ for (p = 0; p < n_ci; p++) { if (noncomment_line(buf, sizeof(buf), fp) < 0) E_FATAL("Premature EOF reading CIphone %d\n", p); parse_base_line (m, buf, p); } m->sil = mdef_ciphone_id (m, S3_SILENCE_CIPHONE); /* Read triphones, if any */ for (; p < m->n_phone; p++) { if (noncomment_line(buf, sizeof(buf), fp) < 0) E_FATAL("Premature EOF reading phone %d\n", p); parse_tri_line (m, buf, p); } if (noncomment_line(buf, sizeof(buf), fp) >= 0) E_ERROR("Non-empty file beyond expected #phones (%d)\n", m->n_phone); /* Build CD senones to CI senones map */ if (m->n_ciphone * m->n_emit_state != m->n_ci_sen) E_FATAL("#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n", m->n_ci_sen, m->n_ciphone, m->n_emit_state); m->cd2cisen = (s3senid_t *) ckd_calloc (m->n_sen, sizeof(s3senid_t)); /* freed in mdef_free */ m->sen2cimap = (s3cipid_t *) ckd_calloc (m->n_sen, sizeof(s3cipid_t)); /* freed in mdef_free */ for (s = 0; s < m->n_sen; s++) m->sen2cimap[s] = BAD_S3CIPID; for (s = 0; s < m->n_ci_sen; s++) { /* CI senones */ m->cd2cisen[s] = (s3senid_t) s; m->sen2cimap[s] = s / m->n_emit_state; } for (p = n_ci; p < m->n_phone; p++) { /* CD senones */ for (s = 0; s < m->n_emit_state; s++) { cd = m->sseq[p][s]; ci = m->sseq[(int)m->phone[p].ci][s]; m->cd2cisen[cd] = (s3senid_t) ci; m->sen2cimap[cd] = m->phone[p].ci; } } /* * Count #senones (CI+CD) for each CI phone. * HACK!! For handling holes in senone-CIphone mappings. Does not work if holes * are present at the beginning or end of senones for a given CIphone. */ cdsen_start = (int32 *) ckd_calloc (m->n_ciphone, sizeof(int32)); /* freed locally */ cdsen_end = (int32 *) ckd_calloc (m->n_ciphone, sizeof(int32)); /* freed locally */ for (s = m->n_ci_sen; s < m->n_sen; s++) { if (NOT_S3CIPID(m->sen2cimap[s])) continue; if (! 
cdsen_start[(int)m->sen2cimap[s]]) cdsen_start[(int)m->sen2cimap[s]] = s; cdsen_end[(int)m->sen2cimap[s]] = s; } /* Fill up holes */ for (s = m->n_ci_sen; s < m->n_sen; s++) { if (IS_S3CIPID(m->sen2cimap[s])) continue; /* Check if properly inside the observed ranges above */ for (p = 0; p < m->n_ciphone; p++) { if ((s > cdsen_start[p]) && (s < cdsen_end[p])) break; } if (p >= m->n_ciphone) E_FATAL("Unreferenced senone %d; cannot determine parent CIphone\n", s); m->sen2cimap[s] = p; } /* Build #CD-senones for each CIphone */ m->ciphone2n_cd_sen = (int32 *) ckd_calloc (m->n_ciphone, sizeof(int32));/* freed mdef_free */ n = 0; for (p = 0; p < m->n_ciphone; p++) { if (cdsen_start[p] > 0) { m->ciphone2n_cd_sen[p] = cdsen_end[p] - cdsen_start[p] + 1; n += m->ciphone2n_cd_sen[p]; } } n += m->n_ci_sen; assert (n == m->n_sen); ckd_free (cdsen_start); ckd_free (cdsen_end); sseq_compress (m); E_INFO("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n", m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state, m->n_ci_sen, m->n_sen, m->n_sseq); fclose (fp); return m;}void mdef_sseq2sen_active (mdef_t *mdef, int32 *sseq, int32 *sen){ int32 ss, i; s3senid_t *sp; for (ss = 0; ss < mdef_n_sseq(mdef); ss++) { if (sseq[ss]) { sp = mdef->sseq[ss]; for (i = 0; i < mdef_n_emit_state(mdef); i++) sen[sp[i]] = 1; } }}/* RAH 4.23.01, Need to step down the ->next list to see if there are any more things to free *//* RAH 4.19.01, Attempt to free memory that was allocated within this module I have not verified that all the memory has been freed. I've taken only a reasonable effort for now. RAH 4.24.01 - verified that all memory is released. 
*/void mdef_free_recursive_lc (ph_lc_t *lc){ if (lc == NULL) return; if (lc->rclist) mdef_free_recursive_rc (lc->rclist); if (lc->next) mdef_free_recursive_lc (lc->next); ckd_free ((void *) lc);}void mdef_free_recursive_rc (ph_rc_t *rc){ if (rc == NULL) return; if (rc->next) mdef_free_recursive_rc (rc->next); ckd_free ((void *) rc);}/* RAH, Free memory that was allocated in mdef_init Rational purify shows that no leaks exist */ void mdef_free (mdef_t *m){ int i,j; if (m) { if (m->ciphone2n_cd_sen) ckd_free ((void *)m->ciphone2n_cd_sen); if (m->sen2cimap) ckd_free ((void *)m->sen2cimap); if (m->cd2cisen) ckd_free ((void *)m->cd2cisen); /* RAH, go down the ->next list and delete all the pieces */ for (i=0;i<N_WORD_POSN;i++) for (j=0;j<m->n_ciphone;j++) if (m->wpos_ci_lclist[i][j]) { mdef_free_recursive_lc (m->wpos_ci_lclist[i][j]->next); mdef_free_recursive_rc (m->wpos_ci_lclist[i][j]->rclist); } for (i=0;i<N_WORD_POSN;i++) for (j=0;j<m->n_ciphone;j++) if (m->wpos_ci_lclist[i][j]) ckd_free ((void *) m->wpos_ci_lclist[i][j]); if (m->wpos_ci_lclist) ckd_free_2d ((void *)m->wpos_ci_lclist); if (m->sseq) ckd_free_2d ((void *)m->sseq); /* Free phone context */ if (m->phone) ckd_free ((void *)m->phone); if (m->ciphone_ht) hash_free (m->ciphone_ht); for (i=0;i<m->n_ciphone;i++) { if (m->ciphone[i].name) ckd_free ((void *)m->ciphone[i].name); } if (m->ciphone) ckd_free ((void *)m->ciphone); ckd_free ((void *)m); }}
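/*
 * Code-viewer keyboard shortcuts (web page residue, not part of mdef.c):
 *   Copy code             Ctrl + C
 *   Search code           Ctrl + F
 *   Full-screen mode      F11
 *   Toggle theme          Ctrl + Shift + D
 *   Show shortcuts        ?
 *   Increase font size    Ctrl + =
 *   Decrease font size    Ctrl + -
 */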