⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mdef.c

📁 CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统
💻 C
📖 第 1 页 / 共 2 页
字号:
    else if (strcmp (word, "n/a") == 0)        m->ciphone[(int)ci].filler = 0;    else        E_FATAL("Bad filler attribute field: %s\n", line);    triphone_add (m, ci, BAD_S3CIPID, BAD_S3CIPID, WORD_POSN_UNDEFINED, p);    /* Parse remainder of line: transition matrix and state->senone mappings */    parse_tmat_senmap (m, line, lp-line, p);}static void parse_tri_line (mdef_t *m, char *line, s3pid_t p){    int32 wlen;    char word[1024], *lp;    s3cipid_t ci, lc, rc;    word_posn_t wpos = WORD_POSN_BEGIN;    lp = line;        /* Read base phone name */    if (sscanf (lp, "%s%n", word, &wlen) != 1)	E_FATAL("Missing base phone name: %s\n", line);    lp += wlen;    ci = mdef_ciphone_id (m, word);    if (NOT_S3CIPID(ci))        E_FATAL("Unknown base phone: %s\n", line);    /* Read lc */    if (sscanf (lp, "%s%n", word, &wlen) != 1)	E_FATAL("Missing left context: %s\n", line);    lp += wlen;    lc = mdef_ciphone_id (m, word);    if (NOT_S3CIPID(lc))        E_FATAL("Unknown left context: %s\n", line);    /* Read rc */    if (sscanf (lp, "%s%n", word, &wlen) != 1)	E_FATAL("Missing right context: %s\n", line);    lp += wlen;    rc = mdef_ciphone_id (m, word);    if (NOT_S3CIPID(rc))        E_FATAL("Unknown right  context: %s\n", line);        /* Read tripone word-position within word */    if ((sscanf (lp, "%s%n", word, &wlen) != 1) || (word[1] != '\0'))        E_FATAL("Missing or bad word-position spec: %s\n", line);    lp += wlen;    switch (word[0]) {    case 'b': wpos = WORD_POSN_BEGIN; break;    case 'e': wpos = WORD_POSN_END; break;    case 's': wpos = WORD_POSN_SINGLE; break;    case 'i': wpos = WORD_POSN_INTERNAL; break;    default: E_FATAL("Bad word-position spec: %s\n", line);    }    /* Read filler attribute, if present.  Must match base phone attribute */    if (sscanf (lp, "%s%n", word, &wlen) != 1)	E_FATAL("Missing filler attribute field: %s\n", line);    lp += wlen;    if (((strcmp (word, "filler") == 0) && (m->ciphone[(int)ci].filler)) ||	((strcmp (word, "n/a") == 0) && (! m->ciphone[(int)ci].filler))) {	/* Everything is fine */    } else        E_FATAL("Bad filler attribute field: %s\n", line);        triphone_add (m, ci, lc, rc, wpos, p);    /* Parse remainder of line: transition matrix and state->senone mappings */    parse_tmat_senmap (m, line, lp-line, p);}static void sseq_compress (mdef_t *m){    hash_table_t *h;    s3senid_t **sseq;    int32 n_sseq;    int32 p, j, k;    glist_t g;    gnode_t *gn;    hash_entry_t *he;        k = m->n_emit_state * sizeof(s3senid_t);        h = hash_new (m->n_phone, HASH_CASE_YES);    n_sseq = 0;        /* Identify unique senone-sequence IDs.  BUG: tmat-id not being considered!! */    for (p = 0; p < m->n_phone; p++) {	/* Add senone sequence to hash table */	if ((j = hash_enter_bkey (h, (char *)(m->sseq[p]), k, n_sseq)) == n_sseq)	    n_sseq++;		m->phone[p].ssid = j;    }        /* Generate compacted sseq table */    sseq = (s3senid_t **) ckd_calloc_2d (n_sseq, m->n_emit_state, sizeof(s3senid_t));/* freed in mdef_free() */        g = hash_tolist (h, &j);    assert (j == n_sseq);        for (gn = g; gn; gn = gnode_next(gn)) {	he = (hash_entry_t *) gnode_ptr (gn);	j = hash_entry_val(he);	memcpy (sseq[j], hash_entry_key(he), k);    }    glist_free (g);        /* Free the old, temporary senone sequence table, replace with compacted one */    ckd_free_2d ((void **) m->sseq);    m->sseq = sseq;    m->n_sseq = n_sseq;        hash_free (h);}static int32 noncomment_line(char *line, int32 size, FILE *fp){    while (fgets (line, size, fp) != NULL) {        if (line[0] != '#')	    return 0;    }    return -1;}/* * Initialize phones (ci and triphones) and state->senone mappings from .mdef file. */mdef_t *mdef_init (char *mdeffile){    FILE *fp;    int32 n_ci, n_tri, n_map, n;    char tag[1024], buf[1024];    s3senid_t **senmap;    s3senid_t *tempsenmap;    s3pid_t p;    int32 s, ci, cd;    mdef_t *m;    int32 *cdsen_start, *cdsen_end;    if (! mdeffile)	E_FATAL("No mdef-file\n");    E_INFO("Reading model definition: %s\n", mdeffile);    m = (mdef_t *) ckd_calloc (1, sizeof(mdef_t)); /* freed in mdef_free */        if ((fp = fopen(mdeffile, "r")) == NULL)        E_FATAL_SYSTEM("fopen(%s,r) failed\n", mdeffile);    if (noncomment_line(buf, sizeof(buf), fp) < 0)        E_FATAL("Empty file: %s\n", mdeffile);    if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)        E_FATAL("Version error: Expecing %s, but read %s\n", MODEL_DEF_VERSION, buf);    /* Read #base phones, #triphones, #senone mappings defined in header */    n_ci = -1;    n_tri = -1;    n_map = -1;    m->n_ci_sen = -1;    m->n_sen = -1;    m->n_tmat = -1;    do {	if (noncomment_line(buf, sizeof(buf), fp) < 0)	    E_FATAL("Incomplete header\n");	if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0))	    E_FATAL("Error in header: %s\n", buf);	if (strcmp(tag, "n_base") == 0)	    n_ci = n;	else if (strcmp(tag, "n_tri") == 0)	    n_tri = n;	else if (strcmp(tag, "n_state_map") == 0)	    n_map = n;	else if (strcmp(tag, "n_tied_ci_state") == 0)	    m->n_ci_sen = n;	else if (strcmp(tag, "n_tied_state") == 0)	    m->n_sen = n;	else if (strcmp(tag, "n_tied_tmat") == 0)	    m->n_tmat = n;	else	    E_FATAL("Unknown header line: %s\n", buf);    } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||	     (m->n_ci_sen < 0) || (m->n_sen < 0) || (m->n_tmat < 0));    if ((n_ci == 0) || (m->n_ci_sen == 0) || (m->n_tmat == 0) || (m->n_ci_sen > m->n_sen))        E_FATAL("%s: Error in header\n", mdeffile);        /* Check typesize limits */    if (n_ci >= MAX_S3CIPID)	E_FATAL("%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci, MAX_S3CIPID);    if (n_ci + n_tri >= MAX_S3PID)	E_FATAL("%s: #Phones (%d) exceeds limit (%d)\n", mdeffile, n_ci+n_tri, MAX_S3PID);    if (m->n_sen >= MAX_S3SENID)	E_FATAL("%s: #senones (%d) exceeds limit (%d)\n", mdeffile, m->n_sen, MAX_S3SENID);    if (m->n_tmat >= MAX_S3TMATID)	E_FATAL("%s: #tmats (%d) exceeds limit (%d)\n", mdeffile, m->n_tmat, MAX_S3TMATID);        m->n_emit_state = (n_map / (n_ci+n_tri)) - 1;    if ((m->n_emit_state+1) * (n_ci+n_tri) != n_map)        E_FATAL("Header error: n_state_map not a multiple of n_ci*n_tri\n");    /* Initialize ciphone info */    m->n_ciphone = n_ci;    m->ciphone_ht = hash_new (n_ci, 1);	/* With case-insensitive string names */ /* freed in mdef_free */    m->ciphone = (ciphone_t *) ckd_calloc (n_ci, sizeof(ciphone_t)); /* freed in mdef_free */    /* RAH, let's null the pointers so that we can reliably deallocate them */    /*    for (i=0;i<m->n_ciphone;i++) { */ /*  */    /*m->ciphone[i].name = NULL; */ /*  */    /*} */ /*  */    /* Initialize phones info (ciphones + triphones) */    m->n_phone = n_ci + n_tri;    m->phone = (phone_t *) ckd_calloc (m->n_phone, sizeof(phone_t)); /* freed in mdef_free */    /* Allocate space for state->senone map for each phone */    /* Fast decoder-specific */    senmap = (s3senid_t **) ckd_calloc_2d (m->n_phone, m->n_emit_state, sizeof(s3senid_t));/* freed in mdef_free */    m->sseq = senmap;	/* TEMPORARY; until it is compressed into just the unique ones */    /* Flat decoder-specific */    /* Allocate space for state->senone map for each phone */    /* ARCHAN 20040820, this sacrifice readability and may cause pointer       problems in future. However, this is a less evil than       duplication of code.  This is trick point all the state mapping       to the global mapping and avoid duplicated memory.      */    /* S3 xwdpid_compress will compress the below list phone list.      */        /* ARCHAN, this part should not be used when one of the recognizer is used. */     tempsenmap = (s3senid_t *) ckd_calloc (m->n_phone * m->n_emit_state, sizeof(s3senid_t));    for (p = 0; p < m->n_phone; p++)        m->phone[p].state = tempsenmap + (p * m->n_emit_state);            /* Allocate initial space for <ci,lc,rc,wpos> -> pid mapping */    m->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d (N_WORD_POSN, m->n_ciphone, sizeof(ph_lc_t *)); /* freed in mdef_free */    /*     * Read base phones and triphones.  They'll simply be assigned a running sequence     * number as their "phone-id".  If the phone-id < n_ci, it's a ciphone.     */    /* Read base phones */    for (p = 0; p < n_ci; p++) {        if (noncomment_line(buf, sizeof(buf), fp) < 0)	    E_FATAL("Premature EOF reading CIphone %d\n", p);        parse_base_line (m, buf, p);    }    m->sil = mdef_ciphone_id (m, S3_SILENCE_CIPHONE);        /* Read triphones, if any */    for (; p < m->n_phone; p++) {        if (noncomment_line(buf, sizeof(buf), fp) < 0)	    E_FATAL("Premature EOF reading phone %d\n", p);        parse_tri_line (m, buf, p);    }    if (noncomment_line(buf, sizeof(buf), fp) >= 0)	E_ERROR("Non-empty file beyond expected #phones (%d)\n", m->n_phone);    /* Build CD senones to CI senones map */    if (m->n_ciphone * m->n_emit_state != m->n_ci_sen)	E_FATAL("#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n",		m->n_ci_sen, m->n_ciphone, m->n_emit_state);    m->cd2cisen = (s3senid_t *) ckd_calloc (m->n_sen, sizeof(s3senid_t)); /* freed in mdef_free */    m->sen2cimap = (s3cipid_t *) ckd_calloc (m->n_sen, sizeof(s3cipid_t)); /* freed in mdef_free */    for (s = 0; s < m->n_sen; s++)	m->sen2cimap[s] = BAD_S3CIPID;    for (s = 0; s < m->n_ci_sen; s++) {		/* CI senones */	m->cd2cisen[s] = (s3senid_t) s;	m->sen2cimap[s] = s / m->n_emit_state;    }    for (p = n_ci; p < m->n_phone; p++) {	/* CD senones */	for (s = 0; s < m->n_emit_state; s++) {	    cd = m->sseq[p][s];	    ci = m->sseq[(int)m->phone[p].ci][s];	    m->cd2cisen[cd] = (s3senid_t) ci;	    m->sen2cimap[cd] = m->phone[p].ci;	}    }        /*     * Count #senones (CI+CD) for each CI phone.     * HACK!!  For handling holes in senone-CIphone mappings.  Does not work if holes     * are present at the beginning or end of senones for a given CIphone.     */    cdsen_start = (int32 *) ckd_calloc (m->n_ciphone, sizeof(int32)); /* freed locally */    cdsen_end = (int32 *) ckd_calloc (m->n_ciphone, sizeof(int32)); /* freed locally */    for (s = m->n_ci_sen; s < m->n_sen; s++) {	if (NOT_S3CIPID(m->sen2cimap[s]))	    continue;		if (! cdsen_start[(int)m->sen2cimap[s]])	    cdsen_start[(int)m->sen2cimap[s]] = s;	cdsen_end[(int)m->sen2cimap[s]] = s;    }    /* Fill up holes */    for (s = m->n_ci_sen; s < m->n_sen; s++) {	if (IS_S3CIPID(m->sen2cimap[s]))	    continue;	/* Check if properly inside the observed ranges above */	for (p = 0; p < m->n_ciphone; p++) {	    if ((s > cdsen_start[p]) && (s < cdsen_end[p]))		break;	}	if (p >= m->n_ciphone)	    E_FATAL("Unreferenced senone %d; cannot determine parent CIphone\n", s);	m->sen2cimap[s] = p;    }    /* Build #CD-senones for each CIphone */    m->ciphone2n_cd_sen = (int32 *) ckd_calloc (m->n_ciphone, sizeof(int32));/* freed mdef_free */    n = 0;    for (p = 0; p < m->n_ciphone; p++) {	if (cdsen_start[p] > 0) {	    m->ciphone2n_cd_sen[p] = cdsen_end[p] - cdsen_start[p] + 1;	    n += m->ciphone2n_cd_sen[p];	}    }    n += m->n_ci_sen;    assert (n == m->n_sen);    ckd_free (cdsen_start);    ckd_free (cdsen_end);    sseq_compress (m);        E_INFO("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",	   m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,	   m->n_ci_sen, m->n_sen, m->n_sseq);        fclose (fp);        return m;}void mdef_sseq2sen_active (mdef_t *mdef, int32 *sseq, int32 *sen){    int32 ss, i;    s3senid_t *sp;        for (ss = 0; ss < mdef_n_sseq(mdef); ss++) {	if (sseq[ss]) {	    sp = mdef->sseq[ss];	    for (i = 0; i < mdef_n_emit_state(mdef); i++)		sen[sp[i]] = 1;	}    }}/* RAH 4.23.01, Need to step down the ->next list to see if there are   any more things to free *//* RAH 4.19.01, Attempt to free memory that was allocated within this module   I have not verified that all the memory has been freed. I've taken only a    reasonable effort for now.   RAH 4.24.01 - verified that all memory is released. */void mdef_free_recursive_lc (ph_lc_t *lc){  if (lc == NULL) return;  if (lc->rclist)     mdef_free_recursive_rc (lc->rclist);  if (lc->next)     mdef_free_recursive_lc (lc->next);  ckd_free ((void *) lc);}void mdef_free_recursive_rc (ph_rc_t *rc){  if (rc == NULL) return;    if (rc->next)       mdef_free_recursive_rc (rc->next);    ckd_free ((void *) rc);}/* RAH, Free memory that was allocated in mdef_init    Rational purify shows that no leaks exist */   void mdef_free (mdef_t *m){  int i,j;  if (m) {     if (m->ciphone2n_cd_sen)      ckd_free    ((void *)m->ciphone2n_cd_sen);    if (m->sen2cimap)      ckd_free    ((void *)m->sen2cimap);    if (m->cd2cisen)      ckd_free    ((void *)m->cd2cisen);    /* RAH, go down the ->next list and delete all the pieces */    for (i=0;i<N_WORD_POSN;i++)      for (j=0;j<m->n_ciphone;j++) 	if (m->wpos_ci_lclist[i][j]) {	  mdef_free_recursive_lc (m->wpos_ci_lclist[i][j]->next);	  mdef_free_recursive_rc (m->wpos_ci_lclist[i][j]->rclist);	}        for (i=0;i<N_WORD_POSN;i++)      for (j=0;j<m->n_ciphone;j++) 	if (m->wpos_ci_lclist[i][j])  	  ckd_free ((void *) m->wpos_ci_lclist[i][j]);        if (m->wpos_ci_lclist)      ckd_free_2d ((void *)m->wpos_ci_lclist);    if (m->sseq)       ckd_free_2d ((void *)m->sseq);    /* Free phone context */    if (m->phone)       ckd_free    ((void *)m->phone);        if (m->ciphone_ht)      hash_free (m->ciphone_ht);    for (i=0;i<m->n_ciphone;i++) {      if (m->ciphone[i].name) 	ckd_free    ((void *)m->ciphone[i].name);    }    if (m->ciphone)       ckd_free    ((void *)m->ciphone);        ckd_free    ((void *)m);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -