s3_allphone.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 844 行 · 第 1/2 页

C
844
字号
/* ==================================================================== * Copyright (c) 1995-2004 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * allphone.c -- Allphone Viterbi decoding. *  * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1996 Carnegie Mellon University. * ALL RIGHTS RESERVED. 
* ********************************************** *  * HISTORY *  * 02-Jun-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added allphone lattice output. *  * 14-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Started. *//** \file s3_allphone.c    \brief Engine for s3 phoneme recognition */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <s3types.h>#include "mdef.h"#include "tmat.h"#include "logs3.h"#include "s3_allphone.h"/** * SOME ASSUMPTIONS *   - All phones (ciphones and triphones) have same HMM topology with n_state states. *   - Initial state = state 0; final state = state n_state-1. *   - Final state is a non-emitting state with no arcs out of it. *   - Some form of Bakis topology (ie, no cycles, except for self-transitions). *//** * Phone-HMM (PHMM) structure:  Models a single unique <senone-sequence, tmat> pair. * Can represent several different triphones, but all with the same parent basephone. * (NOTE: Word-position attribute of triphone is ignored.) */typedef struct phmm_s {    s3pid_t pid;	/** Phone id (temp. during init.) 
*/    s3tmatid_t tmat;	/** Transition matrix id for this PHMM */    s3cipid_t ci;	/** Parent basephone for this PHMM */    s3frmid_t active;	/** Latest frame in which this PHMM is/was active */    uint32 *lc;		/** Set (bit-vector) of left context phones seen for this PHMM */    uint32 *rc;		/** Set (bit-vector) of right context phones seen for this PHMM */    s3senid_t *sen;	/** Senone-id sequence underlying this PHMM */    int32 *score;	/** Total path score during Viterbi decoding */    struct history_s **hist;	/** Viterbi history (for backtrace) */    int32 bestscore;	/** Best state score in any frame */    int32 inscore;	/** Incoming score from predecessor PHMMs */    struct history_s *inhist;	/** History corresponding to inscore */    struct phmm_s *next;	/** Next unique PHMM for same parent basephone */    struct plink_s *succlist;	/** List of predecessor PHMM nodes */} phmm_t;static phmm_t **ci_phmm;	/** PHMM lists (for each CI phone) *//** * List of links from a PHMM node to its successors; one link per successor. */typedef struct plink_s {    phmm_t *phmm;		/** Successor PHMM node */    struct plink_s *next;	/** Next link for parent PHMM node */} plink_t;/** * History (paths) information at any point in allphone Viterbi search. */typedef struct history_s {    phmm_t *phmm;	/** PHMM ending this path */    int32 score;	/** Path score for this path */    s3frmid_t ef;	/** End frame */    struct history_s *hist;	/** Previous history entry */    struct history_s *next;	/** Next in allocated list */} history_t;static history_t **frm_hist;	/** List of history nodes allocated in each frame */extern mdef_t *mdef;		/** Model definition */extern tmat_t *tmat;		/** Transition probability matrices */static int32 lrc_size = 0;static int32 curfrm;		/* Current frame */static int32 beam;static int32 *score_scale;	/* Score by which state scores scaled in each frame */static phseg_t *phseg;static int32 **tp;		/* Phone transition probabilities */static int32 n_histnode;	/* No. 
of history entries *//** * Find PHMM node with same senone sequence and tmat id as the given triphone. * Return ptr to PHMM node if found, NULL otherwise. */static phmm_t *phmm_lookup (s3pid_t pid){    phmm_t *p;    phone_t *old, *new;        new = &(mdef->phone[pid]);        for (p = ci_phmm[(unsigned)mdef->phone[pid].ci]; p; p = p->next) {	old = &(mdef->phone[p->pid]);	if (old->tmat == new->tmat) {	  if (old->ssid == new->ssid)		return p;	}    }    return NULL;}static void lrc_set (uint32 *vec, int32 ci){    int32 i, j;        assert (lrc_size > 0);        /* If lc or rc not specified, set all flags */    if (NOT_S3CIPID(ci)) {	for (i = 0; i < lrc_size; i++)	    vec[i] = (uint32) 0xffffffff;    } else {	i = (ci >> 5);	j = ci - (i << 5);	vec[i] |= (1 << j);    }}static int32 lrc_is_set (uint32 *vec, int32 ci){    int32 i, j;    i = (ci >> 5);    j = ci - (i << 5);    return (vec[i] & (1 << j));}static int32 phmm_link ( void ){    s3cipid_t ci, rc;    phmm_t *p, *p2;    int32 *rclist;    int32 i, n_link;    plink_t *l;        rclist = (int32 *) ckd_calloc (mdef->n_ciphone+1, sizeof(int32));        /* Create successor links between PHMM nodes */    n_link = 0;    for (ci = 0; ci < mdef->n_ciphone; ci++) {	for (p = ci_phmm[(unsigned)ci]; p; p = p->next) {	    /* Build rclist for p */	    i = 0;	    for (rc = 0; rc < mdef->n_ciphone; rc++) {		if (lrc_is_set (p->rc, rc))		    rclist[i++] = rc;	    }	    rclist[i] = BAD_S3CIPID;	    	    /* For each rc in rclist, transition to PHMMs for rc if left context = ci */	    for (i = 0; IS_S3CIPID(rclist[i]); i++) {		for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->next) {		    if (lrc_is_set (p2->lc, ci)) {			/* transition from p to p2 */			l = (plink_t *) listelem_alloc (sizeof(plink_t));			l->phmm = p2;			l->next = p->succlist;			p->succlist = l;						n_link++;		    }		}	    }	}    }    ckd_free (rclist);        return n_link;}static int32 phmm_build ( void ){    s3pid_t pid;    phmm_t *p, **pid2phmm;    s3cipid_t ci;    int32 
n_phmm, n_link;    s3senid_t *sen;    int32 *score;    history_t **hist;    uint32 *lc, *rc;    int32 i, s;    s3cipid_t *filler;    E_INFO("Building PHMM net\n");        ci_phmm = (phmm_t **) ckd_calloc (mdef->n_ciphone, sizeof(phmm_t *));    pid2phmm = (phmm_t **) ckd_calloc (mdef->n_phone, sizeof(phmm_t *));    for (lrc_size = 32; lrc_size < mdef->n_ciphone; lrc_size += 32);    lrc_size >>= 5;    /* For each unique ciphone/triphone entry in mdef, create a PHMM node */    n_phmm = 0;    for (pid = 0; pid < mdef->n_phone; pid++) {	if ((p = phmm_lookup (pid)) == NULL) {	    /* No previous entry; create a new one */	    p = (phmm_t *) listelem_alloc (sizeof(phmm_t));	    p->pid = pid;	    p->tmat = mdef->phone[pid].tmat;	    p->ci = mdef->phone[pid].ci;	    p->succlist = NULL;	    	    p->next = ci_phmm[(unsigned)p->ci];	    ci_phmm[(unsigned)p->ci] = p;	    n_phmm++;	}	pid2phmm[pid] = p;    }        /* Fill out rest of each PHMM node */    sen = (s3senid_t *) ckd_calloc (n_phmm * mdef->n_emit_state, sizeof(s3senid_t));    score = (int32 *) ckd_calloc (n_phmm * (mdef->n_emit_state+1), sizeof(int32));    hist = (history_t **) ckd_calloc (n_phmm * (mdef->n_emit_state+1),				      sizeof(history_t *));    lc = (uint32 *) ckd_calloc (n_phmm * lrc_size * 2, sizeof(uint32));    rc = lc + (n_phmm * lrc_size);    for (ci = 0; ci < mdef->n_ciphone; ci++) {	for (p = ci_phmm[(unsigned)ci]; p; p = p->next) {	    p->sen = sen;	    for (s = 0; s < mdef->n_emit_state; s++)		p->sen[s] = mdef->sseq[mdef->phone[p->pid].ssid][s];	    sen += mdef->n_emit_state;	    p->score = score;	    score += (mdef->n_emit_state + 1);	    p->hist = hist;	    hist += (mdef->n_emit_state + 1);	    p->lc = lc;	    lc += lrc_size;	    	    p->rc = rc;	    rc += lrc_size;	}    }    /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) 
*/    filler = (s3cipid_t *) ckd_calloc (mdef->n_ciphone + 1, sizeof(s3cipid_t));    i = 0;    for (ci = 0; ci < mdef->n_ciphone; ci++) {	if (mdef->ciphone[(unsigned)ci].filler)	    filler[i++] = ci;    }    filler[i] = BAD_S3CIPID;    for (pid = 0; pid < mdef->n_phone; pid++) {	p = pid2phmm[pid];	if (IS_S3CIPID(mdef->phone[pid].lc) && mdef->ciphone[(unsigned)mdef->phone[pid].lc].filler) {	    for (i = 0; IS_S3CIPID(filler[i]); i++)		lrc_set (p->lc, filler[i]);	} else	    lrc_set (p->lc, mdef->phone[pid].lc);		if (IS_S3CIPID(mdef->phone[pid].rc) && mdef->ciphone[(unsigned)mdef->phone[pid].rc].filler) {	    for (i = 0; IS_S3CIPID(filler[i]); i++)		lrc_set (p->rc, filler[i]);	} else	    lrc_set (p->rc, mdef->phone[pid].rc);    }    ckd_free (pid2phmm);    ckd_free (filler);        /* Create links between PHMM nodes */    n_link = phmm_link ();        E_INFO ("%d nodes, %d links\n", n_phmm, n_link);        return 0;}#if 0static void phmm_dump ( void ){    s3cipid_t ci, lc, rc;    phmm_t *p;    plink_t *l;        printf ("Nodes:\n");    for (ci = 0; ci < mdef->n_ciphone; ci++) {	for (p = ci_phmm[(unsigned)ci]; p; p = p->next) {	    printf ("%5d\t%s", p->pid, mdef_ciphone_str (mdef, p->ci));	    printf ("\tLC=");	    for (lc = 0; lc < mdef->n_ciphone; lc++)		if (lrc_is_set (p->lc, lc))		    printf (" %s", mdef_ciphone_str (mdef, lc));	    printf ("\tRC=");	    for (rc = 0; rc < mdef->n_ciphone; rc++)		if (lrc_is_set (p->rc, rc))		    printf (" %s", mdef_ciphone_str (mdef, rc));	    printf ("\n");	}    }    for (ci = 0; ci < mdef->n_ciphone; ci++) {	for (p = ci_phmm[(unsigned)ci]; p; p = p->next) {	    printf ("%5d -> ", p->pid);	    for (l = p->succlist; l; l = l->next)		printf (" %5d", l->phmm->pid);	    printf ("\n");	}    }}#endif/** * Check model tprob matrices that they conform to upper-diagonal assumption. 
*/
/*
 * chk_tp_uppertri: for every transition matrix in tmat, look at each entry
 * tp[i][from][to] with from > to (below the diagonal), over the first
 * mdef->n_emit_state states.  If any such entry is greater than
 * S3_LOGPROB_ZERO, E_FATAL is called.
 */
static void chk_tp_uppertri ( void )
{
    int32 i, n_state, from, to;

    n_state = mdef->n_emit_state;

    /* Check that each tmat is upper-triangular */
    for (i = 0; i < tmat->n_tmat; i++) {
        for (to = 0; to < n_state; to++)
            for (from = to+1; from < n_state; from++)
                if (tmat->tp[i][from][to] > S3_LOGPROB_ZERO)
                    E_FATAL("HMM transition matrix not upper triangular\n");
    }
}

/*
 * allphone_start_utt: reset the per-utterance search state and seed the
 * search at the SILENCE PHMM.  uttid is not referenced in the statements
 * shown here.
 */
int32 allphone_start_utt (char *uttid)
{
    s3cipid_t ci;
    phmm_t *p;
    int32 s;

    /* Reset every PHMM: mark it inactive and clear all n_emit_state+1 state scores/histories */
    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        for (p = ci_phmm[(unsigned)ci]; p; p = p->next) {
            p->active = -1;
            p->inscore = S3_LOGPROB_ZERO;
            p->bestscore = S3_LOGPROB_ZERO;
            for (s = 0; s <= mdef->n_emit_state; s++) {
                p->score[s] = S3_LOGPROB_ZERO;
                p->hist[s] = NULL;
            }
        }
    }
    curfrm = 0;

    /* Initialize start state of the SILENCE PHMM */
    ci = mdef_ciphone_id (mdef, S3_SILENCE_CIPHONE);
    if (NOT_S3CIPID(ci))
        E_FATAL("Cannot find CI-phone %s\n", S3_SILENCE_CIPHONE);
    /* Find the node on the silence list whose pid equals the CI phone id */
    for (p = ci_phmm[(unsigned)ci]; p && (p->pid != ci); p = p->next);
    if (! p)
        E_FATAL("Cannot find HMM for %s\n", S3_SILENCE_CIPHONE);
    p->inscore = 0;
    p->inhist = NULL;
    p->active = curfrm;

    n_histnode = 0;

    return 0;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?