s3_allphone.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 844 行 · 第 1/2 页
C
844 行
/* ==================================================================== * Copyright (c) 1995-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * allphone.c -- Allphone Viterbi decoding. * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1996 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** * * HISTORY * * 02-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added allphone lattice output. * * 14-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Started. *//** \file s3_allphone.c \brief Engine for s3 phoneme recognition */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <s3types.h>#include "mdef.h"#include "tmat.h"#include "logs3.h"#include "s3_allphone.h"/** * SOME ASSUMPTIONS * - All phones (ciphones and triphones) have same HMM topology with n_state states. * - Initial state = state 0; final state = state n_state-1. * - Final state is a non-emitting state with no arcs out of it. * - Some form of Bakis topology (ie, no cycles, except for self-transitions). *//** * Phone-HMM (PHMM) structure: Models a single unique <senone-sequence, tmat> pair. * Can represent several different triphones, but all with the same parent basephone. * (NOTE: Word-position attribute of triphone is ignored.) */typedef struct phmm_s { s3pid_t pid; /** Phone id (temp. during init.) */ s3tmatid_t tmat; /** Transition matrix id for this PHMM */ s3cipid_t ci; /** Parent basephone for this PHMM */ s3frmid_t active; /** Latest frame in which this PHMM is/was active */ uint32 *lc; /** Set (bit-vector) of left context phones seen for this PHMM */ uint32 *rc; /** Set (bit-vector) of right context phones seen for this PHMM */ s3senid_t *sen; /** Senone-id sequence underlying this PHMM */ int32 *score; /** Total path score during Viterbi decoding */ struct history_s **hist; /** Viterbi history (for backtrace) */ int32 bestscore; /** Best state score in any frame */ int32 inscore; /** Incoming score from predecessor PHMMs */ struct history_s *inhist; /** History corresponding to inscore */ struct phmm_s *next; /** Next unique PHMM for same parent basephone */ struct plink_s *succlist; /** List of predecessor PHMM nodes */} phmm_t;static phmm_t **ci_phmm; /** PHMM lists (for each CI phone) *//** * List of links from a PHMM node to its successors; one link per successor. */typedef struct plink_s { phmm_t *phmm; /** Successor PHMM node */ struct plink_s *next; /** Next link for parent PHMM node */} plink_t;/** * History (paths) information at any point in allphone Viterbi search. */typedef struct history_s { phmm_t *phmm; /** PHMM ending this path */ int32 score; /** Path score for this path */ s3frmid_t ef; /** End frame */ struct history_s *hist; /** Previous history entry */ struct history_s *next; /** Next in allocated list */} history_t;static history_t **frm_hist; /** List of history nodes allocated in each frame */extern mdef_t *mdef; /** Model definition */extern tmat_t *tmat; /** Transition probability matrices */static int32 lrc_size = 0;static int32 curfrm; /* Current frame */static int32 beam;static int32 *score_scale; /* Score by which state scores scaled in each frame */static phseg_t *phseg;static int32 **tp; /* Phone transition probabilities */static int32 n_histnode; /* No. of history entries *//** * Find PHMM node with same senone sequence and tmat id as the given triphone. * Return ptr to PHMM node if found, NULL otherwise. */static phmm_t *phmm_lookup (s3pid_t pid){ phmm_t *p; phone_t *old, *new; new = &(mdef->phone[pid]); for (p = ci_phmm[(unsigned)mdef->phone[pid].ci]; p; p = p->next) { old = &(mdef->phone[p->pid]); if (old->tmat == new->tmat) { if (old->ssid == new->ssid) return p; } } return NULL;}static void lrc_set (uint32 *vec, int32 ci){ int32 i, j; assert (lrc_size > 0); /* If lc or rc not specified, set all flags */ if (NOT_S3CIPID(ci)) { for (i = 0; i < lrc_size; i++) vec[i] = (uint32) 0xffffffff; } else { i = (ci >> 5); j = ci - (i << 5); vec[i] |= (1 << j); }}static int32 lrc_is_set (uint32 *vec, int32 ci){ int32 i, j; i = (ci >> 5); j = ci - (i << 5); return (vec[i] & (1 << j));}static int32 phmm_link ( void ){ s3cipid_t ci, rc; phmm_t *p, *p2; int32 *rclist; int32 i, n_link; plink_t *l; rclist = (int32 *) ckd_calloc (mdef->n_ciphone+1, sizeof(int32)); /* Create successor links between PHMM nodes */ n_link = 0; for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned)ci]; p; p = p->next) { /* Build rclist for p */ i = 0; for (rc = 0; rc < mdef->n_ciphone; rc++) { if (lrc_is_set (p->rc, rc)) rclist[i++] = rc; } rclist[i] = BAD_S3CIPID; /* For each rc in rclist, transition to PHMMs for rc if left context = ci */ for (i = 0; IS_S3CIPID(rclist[i]); i++) { for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->next) { if (lrc_is_set (p2->lc, ci)) { /* transition from p to p2 */ l = (plink_t *) listelem_alloc (sizeof(plink_t)); l->phmm = p2; l->next = p->succlist; p->succlist = l; n_link++; } } } } } ckd_free (rclist); return n_link;}static int32 phmm_build ( void ){ s3pid_t pid; phmm_t *p, **pid2phmm; s3cipid_t ci; int32 n_phmm, n_link; s3senid_t *sen; int32 *score; history_t **hist; uint32 *lc, *rc; int32 i, s; s3cipid_t *filler; E_INFO("Building PHMM net\n"); ci_phmm = (phmm_t **) ckd_calloc (mdef->n_ciphone, sizeof(phmm_t *)); pid2phmm = (phmm_t **) ckd_calloc (mdef->n_phone, sizeof(phmm_t *)); for (lrc_size = 32; lrc_size < mdef->n_ciphone; lrc_size += 32); lrc_size >>= 5; /* For each unique ciphone/triphone entry in mdef, create a PHMM node */ n_phmm = 0; for (pid = 0; pid < mdef->n_phone; pid++) { if ((p = phmm_lookup (pid)) == NULL) { /* No previous entry; create a new one */ p = (phmm_t *) listelem_alloc (sizeof(phmm_t)); p->pid = pid; p->tmat = mdef->phone[pid].tmat; p->ci = mdef->phone[pid].ci; p->succlist = NULL; p->next = ci_phmm[(unsigned)p->ci]; ci_phmm[(unsigned)p->ci] = p; n_phmm++; } pid2phmm[pid] = p; } /* Fill out rest of each PHMM node */ sen = (s3senid_t *) ckd_calloc (n_phmm * mdef->n_emit_state, sizeof(s3senid_t)); score = (int32 *) ckd_calloc (n_phmm * (mdef->n_emit_state+1), sizeof(int32)); hist = (history_t **) ckd_calloc (n_phmm * (mdef->n_emit_state+1), sizeof(history_t *)); lc = (uint32 *) ckd_calloc (n_phmm * lrc_size * 2, sizeof(uint32)); rc = lc + (n_phmm * lrc_size); for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned)ci]; p; p = p->next) { p->sen = sen; for (s = 0; s < mdef->n_emit_state; s++) p->sen[s] = mdef->sseq[mdef->phone[p->pid].ssid][s]; sen += mdef->n_emit_state; p->score = score; score += (mdef->n_emit_state + 1); p->hist = hist; hist += (mdef->n_emit_state + 1); p->lc = lc; lc += lrc_size; p->rc = rc; rc += lrc_size; } } /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */ filler = (s3cipid_t *) ckd_calloc (mdef->n_ciphone + 1, sizeof(s3cipid_t)); i = 0; for (ci = 0; ci < mdef->n_ciphone; ci++) { if (mdef->ciphone[(unsigned)ci].filler) filler[i++] = ci; } filler[i] = BAD_S3CIPID; for (pid = 0; pid < mdef->n_phone; pid++) { p = pid2phmm[pid]; if (IS_S3CIPID(mdef->phone[pid].lc) && mdef->ciphone[(unsigned)mdef->phone[pid].lc].filler) { for (i = 0; IS_S3CIPID(filler[i]); i++) lrc_set (p->lc, filler[i]); } else lrc_set (p->lc, mdef->phone[pid].lc); if (IS_S3CIPID(mdef->phone[pid].rc) && mdef->ciphone[(unsigned)mdef->phone[pid].rc].filler) { for (i = 0; IS_S3CIPID(filler[i]); i++) lrc_set (p->rc, filler[i]); } else lrc_set (p->rc, mdef->phone[pid].rc); } ckd_free (pid2phmm); ckd_free (filler); /* Create links between PHMM nodes */ n_link = phmm_link (); E_INFO ("%d nodes, %d links\n", n_phmm, n_link); return 0;}#if 0static void phmm_dump ( void ){ s3cipid_t ci, lc, rc; phmm_t *p; plink_t *l; printf ("Nodes:\n"); for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned)ci]; p; p = p->next) { printf ("%5d\t%s", p->pid, mdef_ciphone_str (mdef, p->ci)); printf ("\tLC="); for (lc = 0; lc < mdef->n_ciphone; lc++) if (lrc_is_set (p->lc, lc)) printf (" %s", mdef_ciphone_str (mdef, lc)); printf ("\tRC="); for (rc = 0; rc < mdef->n_ciphone; rc++) if (lrc_is_set (p->rc, rc)) printf (" %s", mdef_ciphone_str (mdef, rc)); printf ("\n"); } } for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned)ci]; p; p = p->next) { printf ("%5d -> ", p->pid); for (l = p->succlist; l; l = l->next) printf (" %5d", l->phmm->pid); printf ("\n"); } }}#endif/** * Check model tprob matrices that they conform to upper-diagonal assumption. */static void chk_tp_uppertri ( void ){ int32 i, n_state, from, to; n_state = mdef->n_emit_state; /* Check that each tmat is upper-triangular */ for (i = 0; i < tmat->n_tmat; i++) { for (to = 0; to < n_state; to++) for (from = to+1; from < n_state; from++) if (tmat->tp[i][from][to] > S3_LOGPROB_ZERO) E_FATAL("HMM transition matrix not upper triangular\n"); }}int32 allphone_start_utt (char *uttid){ s3cipid_t ci; phmm_t *p; int32 s; for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned)ci]; p; p = p->next) { p->active = -1; p->inscore = S3_LOGPROB_ZERO; p->bestscore = S3_LOGPROB_ZERO; for (s = 0; s <= mdef->n_emit_state; s++) { p->score[s] = S3_LOGPROB_ZERO; p->hist[s] = NULL; } } } curfrm = 0; /* Initialize start state of the SILENCE PHMM */ ci = mdef_ciphone_id (mdef, S3_SILENCE_CIPHONE); if (NOT_S3CIPID(ci)) E_FATAL("Cannot find CI-phone %s\n", S3_SILENCE_CIPHONE); for (p = ci_phmm[(unsigned)ci]; p && (p->pid != ci); p = p->next); if (! p) E_FATAL("Cannot find HMM for %s\n", S3_SILENCE_CIPHONE); p->inscore = 0; p->inhist = NULL; p->active = curfrm; n_histnode = 0; return 0;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?