📄 allphone.c
字号:
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * allphone.c -- All CI phone decoding. * * HISTORY * * 22-Nov-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Modified to use unified semi-continuous/continuous acoustic * model evaluation module (senscr). * * 06-Aug-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added phonetp (phone transition probs matrix) to search. * * 10-Sep-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Modified to let allphone_utt return phone segmentation result, * instead of simply printing it to the standard log file. * Added print_back_trace option. * * 08-Sep-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added check for presence of SILb/SILe in allphone_start_utt and * allphone_result. * * 01-Jan-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Created. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <math.h>#include <cmd_ln.h>#include <err.h>#include "s2types.h"#include "basic_types.h"#include "search_const.h"#include "linklist.h"#include "list.h"#include "hash_table.h"#include "phone.h"#include "log.h"#include "s2_semi_mgau.h"#include "senscr.h"#include "msd.h"#include "dict.h"#include "lmclass.h"#include "lm_3g.h"#include "kb.h"#include "fbs.h"#include "search.h"#include "ckd_alloc.h"static CHAN_T *ci_chan; /* hmm model instances for each CI phone */static int32 n_ciphone;static int32 *renorm_scr;static int32 allphone_bw; /* beam width */static int32 allphone_exitbw; /* phone exit beam width */static int32 allphone_pip; /* phone insertion penalty */typedef struct { int32 f; int32 p; int32 scr; int32 bp;} allphone_bp_t;static allphone_bp_t *allphone_bp;static int32 n_bp;#define ALLPHONE_BP_MAX 65536static search_hyp_t *allp_seghyp = NULL, *allp_seghyp_tail;extern int32 *senone_active;extern int32 n_senone_active;static voidallphone_start_utt(void){ int32 s, p; for (p = 0; p < n_ciphone; p++) ci_chan[p].active = -1;#if 0 /* note that SILb isn't in the current 4k models --kal */ p = phone_to_id("SILb", TRUE); if (p < 0) { p = phone_to_id("SIL", TRUE); if (p < 0) E_FATAL("SILb/SIL not found\n"); }#else p = phone_to_id("SIL", TRUE); if (p < 0) E_FATAL("SILb/SIL not found\n");#endif ci_chan[p].score[0] = 0; for (s = 1; s < HMM_LAST_STATE; s++) ci_chan[p].score[s] = WORST_SCORE; ci_chan[p].path[0] = -1; ci_chan[p].active = 0; n_bp = 0;}static voidallphone_senone_active(void){ int32 p, s, n; n = 0; for (p = 0; p < n_ciphone; p++) { for (s = 0; s < NODE_CNT-1; ++s) { senone_active[n++] = bin_mdef_sseq2sen(mdef, ci_chan[p].sseqid, s); } } n_senone_active = n;}static int32allphone_eval_ci_chan(int32 f){ int32 p, bestscr; bestscr = WORST_SCORE; for (p = 0; p < n_ciphone; p++) { if (ci_chan[p].active != f) continue; chan_v_eval(ci_chan + p); if (bestscr < ci_chan[p].bestscore) bestscr = ci_chan[p].bestscore; } return (bestscr);}static voidallphone_bp_entry(int32 f, int32 p){ if (n_bp == ALLPHONE_BP_MAX - 2) E_ERROR("BP table full\n"); if (n_bp >= ALLPHONE_BP_MAX) return; allphone_bp[n_bp].f = f; allphone_bp[n_bp].p = p; allphone_bp[n_bp].scr = ci_chan[p].score[HMM_LAST_STATE]; allphone_bp[n_bp].bp = ci_chan[p].path[HMM_LAST_STATE]; n_bp++;}static voidallphone_chan_prune(int32 f, int32 bestscr){ int32 p, thresh, exit_thresh; thresh = bestscr + allphone_bw; exit_thresh = bestscr + allphone_exitbw; for (p = 0; p < n_ciphone; p++) { if (ci_chan[p].active != f) continue; if (ci_chan[p].bestscore > thresh) { ci_chan[p].active = f + 1; if (ci_chan[p].score[HMM_LAST_STATE] > exit_thresh) allphone_bp_entry(f, p); } }}static voidallphone_chan_trans(int32 f, int32 bp){ int32 p, scr, s, predp; predp = allphone_bp[bp].p; for (p = 0; p < n_ciphone; p++) { scr = allphone_bp[bp].scr + phonetp[predp][p]; if ((ci_chan[p].active < f) || (ci_chan[p].score[0] < scr)) { ci_chan[p].score[0] = scr; if (ci_chan[p].active < f) for (s = 1; s < HMM_LAST_STATE; s++) ci_chan[p].score[s] = WORST_SCORE; ci_chan[p].path[0] = bp; ci_chan[p].active = f + 1; } }}static voidallphone_renorm(int32 f, int32 bestscr){ int32 p, s; for (p = 0; p < n_ciphone; p++) { if (ci_chan[p].active == f) { for (s = 0; s < HMM_LAST_STATE; s++) if (ci_chan[p].score[s] > WORST_SCORE) ci_chan[p].score[s] -= bestscr; } } renorm_scr[f] = bestscr;}/* Special version which frees the phone strings too. */static voidallphone_utt_seghyp_free(search_hyp_t * h){ search_hyp_t *tmp; while (h) { tmp = h->next; free((void *) h->word); listelem_free(h, sizeof(search_hyp_t)); h = tmp; }}static voidallphone_backtrace(int32 bp){ int32 bf, f, nf, bscr, escr; search_hyp_t *h; if (bp < 0) return; allphone_backtrace(allphone_bp[bp].bp); if (allphone_bp[bp].bp < 0) { bf = 0; bscr = 0; } else { bf = allphone_bp[allphone_bp[bp].bp].f + 1; bscr = allphone_bp[allphone_bp[bp].bp].scr; } for (escr = allphone_bp[bp].scr, f = bf; f <= allphone_bp[bp].f; f++) escr += renorm_scr[f]; nf = allphone_bp[bp].f - bf + 1; h = (search_hyp_t *) listelem_alloc(sizeof(search_hyp_t)); h->wid = allphone_bp[bp].p; /* The result of phone_from_id is ephemeral in PocketSphinx. So * makesure to use allphone_utt_seghyp_free(). */ h->word = ckd_salloc(phone_from_id(allphone_bp[bp].p)); h->sf = bf; h->ef = allphone_bp[bp].f; h->next = NULL; if (allp_seghyp_tail) allp_seghyp_tail->next = h; else allp_seghyp = h; allp_seghyp_tail = h; if (cmd_ln_boolean("-backtrace")) { printf("\t%5d %5d %10d %11d %s\n", allp_seghyp_tail->sf, allp_seghyp_tail->ef, (escr - bscr) / nf, escr - bscr, allp_seghyp_tail->word); }}static voidallphone_result(void){ int32 i, b, f, sile, bestbp, scr; if (n_bp <= 0) { printf("NO ALIGNMENT\n"); return; } f = allphone_bp[n_bp - 1].f; for (b = n_bp - 2; (b >= 0) && (allphone_bp[b].f == f); --b); b++; sile = phone_to_id("SILe", TRUE); if (sile < 0) sile = phone_to_id("SIL", TRUE); /* No need to check if SIL exists; already checked in allphone_start_utt */ for (i = b; (i < n_bp) && (allphone_bp[i].p != sile); i++); if (i >= n_bp) { printf("UTTERANCE DID NOT END IN SILe\n"); bestbp = b; for (i = b + 1; i < n_bp; i++) if (allphone_bp[i].scr > allphone_bp[bestbp].scr) bestbp = i; } else bestbp = i; if (cmd_ln_boolean("-backtrace")) { printf("\t%5s %5s %10s %11s %s (Allphone) (%s)\n", "SFrm", "EFrm", "AScr/Frm", "AScr", "Phone", uttproc_get_uttid()); printf ("\t------------------------------------------------------------\n"); } allphone_backtrace(bestbp); if (cmd_ln_boolean("-backtrace") && (bestbp >= 0)) { assert(allphone_bp[bestbp].f >= 0); scr = allphone_bp[bestbp].scr; for (f = 0; f <= allphone_bp[bestbp].f; f++) scr += renorm_scr[f]; printf ("\t------------------------------------------------------------\n"); f = allphone_bp[bestbp].f; if (f >= 0) { printf("\t%5d %5d %10d %11d %s(TOTAL)\n", 0, f, scr / (f + 1), scr, uttproc_get_uttid()); } }}search_hyp_t *allphone_utt(int32 nfr, mfcc_t ***feat_buf){ int32 i, f; int32 bestscr; int32 lastbp, bestbp; if (allp_seghyp) allphone_utt_seghyp_free(allp_seghyp); allp_seghyp = NULL; allp_seghyp_tail = NULL; allphone_senone_active(); allphone_start_utt(); renorm_scr[0] = 0; for (f = 0; f < nfr; ++f) { senscr_active(feat_buf[f], f); if ((bestscr = allphone_eval_ci_chan(f)) <= WORST_SCORE) { E_ERROR("POOR MATCH: bestscore= %d\n", bestscr); break; } lastbp = n_bp; allphone_chan_prune(f, bestscr); if (lastbp < n_bp) { bestbp = lastbp; for (i = lastbp + 1; i < n_bp; i++) { if (allphone_bp[i].scr > allphone_bp[bestbp].scr) bestbp = i; } allphone_chan_trans(f, bestbp); } allphone_renorm(f + 1, bestscr); } allphone_result(); return allp_seghyp;}voidallphone_init(){ float64 bw, exitbw; float32 pip; int32 i; bw = cmd_ln_float64("-beam"); exitbw = cmd_ln_float64("-wbeam"); pip = cmd_ln_float32("-pip"); n_ciphone = phoneCiCount(); ci_chan = ckd_calloc(n_ciphone, sizeof(CHAN_T)); for (i = 0; i < n_ciphone; i++) { ci_chan[i].sseqid = bin_mdef_pid2ssid(mdef, i); ci_chan[i].ciphone = i; } renorm_scr = ckd_calloc(MAX_FRAMES, sizeof(int32)); allphone_bp = ckd_calloc(ALLPHONE_BP_MAX, sizeof(allphone_bp_t)); allphone_bw = LOG(bw); allphone_exitbw = LOG(exitbw); allphone_pip = LOG(pip); E_INFO("bw= %d, wordbw= %d, pip= %d\n", allphone_bw, allphone_exitbw, allphone_pip);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -