📄 s2_semi_mgau.c
字号:
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * * HISTORY * * $Log$ * Revision 1.2 2006/04/06 14:03:02 dhdfu * Prevent confusion among future generations by calling this s2_semi_mgau instead of sc_vq * * Revision 1.1 2006/04/05 20:14:26 dhdfu * Add cut-down support for Sphinx-2 fast GMM computation (from * PocketSphinx). This does *not* support Sphinx2 format models, but * rather semi-continuous Sphinx3 models. I'll try to write a model * converter at some point soon. * * Unfortunately the smallest models I have for testing don't do so well * on the AN4 test sentence (should use AN4 models, maybe...) so it comes * with a "don't panic" warning. * * Revision 1.4 2006/04/04 15:31:31 dhuggins * Remove redundant acoustic score scaling in senone computation. * * Revision 1.3 2006/04/04 15:24:29 dhuggins * Get the meaning of LOG_BASE right (oops!). Seems to work fine now, at * least at logbase=1.0001. * * Revision 1.2 2006/04/04 14:54:40 dhuggins * Add support for s2_semi_mgau - it doesn't crash, but it doesn't work either :) * * Revision 1.1 2006/04/04 04:25:17 dhuggins * Add a cut-down version of sphinx2 fast GMM computation (SCVQ) from * PocketSphinx. Not enabled or tested yet. Doesn't support Sphinx2 * models (write an external conversion tool instead, please). Hopefully * this will put an end to me complaining about Sphinx3 being too slow :-) * * Revision 1.12 2004/12/10 16:48:56 rkm * Added continuous density acoustic model handling * * * 22-Nov-2004 M K Ravishankar (rkm@cs) at Carnegie-Mellon University * Moved best senone score and best senone within phone * computation out of here and into senscr module, for * integrating continuous models into sphinx2. * * 19-Nov-97 M K Ravishankar (rkm@cs) at Carnegie-Mellon University * Added ability to read power variance file if it exists. * * 19-Jun-95 M K Ravishankar (rkm@cs) at Carnegie-Mellon University * Added scvq_set_psen() and scvq_set_bestpscr(). Modified SCVQScores_all to * also compute best senone score/phone. * * 19-May-95 M K Ravishankar (rkm@cs) at Carnegie-Mellon University * Added check for bad VQ scores in SCVQScores and SCVQScores_all. * * 01-Jul-94 M K Ravishankar (rkm@cs) at Carnegie-Mellon University * In SCVQScores, returned result from SCVQComputeScores_opt(). * * 01-Nov-93 M K Ravishankar (rkm@cs) at Carnegie-Mellon University * Added compressed, 16-bit senone probs option. * * 6-Apr-92 Fil Alleva (faa) at Carnegie-Mellon University * - added SCVQAgcSet() and agcType. * * 08-Oct-91 Eric Thayer (eht) at Carnegie-Mellon University * Created from system by Xuedong Huang * 22-Oct-91 Eric Thayer (eht) at Carnegie-Mellon University * Installed some efficiency improvements to acoustic scoring *//* System headers */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <limits.h>#include <math.h>/* SphinxBase headers */#include <sphinx_config.h>#include <cmd_ln.h>#include <fixpoint.h>#include <ckd_alloc.h>#include <bio.h>#include <err.h>/* Local headers */#include "s2types.h"#include "sphinx_types.h"#include "log.h"#include "s2_semi_mgau.h"#include "kdtree.h"#include "kb.h"#include "s2io.h"#include "senscr.h"#include "posixwin32.h"#define MGAU_MIXW_VERSION "1.0" /* Sphinx-3 file format version for mixw */#define MGAU_PARAM_VERSION "1.0" /* Sphinx-3 file format version for mean/var */#define NONE -1#define WORST_DIST (int32)(0x80000000)/* * In terms of already shifted and negated quantities (i.e. dealing with * 8-bit quantized values): */#define LOG_ADD(p1,p2) (logadd_tbl[(p1<<8)+(p2)])/** Subtract GMM component b (assumed to be positive) and saturate */#define GMMSUB(a,b) \ (((a)-(b) > a) ? (INT_MIN) : ((a)-(b)))/** Add GMM component b (assumed to be positive) and saturate */#define GMMADD(a,b) \ (((a)+(b) < a) ? (INT_MAX) : ((a)+(b)))extern const unsigned char logadd_tbl[];#ifndef MIN#define MIN(a,b) ((a) < (b) ? (a) : (b))#endif/* * Compute senone scores. */static int32 SCVQComputeScores(s2_semi_mgau_t * s, int32 compallsen);/* * Optimization for various topN cases, PDF-size(#bits) cases of * SCVQComputeScores() and SCVQComputeScores_all(). */static int32 get_scores4_8b(s2_semi_mgau_t * s);static int32 get_scores2_8b(s2_semi_mgau_t * s);static int32 get_scores1_8b(s2_semi_mgau_t * s);static int32 get_scores_8b(s2_semi_mgau_t * s);static int32 get_scores4_8b_all(s2_semi_mgau_t * s);static int32 get_scores2_8b_all(s2_semi_mgau_t * s);static int32 get_scores1_8b_all(s2_semi_mgau_t * s);static int32 get_scores_8b_all(s2_semi_mgau_t * s);static voideval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z){ int32 i, ceplen; vqFeature_t *topn; topn = s->f[feat]; ceplen = s->veclen[feat]; for (i = 0; i < s->topN; i++) { mean_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */ vqFeature_t vtmp; var_t *var, d; mfcc_t *obs; int32 cw, j; cw = topn[i].codeword; mean = s->means[feat] + cw * ceplen; var = s->vars[feat] + cw * ceplen; d = s->dets[feat][cw]; obs = z; for (j = 0; j < ceplen; j++) { diff = *obs++ - *mean++; sqdiff = MFCCMUL(diff, diff); compl = MFCCMUL(sqdiff, *var); d = GMMSUB(d, compl); ++var; } topn[i].val.dist = (int32) d; if (i == 0) continue; vtmp = topn[i]; for (j = i - 1; j >= 0 && (int32) d > topn[j].val.dist; j--) { topn[j + 1] = topn[j]; } topn[j + 1] = vtmp; }}static voideval_cb_kdtree(s2_semi_mgau_t *s, int32 feat, mfcc_t *z, kd_tree_node_t *node, uint32 maxbbi){ vqFeature_t *worst, *best, *topn; int32 i, ceplen; best = topn = s->f[feat]; worst = topn + (s->topN - 1); ceplen = s->veclen[feat]; for (i = 0; i < maxbbi; ++i) { mean_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */ var_t *var, d; mfcc_t *obs; vqFeature_t *cur; int32 cw, j, k; cw = node->bbi[i]; mean = s->means[feat] + cw * ceplen; var = s->vars[feat] + cw * ceplen; d = s->dets[feat][cw]; obs = z; for (j = 0; (j < ceplen) && (d >= worst->val.dist); j++) { diff = *obs++ - *mean++; sqdiff = MFCCMUL(diff, diff); compl = MFCCMUL(sqdiff, *var); d = GMMSUB(d, compl); ++var; } if (j < ceplen) continue; if (d < worst->val.dist) continue; for (k = 0; k < s->topN; k++) { /* already there, so don't need to insert */ if (topn[k].codeword == cw) break; } if (k < s->topN) continue; /* already there. Don't insert */ /* remaining code inserts codeword and dist in correct spot */ for (cur = worst - 1; cur >= best && d >= cur->val.dist; --cur) memcpy(cur + 1, cur, sizeof(vqFeature_t)); ++cur; cur->codeword = cw; cur->val.dist = (int32) d; }}static voideval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z){ vqFeature_t *worst, *best, *topn; mean_t *mean; var_t *var; int32 *det, *detP, *detE; int32 i, ceplen; best = topn = s->f[feat]; worst = topn + (s->topN - 1); mean = s->means[feat]; var = s->vars[feat]; det = s->dets[feat]; detE = det + s->n_density; ceplen = s->veclen[feat]; for (detP = det; detP < detE; ++detP) { mean_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */ var_t d; mfcc_t *obs; vqFeature_t *cur; int32 cw, j; d = *detP; obs = z; cw = detP - det; for (j = 0; (j < ceplen) && (d >= worst->val.dist); ++j) { diff = *obs++ - *mean++; sqdiff = MFCCMUL(diff, diff); compl = MFCCMUL(sqdiff, *var); d = GMMSUB(d, compl); ++var; } if (j < ceplen) { /* terminated early, so not in topn */ mean += (ceplen - j); var += (ceplen - j); continue; } if (d < worst->val.dist) continue; for (i = 0; i < s->topN; i++) { /* already there, so don't need to insert */ if (topn[i].codeword == cw) break; } if (i < s->topN) continue; /* already there. Don't insert */ /* remaining code inserts codeword and dist in correct spot */ for (cur = worst - 1; cur >= best && d >= cur->val.dist; --cur) memcpy(cur + 1, cur, sizeof(vqFeature_t)); ++cur; cur->codeword = cw; cur->val.dist = (int32) d; }}static voidmgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z){ /* Initialize topn codewords to topn codewords from previous * frame, and calculate their densities. */ memcpy(s->f[feat], s->lastf[feat], sizeof(vqFeature_t) * s->topN); eval_topn(s, feat, z); /* If this frame is skipped, do nothing else. */ if (frame % s->ds_ratio) return; /* Evaluate the rest of the codebook (or subset thereof). */ if (s->kdtrees) { kd_tree_node_t *node; uint32 maxbbi; node = eval_kd_tree(s->kdtrees[feat], z, s->kd_maxdepth); maxbbi = s->kd_maxbbi == -1 ? node->n_bbi : MIN(node->n_bbi, s-> kd_maxbbi); eval_cb_kdtree(s, feat, z, node, maxbbi); } else { eval_cb(s, feat, z); } /* Make a copy of current topn. */ memcpy(s->lastf[feat], s->f[feat], sizeof(vqFeature_t) * s->topN);}/* * Compute senone scores for the active senones. */int32s2_semi_mgau_frame_eval(s2_semi_mgau_t * s, mfcc_t ** featbuf, int32 frame, int32 compallsen){ int i, j; for (i = 0; i < s->n_feat; ++i) mgau_dist(s, frame, i, featbuf[i]); /* normalize the topN feature scores */ for (j = 0; j < s->n_feat; j++) { s->score_tmp[j] = s->f[j][0].val.score; } for (i = 1; i < s->topN; i++) for (j = 0; j < s->n_feat; j++) { s->score_tmp[j] = ADD(s->score_tmp[j], s->f[j][i].val.score); } for (i = 0; i < s->topN; i++) for (j = 0; j < s->n_feat; j++) { s->f[j][i].val.score -= s->score_tmp[j]; if (s->f[j][i].val.score > 0) s->f[j][i].val.score = INT_MIN; /* tkharris++ */ /* E_FATAL("**ERROR** VQ score= %d\n", f[j][i].val.score); */ } return SCVQComputeScores(s, compallsen);}static int32SCVQComputeScores(s2_semi_mgau_t * s, int32 compallsen){ if (compallsen) { switch (s->topN) { case 4: return get_scores4_8b_all(s); break; case 2: return get_scores2_8b_all(s); break; case 1: return get_scores1_8b_all(s); break; default: return get_scores_8b_all(s); break; } } else { switch (s->topN) { case 4: return get_scores4_8b(s); break; case 2: return get_scores2_8b(s); break; case 1: return get_scores1_8b(s); break; default: return get_scores_8b(s); break; } }}static int32get_scores_8b(s2_semi_mgau_t * s){ E_FATAL("get_scores_8b() not implemented\n"); return 0;}static int32get_scores_8b_all(s2_semi_mgau_t * s){ E_FATAL("get_scores_8b_all() not implemented\n"); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -