📄 s2_semi_mgau.c

📁 WinCE平台上的语音识别程序
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- *//* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * * HISTORY *  * $Log$ * Revision 1.2  2006/04/06  14:03:02  dhdfu * Prevent confusion among future generations by calling this s2_semi_mgau instead of sc_vq *  * Revision 1.1  2006/04/05 20:14:26  dhdfu * Add cut-down support for Sphinx-2 fast GMM computation (from * PocketSphinx).  This does *not* support Sphinx2 format models, but * rather semi-continuous Sphinx3 models.  I'll try to write a model * converter at some point soon. * * Unfortunately the smallest models I have for testing don't do so well * on the AN4 test sentence (should use AN4 models, maybe...) so it comes * with a "don't panic" warning. * * Revision 1.4  2006/04/04 15:31:31  dhuggins * Remove redundant acoustic score scaling in senone computation. * * Revision 1.3  2006/04/04 15:24:29  dhuggins * Get the meaning of LOG_BASE right (oops!).  Seems to work fine now, at * least at logbase=1.0001. * * Revision 1.2  2006/04/04 14:54:40  dhuggins * Add support for s2_semi_mgau - it doesn't crash, but it doesn't work either :) * * Revision 1.1  2006/04/04 04:25:17  dhuggins * Add a cut-down version of sphinx2 fast GMM computation (SCVQ) from * PocketSphinx.  Not enabled or tested yet.  Doesn't support Sphinx2 * models (write an external conversion tool instead, please).  Hopefully * this will put an end to me complaining about Sphinx3 being too slow :-) * * Revision 1.12  2004/12/10 16:48:56  rkm * Added continuous density acoustic model handling * *  * 22-Nov-2004  M K Ravishankar (rkm@cs) at Carnegie-Mellon University * 		Moved best senone score and best senone within phone * 		computation out of here and into senscr module, for * 		integrating continuous  models into sphinx2. *  * 19-Nov-97  M K Ravishankar (rkm@cs) at Carnegie-Mellon University * 	Added ability to read power variance file if it exists. *  * 19-Jun-95  M K Ravishankar (rkm@cs) at Carnegie-Mellon University * 	Added scvq_set_psen() and scvq_set_bestpscr().  Modified SCVQScores_all to * 	also compute best senone score/phone. *  * 19-May-95  M K Ravishankar (rkm@cs) at Carnegie-Mellon University * 	Added check for bad VQ scores in SCVQScores and SCVQScores_all. *  * 01-Jul-94  M K Ravishankar (rkm@cs) at Carnegie-Mellon University * 	In SCVQScores, returned result from SCVQComputeScores_opt(). *  * 01-Nov-93  M K Ravishankar (rkm@cs) at Carnegie-Mellon University * 	Added compressed, 16-bit senone probs option. *  *  6-Apr-92  Fil Alleva (faa) at Carnegie-Mellon University *	- added SCVQAgcSet() and agcType. * * 08-Oct-91  Eric Thayer (eht) at Carnegie-Mellon University *	Created from system by Xuedong Huang * 22-Oct-91  Eric Thayer (eht) at Carnegie-Mellon University *	Installed some efficiency improvements to acoustic scoring *//* System headers */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <limits.h>#include <math.h>/* SphinxBase headers */#include <sphinx_config.h>#include <cmd_ln.h>#include <fixpoint.h>#include <ckd_alloc.h>#include <bio.h>#include <err.h>/* Local headers */#include "s2types.h"#include "sphinx_types.h"#include "log.h"#include "s2_semi_mgau.h"#include "kdtree.h"#include "kb.h"#include "s2io.h"#include "senscr.h"#include "posixwin32.h"#define MGAU_MIXW_VERSION	"1.0"   /* Sphinx-3 file format version for mixw */#define MGAU_PARAM_VERSION	"1.0"   /* Sphinx-3 file format version for mean/var */#define NONE		-1#define WORST_DIST	(int32)(0x80000000)/* * In terms of already shifted and negated quantities (i.e. dealing with * 8-bit quantized values): */#define LOG_ADD(p1,p2)	(logadd_tbl[(p1<<8)+(p2)])/** Subtract GMM component b (assumed to be positive) and saturate */#define GMMSUB(a,b) \	(((a)-(b) > a) ? (INT_MIN) : ((a)-(b)))/** Add GMM component b (assumed to be positive) and saturate */#define GMMADD(a,b) \	(((a)+(b) < a) ? (INT_MAX) : ((a)+(b)))extern const unsigned char logadd_tbl[];#ifndef MIN#define MIN(a,b) ((a) < (b) ? (a) : (b))#endif/* * Compute senone scores. */static int32 SCVQComputeScores(s2_semi_mgau_t * s, int32 compallsen);/* * Optimization for various topN cases, PDF-size(#bits) cases of * SCVQComputeScores() and SCVQComputeScores_all(). */static int32 get_scores4_8b(s2_semi_mgau_t * s);static int32 get_scores2_8b(s2_semi_mgau_t * s);static int32 get_scores1_8b(s2_semi_mgau_t * s);static int32 get_scores_8b(s2_semi_mgau_t * s);static int32 get_scores4_8b_all(s2_semi_mgau_t * s);static int32 get_scores2_8b_all(s2_semi_mgau_t * s);static int32 get_scores1_8b_all(s2_semi_mgau_t * s);static int32 get_scores_8b_all(s2_semi_mgau_t * s);static voideval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z){    int32 i, ceplen;    vqFeature_t *topn;    topn = s->f[feat];    ceplen = s->veclen[feat];    for (i = 0; i < s->topN; i++) {        mean_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */        vqFeature_t vtmp;        var_t *var, d;        mfcc_t *obs;        int32 cw, j;        cw = topn[i].codeword;        mean = s->means[feat] + cw * ceplen;        var = s->vars[feat] + cw * ceplen;        d = s->dets[feat][cw];        obs = z;        for (j = 0; j < ceplen; j++) {            diff = *obs++ - *mean++;            sqdiff = MFCCMUL(diff, diff);            compl = MFCCMUL(sqdiff, *var);            d = GMMSUB(d, compl);            ++var;        }        topn[i].val.dist = (int32) d;        if (i == 0)            continue;        vtmp = topn[i];        for (j = i - 1; j >= 0 && (int32) d > topn[j].val.dist; j--) {            topn[j + 1] = topn[j];        }        topn[j + 1] = vtmp;    }}static voideval_cb_kdtree(s2_semi_mgau_t *s, int32 feat, mfcc_t *z,               kd_tree_node_t *node, uint32 maxbbi){    vqFeature_t *worst, *best, *topn;    int32 i, ceplen;    best = topn = s->f[feat];    worst = topn + (s->topN - 1);    ceplen = s->veclen[feat];    for (i = 0; i < maxbbi; ++i) {        mean_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */        var_t *var, d;        mfcc_t *obs;        vqFeature_t *cur;        int32 cw, j, k;        cw = node->bbi[i];        mean = s->means[feat] + cw * ceplen;        var = s->vars[feat] + cw * ceplen;        d = s->dets[feat][cw];        obs = z;        for (j = 0; (j < ceplen) && (d >= worst->val.dist); j++) {            diff = *obs++ - *mean++;            sqdiff = MFCCMUL(diff, diff);            compl = MFCCMUL(sqdiff, *var);            d = GMMSUB(d, compl);            ++var;        }        if (j < ceplen)            continue;        if (d < worst->val.dist)            continue;        for (k = 0; k < s->topN; k++) {            /* already there, so don't need to insert */            if (topn[k].codeword == cw)                break;        }        if (k < s->topN)            continue;       /* already there.  Don't insert */        /* remaining code inserts codeword and dist in correct spot */        for (cur = worst - 1; cur >= best && d >= cur->val.dist; --cur)            memcpy(cur + 1, cur, sizeof(vqFeature_t));        ++cur;        cur->codeword = cw;        cur->val.dist = (int32) d;    }}static voideval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z){    vqFeature_t *worst, *best, *topn;    mean_t *mean;    var_t *var;    int32 *det, *detP, *detE;    int32 i, ceplen;    best = topn = s->f[feat];    worst = topn + (s->topN - 1);    mean = s->means[feat];    var = s->vars[feat];    det = s->dets[feat];    detE = det + s->n_density;    ceplen = s->veclen[feat];    for (detP = det; detP < detE; ++detP) {        mean_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */        var_t d;        mfcc_t *obs;        vqFeature_t *cur;        int32 cw, j;        d = *detP;        obs = z;        cw = detP - det;        for (j = 0; (j < ceplen) && (d >= worst->val.dist); ++j) {            diff = *obs++ - *mean++;            sqdiff = MFCCMUL(diff, diff);            compl = MFCCMUL(sqdiff, *var);            d = GMMSUB(d, compl);            ++var;        }        if (j < ceplen) {            /* terminated early, so not in topn */            mean += (ceplen - j);            var += (ceplen - j);            continue;        }        if (d < worst->val.dist)            continue;        for (i = 0; i < s->topN; i++) {            /* already there, so don't need to insert */            if (topn[i].codeword == cw)                break;        }        if (i < s->topN)            continue;       /* already there.  Don't insert */        /* remaining code inserts codeword and dist in correct spot */        for (cur = worst - 1; cur >= best && d >= cur->val.dist; --cur)            memcpy(cur + 1, cur, sizeof(vqFeature_t));        ++cur;        cur->codeword = cw;        cur->val.dist = (int32) d;    }}static voidmgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z){    /* Initialize topn codewords to topn codewords from previous     * frame, and calculate their densities. */    memcpy(s->f[feat], s->lastf[feat], sizeof(vqFeature_t) * s->topN);    eval_topn(s, feat, z);    /* If this frame is skipped, do nothing else. */    if (frame % s->ds_ratio)        return;    /* Evaluate the rest of the codebook (or subset thereof). */    if (s->kdtrees) {        kd_tree_node_t *node;        uint32 maxbbi;        node =            eval_kd_tree(s->kdtrees[feat], z, s->kd_maxdepth);        maxbbi = s->kd_maxbbi == -1 ? node->n_bbi : MIN(node->n_bbi,                                                        s->                                                        kd_maxbbi);        eval_cb_kdtree(s, feat, z, node, maxbbi);    }    else {        eval_cb(s, feat, z);    }    /* Make a copy of current topn. */    memcpy(s->lastf[feat], s->f[feat], sizeof(vqFeature_t) * s->topN);}/* * Compute senone scores for the active senones. */int32s2_semi_mgau_frame_eval(s2_semi_mgau_t * s,			mfcc_t ** featbuf, int32 frame,			int32 compallsen){    int i, j;    for (i = 0; i < s->n_feat; ++i)        mgau_dist(s, frame, i, featbuf[i]);    /* normalize the topN feature scores */    for (j = 0; j < s->n_feat; j++) {        s->score_tmp[j] = s->f[j][0].val.score;    }    for (i = 1; i < s->topN; i++)        for (j = 0; j < s->n_feat; j++) {            s->score_tmp[j] = ADD(s->score_tmp[j], s->f[j][i].val.score);        }    for (i = 0; i < s->topN; i++)        for (j = 0; j < s->n_feat; j++) {            s->f[j][i].val.score -= s->score_tmp[j];            if (s->f[j][i].val.score > 0)                s->f[j][i].val.score = INT_MIN; /* tkharris++ */            /* E_FATAL("**ERROR** VQ score= %d\n", f[j][i].val.score); */        }    return SCVQComputeScores(s, compallsen);}static int32SCVQComputeScores(s2_semi_mgau_t * s, int32 compallsen){    if (compallsen) {	switch (s->topN) {	case 4:	    return get_scores4_8b_all(s);	    break;	case 2:	    return get_scores2_8b_all(s);	    break;	case 1:	    return get_scores1_8b_all(s);	    break;	default:	    return get_scores_8b_all(s);	    break;	}    }    else {	switch (s->topN) {	case 4:	    return get_scores4_8b(s);	    break;	case 2:	    return get_scores2_8b(s);	    break;	case 1:	    return get_scores1_8b(s);	    break;	default:	    return get_scores_8b(s);	    break;	}    }}static int32get_scores_8b(s2_semi_mgau_t * s){    E_FATAL("get_scores_8b() not implemented\n");    return 0;}static int32get_scores_8b_all(s2_semi_mgau_t * s){    E_FATAL("get_scores_8b_all() not implemented\n");    return 0;}
12 3 下一页
💿 文件大小 5925 K
👤 上传用户 flashlee2003200
📂 所属分类多媒体处理
🏷️ 相关标签

#WinCE #语音识别 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -