📄 cont_mgau.h

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 H
字号:
/* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * cont_mgau.h -- Mixture Gaussians for continuous HMM models. * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1997 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** * * HISTORY *  * 20.Apr.2001  RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) *              Added mgau_free to free memory allocated by mgau_init() * 15-Dec-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added mgau_model_t.{frm_sen_eval,frm_gau_eval}. * 		Added mgau_var_nzvec_floor(). *  * 28-Mar-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Started. */#ifndef _S3_CONT_MGAU_H_#define _S3_CONT_MGAU_H_#include <s3types.h>/** \file cont_mgau.h *  \brief Interface of full GMM computation with integer value of log likelihood.  */#ifdef __cplusplusextern "C" {#endif  /** * Specification of sets of GMM  */#define CONTHMM  10001 /**(NOT USED, for backward compatibility only)  */#define SEMIHMM  10002 /**(NOT USED, for backward compatibllity only)  */#define FULL_INT_COMP 20001  /** (NOT USED) Use full integer computation */#define FULL_FLOAT_COMP 20002 /** (NOT USED) Use full floating point computation */#define MIX_INT_FLOAT_COMP 20003 /** (NOT USED) Use mixture of integer and floating point computation */#define MGAU_MEAN		1 /** Constant fo specified the mean is used */#define MGAU_VAR		2 /** Constant fo specified the variance is used */  /**   * Mixture Gaussians: Weighted set of Gaussian densities, each with its own mean vector and * diagonal covariance matrix.  Specialized for continuous HMMs to improve speed performance. * So, a separate mixture Gaussian, with its own mixture weights, for each HMM state.  Also, * a single feature stream assumed.  (In other words, the mgau_t structure below represents * a senone in a fully continuous HMM model.) *  * Given a Gaussian density with mean vector m and diagonal variance vector v, and some * input vector x, all of length n, the Mahalanobis distance of x from the Gaussian mean m * is given by: *     {1/sqrt((2pi)^n * det(v))} * exp{-Sum((x[i] - m[i])^2 / (2v[i]))} * To speed up this evaluation, the first sub-expression ({1/sqrt...}) can be precomputed at * initialization, and so can 1/2v[i] in the second sub-expression.  Secondly, recognition * systems work with log-likelihood values, so these distances or likelihood values are * computed in log-domain.  Finally, float32 operations are costlier than int32 ones, so * the log-values are converted to logs3 domain (see libmisc/logs3.h) (but before the mixing * weights are applied).  Thus, to reiterate, the final scores are (int32) logs3 values. */  /** 20040826 ARCHAN:  * Introduced hook to the GMM definition to allow gaussian computation using full float  * operations. Also added another hook that allows potential use of full covariance matrix.  * At this point, full covariance matrix computation was not fully implemented.  * If comp_type = MIX_INT_FLOAT_COMP, then the interger pointer will be used to store the  * mixture weight.  It is also important to initialize the logs3 routine separately. * If comp_type = FULL_FLOAT_COMP, then the floating point pointer will be used to store the mixture * weights.  */  /**   * A single mixture-Gaussian model for one senone (see above comment).   */typedef struct {  int32 n_comp;	/** #Component Gaussians in this mixture.  NOTE: May be 0 (for the			   untrained states). */  /* Definition for mean */  float32 **mean;	/** The n_comp means of the Gaussians. The mean vector for a single mixture-Gaussian model for one senone. Dimension: n_comp * dimension */  /* Definition for variances */  float32 **var;	/** The n_comp (diagonal) variances of the Gaussians.  Could be			   converted to 1/(2*var) for faster computation (see above comment).  The diagonal variance vector for a single mixture-Gaussian model for one senone. Dimension: n_comp * dimension */  float32 ***fullvar;   /* (NOT USED) The n_comp (full) variances of the Gaussians. */                        /* A full co-variance matrix for a single mixture-Gaussian model for one senone */                        /* Dimension: n_comp * dimension * dimension */  /* Definition for the log reciprocal terms */  float32 *lrd;	        /** Log(Reciprocal(Determinant (variance))).  (Then there is also a			   (2pi)^(veclen) involved...) */  /* Definitions for the mixture weights */  int32 *mixw;	        /** Mixture weights for the n_comp components (int32 instead of float32			   because these values are in logs3 domain)*/  float32 *mixw_f;      /** Mixture weights for the n_comp components in float32 */} mgau_t;  /** * The set of mixture-Gaussians in an acoustic model. */typedef struct {  int32 n_mgau;	/** #Mixture Gaussians in this model (i.e., #senones) */  int32 max_comp;	/** Max components in any mixture */  int32 veclen;	/** Vector length of the Gaussian density means (and diagonal vars) */  mgau_t *mgau;	/** The n_mgau mixture Gaussians */  float64 distfloor;	/** Mahalanobis distances can underflow when finally converted to			   logs3 values.  To prevent this, floor the log values first. */  int32 comp_type;  /**Type of computation used in this set of mixture-Gaussians*/		       int32 verbose;    /**Whether to display information */  /* Used only in the flat lexicon decoder, statistics */  int32 frm_sen_eval;		/** #Senones evaluated in the most recent frame */  int32 frm_gau_eval;		/** #Gaussian densities evaluated in the most recent frame */  int32 frm_ci_sen_eval;        /** #CI Senones evaluated in most recent frame*/  int32 frm_ci_gau_eval;        /** #CI Senones evaluated in most recent frame*/  int32 gau_type; /** gau_type=CONTHMM if it is fully continous HMM, 		     gau_type=SEMIHMM if it is semi continous HMM.*/} mgau_model_t;  /** Access macros */  /** \def mgau_n_mgau      Access number of GMMs      \def mgau_max_comp      Access the maximum number of components. It can be different across GMMs      \def mgau_veclen      Access an integer array contains the size of each stream      \def mgau_n_comp      Access the number of component for a particular mixture in a GMM      \def mgau_mean       Access the mean      \def mgau_var      Access the variance      \def mgau_lrd      Access the floating point version of the Gaussian constant      \def mgau_lrd      Access the integer version of the Gaussian constant      \def mgau_frm_sen_eval      Number of senones evaluated in this frame      \def mgau_frm_gau_eval      Number of gaussians evaluated in this frame      \def mgau_frm_ci_sen_eval      Number of CI senones evaluated in this frame      \def mgau_frm_ci_gau_eval      Number of CI gaussians evaluated in this frame   */#define mgau_n_mgau(g)		((g)->n_mgau)#define mgau_max_comp(g)	((g)->max_comp)#define mgau_veclen(g)		((g)->veclen)#define mgau_n_comp(g,m)	((g)->mgau[m].n_comp)#define mgau_mean(g,m,c)	((g)->mgau[m].mean[c])#define mgau_var(g,m,c)		((g)->mgau[m].var[c])#define mgau_fullvar(g,m,c)	((g)->mgau[m].fullvar[c])#define mgau_mixw(g,m,c)	((g)->mgau[m].mixw[c])#define mgau_mixw_f(g,m,c)	((g)->mgau[m].mixw_f[c])#define mgau_lrd(g,m,c)		((g)->mgau[m].lrd[c])#define mgau_lrdi(g,m,c)	((g)->mgau[m].lrdi[c])#define mgau_frm_sen_eval(g)	((g)->frm_sen_eval)#define mgau_frm_gau_eval(g)	((g)->frm_gau_eval)#define mgau_frm_cisen_eval(g)	((g)->frm_ci_sen_eval)#define mgau_frm_cigau_eval(g)	((g)->frm_ci_gau_eval)  /** * Create a new mixture Gaussian model from the given files (Sphinx3 format).  Optionally, * apply the precomputations mentioned in the main comment above. * Return value: pointer to the model created if successful; NULL if error. */mgau_model_t *mgau_init (char *meanfile,	/** In: File containing means of mixture gaussians */	   char *varfile,	/** In: File containing variances of mixture gaussians */	   float64 varfloor,	/** In: Floor value applied to variances; e.g., 0.0001 */	   char *mixwfile,	/** In: File containing mixture weights */	   float64 mixwfloor,	/** In: Floor value for mixture weights; e.g., 0.0000001 */	   int32 precomp,       /** In: If TRUE, create and precompute mgau_t.lrd and also				   transform each var value to 1/(2*var).  (If FALSE, one				   cannot use the evaluation routines provided here.) */	   char* senmgau,	/** In: type of the gaussians distribution, .cont. or .semi. FIX 				   me! This is confusing!*/	   int32 comp_type);    /** In: Type of computation in this set of gaussian mixtures. */				  /** * Floor any variance vector that is non-zero (vector). * Return value: No. of variance VALUES floored. */int32 mgau_var_nzvec_floor (mgau_model_t *g, float64 floor);  /** * Evaluate a single mixture Gaussian at the given vector x; i.e., compute the Mahalanobis * distance of x from each mean in the mixture, and combine them using the mixture weights. * Return value: The final score from this evaluation (a logs3 domain value).  NOTE: if the * specified mixture is empty, S3_LOGPROB_ZERO is returned (see libmisc/libmisc.h). */  /* The hybrid integer and floating point implementation of GMM computation */int32mgau_eval (mgau_model_t *g,	/** In: The entire mixture Gaussian model */	   int32 m,		/** In: The chosen mixture in the model (i.e., g->mgau[m]) */	   int32 *active_comp,	/** In: An optional, -1 terminated list of active component				   indices; if non-NULL, only the specified components are				   used in the evaluation. */	   float32 *x /** In: Input observation vector (of length g->veclen). */  );		  /** * Like mgau_eval, but return the scores of the individual components, instead of combining * them into a senone score.  Return value: Best component score. */int32 mgau_comp_eval (mgau_model_t *g,	/** In: Set of mixture Gaussians */		      int32 m,		/** In: Mixture being considered */		      float32 *x,	/** In: Input vector being compared to the components */		      int32 *score);	/** Out: Array of scores for each component */  /** 20040829 : ARCHAN: a temporary hacked function to convert the log domain value back to float domain */int32 mgau_precomp_hack_log_to_float(mgau_model_t *g);  /**   * A routine that dump all mean and variance parameters of a set of gaussian distribution.      */int32 mgau_dump (mgau_model_t *g,  /** In: Set of mixture Gaussians */		 int32 type);      /** In: type of output, MGAU_MEAN for mean or MGAU_VAR for variance.  */  /** RAH   * Free memory allocated by mgau_init   */void mgau_free (mgau_model_t *g);  /**    * Reloading the means. This is particularly useful for speaker adaptation.    */  int32 mgau_mean_reload(mgau_model_t *g,  /** In/Out : The mean which will be resetted*/			 char* mean_file_name); /** In: The mean files */#ifdef __cplusplus}#endif#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -