📄 vector.c

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 C
字号:
/* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * vector.c * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1997 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** *  * HISTORY *  * 10-Mar-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Added vector_accum(), vector_vqlabel(), and vector_vqgen(). *  * 09-Mar-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Added vector_is_zero(), vector_cmp(), and vector_dist_eucl(). * 		Changed the name vector_dist_eval to vector_dist_maha. *  * 07-Oct-98	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Added distance computation related functions. *  * 12-Nov-95	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * 		Copied from Eric Thayer. */#include "vector.h"#include "logs3.h"#include "s3types.h"#if (!WIN32) /* RAH #include <random.h> */#include <time.h>		/* RAH */#endiffloat64 vector_sum_norm (float32 *vec, int32 len){    float64 sum, f;    int32 i;        sum = 0.0;    for (i = 0; i < len; i++)	sum += vec[i];    if (sum != 0.0) {	f = 1.0 / sum;	for (i = 0; i < len; i++)	  /*	  vec[i] *=  f;		*/  /*  */	  /* The compiler was complaining about the above line, to make sure we don't lose accuracy, use this  */	  vec[i] = (float32) ((float64) vec[i] * (float64)  f);		    }        return sum;}void vector_floor (float32 *vec, int32 len, float64 flr){    int32 i;    for (i = 0; i < len; i++)	if (vec[i] < flr)	    vec[i] = (float32)flr;}void vector_nz_floor (float32 *vec, int32 len, float64 flr){    int32 i;    for (i = 0; i < len; i++)	if ((vec[i] != 0.0) && (vec[i] < flr))	    vec[i] = (float32)flr;}void vector_print(FILE *fp, vector_t v, int32 dim){    int32 i;        for (i = 0; i < dim; i++)	fprintf (fp, " %11.4e", v[i]);    fprintf (fp, "\n");    fflush (fp);}int vector_normalize(vector_t v, uint32 dim){    uint32 i;    float64 s;    int ret_val = S3_SUCCESS;    for (i = 0, s = 0; i < dim; i++)        s += v[i];    if (s != 0) {        for (i = 0; i < dim; i++)            v[i] /= (float) s;    }    else {        ret_val = S3_WARNING;    }    return ret_val;}int32 vector_is_zero (float32 *vec, int32 len){    int32 i;        for (i = 0; (i < len) && (vec[i] == 0.0); i++);    return (i == len);	/* TRUE iff all mean values are 0.0 */}int32 vector_maxcomp_int32 (int32 *val, int32 len){    int32 i, bi;        bi = 0;    for (i = 1; i < len; i++) {	if (val[i] > val[bi])	    bi = i;    }    return bi;}int32 vector_mincomp_int32 (int32 *val, int32 len){    int32 i, bi;        bi = 0;    for (i = 1; i < len; i++) {	if (val[i] < val[bi])	    bi = i;    }    return bi;}int32 vector_maxcomp_float32 (float32 *val, int32 len){    int32 i, bi;        bi = 0;    for (i = 1; i < len; i++) {	if (val[i] > val[bi])	    bi = i;    }    return bi;}int32 vector_mincomp_float32 (float32 *val, int32 len){    int32 i, bi;        bi = 0;    for (i = 1; i < len; i++) {	if (val[i] < val[bi])	    bi = i;    }    return bi;}void vector_accum (float32 *dst, float32 *src, int32 len){    int32 i;        for (i = 0; i < len; i++)	dst[i] += src[i];}int32 vector_cmp (float32 *v1, float32 *v2, int32 len){    int32 i;        for (i = 0; i < len; i++) {	if (v1[i] < v2[i])	    return -1;	if (v1[i] > v2[i])	    return 1;    }        return 0;}int32 vector_mean (float32 *mean, float32 **data, int32 n_vec, int32 n_dim){    int32 i, j;    float64 f;        assert ((n_vec > 0) && (n_dim > 0));        for (i = 0; i < n_dim; i++)	mean[i] = 0.0;        for (i = 0; i < n_vec; i++) {	for (j = 0; j < n_dim; j++)	    mean[j] += data[i][j];    }        f = 1.0/(float64)n_vec;    for (i = 0; i < n_dim; i++)	mean[i] *= (float32)f;        return 0;}float64 vector_dist_eucl (float32 *v1, float32 *v2, int32 len){    float64 d;    int32 i;        d = 0.0;    for (i = 0; i < len; i++)	d += (v1[i] - v2[i]) * (v1[i] - v2[i]);        return d;}float64 vector_maha_precomp (float32 *var, int32 len){    float64 det;    int32 i;        for (det = (float64)0.0, i = 0; i < len; i++) {	/* log(1.0/prod(var[i])) */	det -= (float64) (log(var[i]));	var[i] = (float32)(1.0 / (var[i] * 2.0));    }    det -= log(2.0 * PI) * len;        return (det * 0.5);	/* sqrt */}float64 vector_dist_maha (float32 *vec, float32 *mean, float32 *varinv, float64 loginvdet,			  int32 len){    float64 dist, diff;    int32 i;        dist = loginvdet;    for (i = 0; i < len; i++) {	diff = (vec[i] - mean[i]);	dist -= diff * diff * varinv[i];    }        return dist;}int32 vector_vqlabel (float32 *vec, float32 **mean, int32 rows, int32 cols, float64 *sqerr){    int32 i, besti;    float64 d, bestd;        assert ((rows > 0) && (cols > 0));        bestd = vector_dist_eucl (mean[0], vec, cols);    besti = 0;        for (i = 1; i < rows; i++) {	d = vector_dist_eucl (mean[i], vec, cols);	if (bestd > d) {	    bestd = d;	    besti = i;	}    }        if (sqerr)	*sqerr = bestd;        return besti;}float64 vector_vqgen (float32 **data, int32 rows, int32 cols, int32 vqrows,		      float64 epsilon, int32 maxiter,		      float32 **mean, int32 *map,int32 seed){    int32 i, j, r, it;    float64 sqerr, prev_sqerr=0, t;    bitvec_t sel;    int32 *count;    float32 *gmean;    ptmr_t tm;        assert ((rows >= vqrows) && (maxiter >= 0) && (epsilon > 0.0));        sel = bitvec_alloc (rows);        ptmr_init (&tm);    ptmr_start (&tm);        /* Pick a random initial set of centroids */    if(seed < 0){      E_INFO("You are using the internal mechanism of vector_vqgen to decide the seed.  \n");      s3_rand_seed((unsigned) time(NULL));    }else{          /*ARCHAN: DON'T delete this line! This will always give the      experimenter the seed they were using in their experiments. This      make the result be repeatable*/      E_INFO("You are using %d as the seed \n",seed);      s3_rand_seed(seed);    }    for (i = 0; i < vqrows; i++) {	/* Find r = a random, previously unselected row from the input */      r = (s3_rand_int31() & (int32)0x7fffffff) % rows;       /*	r = (rand() & (int32)0x7fffffff) % rows; */	while (bitvec_is_set (sel, r)) {	/* BUG: possible infinite loop!! */	    if (++r >= rows)		r = 0;	}	bitvec_set (sel, r);		memcpy ((void *)(mean[i]), (void *)(data[r]), cols * sizeof(float32));	/* BUG: What if two randomly selected rows are identical in content?? */    }    bitvec_free (sel);        count = (int32 *) ckd_calloc (vqrows, sizeof(int32));        /* In k-means, unmapped means in any iteration are a problem.  Replace them with gmean */    gmean = (float32 *) ckd_calloc (cols, sizeof(float32));    vector_mean (gmean, mean, vqrows, cols);    for (it = 0;; it++) {		/* Iterations of k-means algorithm */	/* Find the current data->mean mappings (labels) */	sqerr = 0.0;	for (i = 0; i < rows; i++) {	    map[i] = vector_vqlabel (data[i], mean, vqrows, cols, &t);	    sqerr += t;	}	ptmr_stop(&tm);		if (it == 0)	    E_INFO("Iter %4d: %.1fs CPU; sqerr= %e\n", it, tm.t_cpu, sqerr);	else	    E_INFO("Iter %4d: %.1fs CPU; sqerr= %e; delta= %e\n",		   it, tm.t_cpu, sqerr, (prev_sqerr-sqerr)/prev_sqerr);		/* Check if exit condition satisfied */	if ((sqerr == 0.0) || (it >= maxiter-1) ||	    ((it > 0) && ( ((prev_sqerr - sqerr) / prev_sqerr) < epsilon )) )	    break;	prev_sqerr = sqerr;		ptmr_start(&tm);		/* Update (reestimate) means */	for (i = 0; i < vqrows; i++) {	    for (j = 0; j < cols; j++)		mean[i][j] = 0.0;	    count[i] = 0;	}	for (i = 0; i < rows; i++) {	    vector_accum (mean[map[i]], data[i], cols);	    count[map[i]]++;	}	for (i = 0; i < vqrows; i++) {	    if (count[i] > 1) {		t = 1.0 / (float64)(count[i]);		for (j = 0; j < cols; j++)		  /*		  mean[i][j] *= t; */ /* RAH, compiler was complaining about this,  */		  mean[i][j] = (float32) ((float64) mean[i][j] * (float64) t); /*  */	    } else if (count[i] == 0) {		E_ERROR("Iter %d: mean[%d] unmapped\n", it, i);		memcpy (mean[i], gmean, cols * sizeof(float32));	    }	}    }        ckd_free (count);    ckd_free (gmean);        return sqerr;}float64 vector_pdf_entropy (float32 *p, int32 len){    float64 sum;    int32 i;        sum = 0.0;    for (i = 0; i < len; i++) {	if (p[i] > 0.0)	    sum -= p[i] * log(p[i]);    }    sum /= log(2.0);        return sum;}float64 vector_pdf_cross_entropy (float32 *p1, float32 *p2, int32 len){    float64 sum;    int32 i;        sum = 0.0;    for (i = 0; i < len; i++) {	if (p2[i] > 0.0)	    sum -= p1[i] * log(p2[i]);    }    sum /= log(2.0);        return sum;}void vector_gautbl_alloc (vector_gautbl_t *gautbl, int32 n_gau, int32 veclen){    gautbl->n_gau = n_gau;    gautbl->veclen = veclen;    gautbl->mean = (float32 **) ckd_calloc_2d (n_gau, veclen, sizeof(float32));    gautbl->var = (float32 **) ckd_calloc_2d (n_gau, veclen, sizeof(float32));    gautbl->lrd = (float32 *) ckd_calloc (n_gau, sizeof(float32));    gautbl->distfloor = logs3_to_log (S3_LOGPROB_ZERO);}void vector_gautbl_free (vector_gautbl_t *gautbl){    ckd_free_2d ((void **) gautbl->mean);    ckd_free_2d ((void **) gautbl->var);    ckd_free ((void *) gautbl->lrd);}void vector_gautbl_var_floor (vector_gautbl_t *gautbl, float64 floor){    int32 g;        for (g = 0; g < gautbl->n_gau; g++)	vector_floor (gautbl->var[g], gautbl->veclen, floor);}void vector_gautbl_maha_precomp (vector_gautbl_t *gautbl){    int32 g;        for (g = 0; g < gautbl->n_gau; g++)	gautbl->lrd[g] = (float32) vector_maha_precomp (gautbl->var[g], gautbl->veclen);}void vector_gautbl_eval_logs3 (vector_gautbl_t *gautbl,			       int32 offset,			       int32 count,			       float32 *x,			       int32 *score){    int32 i, r;    float64 f;    int32 end, veclen;    float32 *m1, *m2, *v1, *v2;    float64 dval1, dval2, diff1, diff2;        f = log_to_logs3_factor();        /* Interleave evaluation of two vectors at a time for speed on pipelined machines */    end = offset + count;    veclen = gautbl->veclen;        for (r = offset; r < end-1; r += 2) {	m1 = gautbl->mean[r];	m2 = gautbl->mean[r+1];	v1 = gautbl->var[r];	v2 = gautbl->var[r+1];	dval1 = gautbl->lrd[r];	dval2 = gautbl->lrd[r+1];	for (i = 0; i < veclen; i++) {	    diff1 = x[i] - m1[i];	    dval1 -= diff1 * diff1 * v1[i];	    diff2 = x[i] - m2[i];	    dval2 -= diff2 * diff2 * v2[i];	}		if (dval1 < gautbl->distfloor)	    dval1 = gautbl->distfloor;	if (dval2 < gautbl->distfloor)	    dval2 = gautbl->distfloor;	score[r] = (int32)(f * dval1);	score[r+1] = (int32)(f * dval2);    }        if (r < end) {	m1 = gautbl->mean[r];	v1 = gautbl->var[r];	dval1 = gautbl->lrd[r];		for (i = 0; i < veclen; i++) {	    diff1 = x[i] - m1[i];	    dval1 -= diff1 * diff1 * v1[i];	}		if (dval1 < gautbl->distfloor)	    dval1 = gautbl->distfloor;	score[r] = (int32)(f * dval1);    }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -