📄 fe.c

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* ==================================================================== * Copyright (c) 1996-2004 Carnegie Mellon University.  All rights  * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * */#include "fe.h"#include <stdio.h>#include <stdlib.h>#if !defined(WIN32)#include <unistd.h>#include <sys/file.h>#if !defined(O_BINARY)#define O_BINARY 0#endif#endif#include <string.h>#include <time.h>#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <assert.h>#if defined(WIN32)#include <io.h>#include <errno.h>/*Now using customized random generator. *//*#define srand48(x) srand(x)  #define lrand48() rand()*/#endif#if (WIN32) /* RAH #include <random.h> */#include <time.h>		/* RAH */#endifint32 ep_fe_openfiles(param_t *P, fe_t *FE, char *infile, int32 *fp_in, int32 *nsamps, int32 *nframes, int32 *nblocks);void fe_init_dither(int32 seed);/*** Function to free the front-end wrapper ***/void few_free(fewrap_t *FEW){	free(FEW->fr_data);        free(FEW->fr_cep);         fe_close(FEW->FE);        fe_free_param(FEW->P);        free(FEW);   }/*** Function to initialize the front-end wrapper ***/fewrap_t * few_initialize(){  fewrap_t *FEW = (fewrap_t *) ckd_calloc(1,sizeof(fewrap_t));    /********************** INITIALIZING COMPONENTS ******************/    // initialize parameters  FEW->P = fe_parse_options();    // initialize the front-end parameters  if (( FEW->FE = fe_init(FEW->P))==NULL){    fprintf(stderr,"memory alloc failed in fe_convert_files()\n...exiting\n");    exit(0);  }    /*************** Allocate memory for each frame of speech *******************/    FEW->fr_data = (int16 *)ckd_calloc(FEW->FE->FRAME_SIZE, sizeof(int16));  FEW->fr_cep = (float32 *)ckd_calloc(FEW->FE->NUM_CEPSTRA, sizeof(float32));    return(FEW);}int32 fe_convert_files(param_t *P){    fe_t *FE;    char *infile,*outfile, fileroot[MAXCHARS];    FILE *ctlfile;    int16 *spdata=NULL;    int32 splen,total_samps,frames_proc,nframes,nblocks,last_frame;    int32 fp_in,fp_out, last_blocksize=0,curr_block,total_frames;    float32 **cep = NULL, **last_frame_cep;    int32 return_value;    int32 warn_zero_energy = OFF;    int32 process_utt_return_value;        splen=0;    if ((FE = fe_init(P))==NULL){	E_ERROR("memory alloc failed...exiting\n");	return(FE_MEM_ALLOC_ERROR);    }    if (P->is_batch){	if ((ctlfile = fopen(P->ctlfile,"r")) == NULL){	    E_ERROR("Unable to open control file %s\n",P->ctlfile);	    return(FE_CONTROL_FILE_ERROR);	}	while (fscanf(ctlfile,"%s",fileroot)!=EOF){	    fe_build_filenames(P,fileroot,&infile,&outfile);	    if (P->verbose) E_INFO("%s\n",infile);	    return_value = fe_openfiles(P,FE,infile,&fp_in,&total_samps,&nframes,&nblocks,outfile,&fp_out);	    if (return_value != FE_SUCCESS){	      return(return_value);	    }	    warn_zero_energy = OFF;	    if (nblocks*P->blocksize>=total_samps) 		last_blocksize = total_samps - (nblocks-1)*P->blocksize;	    	    if (!fe_start_utt(FE)){		curr_block=1;		total_frames=frames_proc=0;		/*execute this loop only if there is more than 1 block to		  be processed */		while(curr_block < nblocks){		    splen = P->blocksize;		    if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){			E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen);			return(FE_MEM_ALLOC_ERROR);		    } 		    if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){			E_ERROR("error reading speech data\n");			return(FE_INPUT_FILE_READ_ERROR);		    }		    process_utt_return_value = 		      fe_process_utt(FE,spdata,splen,&cep, &frames_proc);		    if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {		      warn_zero_energy = ON;		    } else {		      assert(process_utt_return_value == FE_SUCCESS);		    }		    if (frames_proc>0)			fe_writeblock_feat(P,FE,fp_out,frames_proc,cep);		    ckd_free_2d((void **)cep);		    curr_block++;		    total_frames += frames_proc;		    if (spdata!=NULL) { 		      free(spdata); 		      spdata = NULL; 		    }		}		/* process last (or only) block */		if (spdata!=NULL) {		  free(spdata);		}		splen=last_blocksize;				if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){		    E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen);		    return(FE_MEM_ALLOC_ERROR);		} 		if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){		    E_ERROR("error reading speech data\n");		    return(FE_INPUT_FILE_READ_ERROR);		}				process_utt_return_value = 		  fe_process_utt(FE,spdata,splen,&cep, &frames_proc);		if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {		  warn_zero_energy = ON;		} else {		  assert(process_utt_return_value == FE_SUCCESS);		}		if (frames_proc>0)		    fe_writeblock_feat(P,FE,fp_out,frames_proc,cep);		ckd_free_2d((void **)cep);		curr_block++;		if (P->logspec != ON)		    last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->NUM_CEPSTRA,sizeof(float32));		else		    last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->MEL_FB->num_filters,sizeof(float32));		process_utt_return_value = 		  fe_end_utt(FE, last_frame_cep[0], &last_frame);		if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {		  warn_zero_energy = ON;		} else {		  assert(process_utt_return_value == FE_SUCCESS);		}		if (last_frame>0){		    fe_writeblock_feat(P,FE,fp_out,last_frame,last_frame_cep);		    frames_proc++;		}		total_frames += frames_proc;				fe_closefiles(fp_in,fp_out);				free(spdata); spdata = 0;		ckd_free_2d((void **)last_frame_cep);			    }	    else{		E_ERROR("fe_start_utt() failed\n");		return(FE_START_ERROR);	    }	}	fe_close(FE);	if (ON == warn_zero_energy) {	  E_WARN("File %s has some frames with zero energy. Consider using dither\n", infile);	}    }        else if (P->is_single){		fe_build_filenames(P,fileroot,&infile,&outfile);	if (P->verbose) printf("%s\n",infile);	return_value = fe_openfiles(P,FE,infile,&fp_in,&total_samps,&nframes,&nblocks,outfile,&fp_out);	if (return_value != FE_SUCCESS){	  return(return_value);	}	warn_zero_energy = OFF;		if (nblocks*P->blocksize>=total_samps) 	    last_blocksize = total_samps - (nblocks-1)*P->blocksize;		if (!fe_start_utt(FE)){	    curr_block=1;	    total_frames=frames_proc=0;	    /*execute this loop only if there are more than 1 block to	      be processed */	    while(curr_block < nblocks){		splen = P->blocksize;		if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){		    E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen);		    return(FE_MEM_ALLOC_ERROR);		} 		if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){		    E_ERROR("Error reading speech data\n");		    return(FE_INPUT_FILE_READ_ERROR);		}		process_utt_return_value = 		  fe_process_utt(FE,spdata,splen,&cep, &frames_proc);		if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {		  warn_zero_energy = ON;		} else {		  assert(process_utt_return_value == FE_SUCCESS);		}		if (frames_proc>0)		    fe_writeblock_feat(P,FE,fp_out,frames_proc,cep);		ckd_free_2d((void **)cep);		curr_block++;		total_frames += frames_proc;		if (spdata!=NULL) { 		  free(spdata); 		  spdata = NULL; 		}			    }	    /* process last (or only) block */	    if (spdata!=NULL) {free(spdata);}	    splen =last_blocksize;	    if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){		E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen);		return(FE_MEM_ALLOC_ERROR);	    } 	    if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){		E_ERROR("Error reading speech data\n");		return(FE_INPUT_FILE_READ_ERROR);	    }	    process_utt_return_value = 	      fe_process_utt(FE,spdata,splen,&cep, &frames_proc);	    if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {	      warn_zero_energy = ON;	    } else {	      assert(process_utt_return_value == FE_SUCCESS);	    }	    if (frames_proc>0)		fe_writeblock_feat(P,FE,fp_out,frames_proc,cep);	    ckd_free_2d((void **)cep);	    curr_block++;	    if (P->logspec != ON)	        last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->NUM_CEPSTRA,sizeof(float32));	    else	        last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->MEL_FB->num_filters,sizeof(float32));	    process_utt_return_value = 	      fe_end_utt(FE, last_frame_cep[0], &last_frame);	    if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {	      warn_zero_energy = ON;	    } else {	      assert(process_utt_return_value == FE_SUCCESS);	    }	    if (last_frame>0){	      fe_writeblock_feat(P,FE,fp_out,last_frame,last_frame_cep);	      frames_proc++;	    }	    total_frames += frames_proc;	    	    	    fe_closefiles(fp_in,fp_out);	    	    free(spdata);	    ckd_free_2d((void **)last_frame_cep);	}	else{	    E_ERROR("fe_start_utt() failed\n");	    return(FE_START_ERROR);	}		fe_close(FE);	if (ON == warn_zero_energy) {	  E_WARN("File %s has some frames with zero energy. Consider using dither\n", infile);	}    }    else{	E_ERROR("Unknown mode - single or batch?\n");	return(FE_UNKNOWN_SINGLE_OR_BATCH);	    }    P->splen=splen;    P->nframes=nframes;    P->spdata=spdata;    return(FE_SUCCESS);    }int16 * fe_convert_files_to_spdata(param_t *P, fe_t *FE, int32 *splenp, int32 *nframesp){    char *infile, fileroot[MAXCHARS];    int32 splen,total_samps,frames_proc,nframes,nblocks;    int32 fp_in, last_blocksize=0,curr_block,total_frames;    int16 *spdata;    spdata = NULL;               /* 20040917: ARCHAN The fe_copy_str and free pair is very fishy       here. If the number of utterance being processed is larger than       1M, there may be chance, we will hit out of segment problem. */        if (P->is_single){                        fe_build_filenames(P,fileroot,&infile,NULL);        if (P->verbose) printf("%s\n",infile);        if (ep_fe_openfiles(P,FE,infile,&fp_in,&total_samps,&nframes,&nblocks) != FE_SUCCESS){       printf("fe_openfiles exited!\n");                  exit(0);        }                                        if (nblocks*P->blocksize>=total_samps)             last_blocksize = total_samps - (nblocks-1)*P->blocksize;                if (!fe_start_utt(FE)){            curr_block=1;            total_frames=frames_proc=0;                //printf("Total frames %d, last_blocksize: %d\n",total_frames, last_blocksize);                        /* process last (or only) block */            if (spdata!=NULL) free(spdata);            splen =last_blocksize;            if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){                fprintf(stderr,"Unable to allocate memory block of %d shorts for input speech\n",splen);                exit(0);            }             if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){                fprintf(stderr,"Error reading speech data\n");                exit(0);
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -