📄 fe.c
字号:
/* ==================================================================== * Copyright (c) 1996-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * */#include "fe.h"#include <stdio.h>#include <stdlib.h>#if !defined(WIN32)#include <unistd.h>#include <sys/file.h>#if !defined(O_BINARY)#define O_BINARY 0#endif#endif#include <string.h>#include <time.h>#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <assert.h>#if defined(WIN32)#include <io.h>#include <errno.h>/*Now using customized random generator. *//*#define srand48(x) srand(x) #define lrand48() rand()*/#endif#if (WIN32) /* RAH #include <random.h> */#include <time.h> /* RAH */#endifint32 ep_fe_openfiles(param_t *P, fe_t *FE, char *infile, int32 *fp_in, int32 *nsamps, int32 *nframes, int32 *nblocks);void fe_init_dither(int32 seed);/*** Function to free the front-end wrapper ***/void few_free(fewrap_t *FEW){ free(FEW->fr_data); free(FEW->fr_cep); fe_close(FEW->FE); fe_free_param(FEW->P); free(FEW); }/*** Function to initialize the front-end wrapper ***/fewrap_t * few_initialize(){ fewrap_t *FEW = (fewrap_t *) ckd_calloc(1,sizeof(fewrap_t)); /********************** INITIALIZING COMPONENTS ******************/ // initialize parameters FEW->P = fe_parse_options(); // initialize the front-end parameters if (( FEW->FE = fe_init(FEW->P))==NULL){ fprintf(stderr,"memory alloc failed in fe_convert_files()\n...exiting\n"); exit(0); } /*************** Allocate memory for each frame of speech *******************/ FEW->fr_data = (int16 *)ckd_calloc(FEW->FE->FRAME_SIZE, sizeof(int16)); FEW->fr_cep = (float32 *)ckd_calloc(FEW->FE->NUM_CEPSTRA, sizeof(float32)); return(FEW);}int32 fe_convert_files(param_t *P){ fe_t *FE; char *infile,*outfile, fileroot[MAXCHARS]; FILE *ctlfile; int16 *spdata=NULL; int32 splen,total_samps,frames_proc,nframes,nblocks,last_frame; int32 fp_in,fp_out, last_blocksize=0,curr_block,total_frames; float32 **cep = NULL, **last_frame_cep; int32 return_value; int32 warn_zero_energy = OFF; int32 process_utt_return_value; splen=0; if ((FE = fe_init(P))==NULL){ E_ERROR("memory alloc failed...exiting\n"); return(FE_MEM_ALLOC_ERROR); } if (P->is_batch){ if ((ctlfile = fopen(P->ctlfile,"r")) == NULL){ E_ERROR("Unable to open control file %s\n",P->ctlfile); return(FE_CONTROL_FILE_ERROR); } while (fscanf(ctlfile,"%s",fileroot)!=EOF){ fe_build_filenames(P,fileroot,&infile,&outfile); if (P->verbose) E_INFO("%s\n",infile); return_value = fe_openfiles(P,FE,infile,&fp_in,&total_samps,&nframes,&nblocks,outfile,&fp_out); if (return_value != FE_SUCCESS){ return(return_value); } warn_zero_energy = OFF; if (nblocks*P->blocksize>=total_samps) last_blocksize = total_samps - (nblocks-1)*P->blocksize; if (!fe_start_utt(FE)){ curr_block=1; total_frames=frames_proc=0; /*execute this loop only if there is more than 1 block to be processed */ while(curr_block < nblocks){ splen = P->blocksize; if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){ E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen); return(FE_MEM_ALLOC_ERROR); } if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){ E_ERROR("error reading speech data\n"); return(FE_INPUT_FILE_READ_ERROR); } process_utt_return_value = fe_process_utt(FE,spdata,splen,&cep, &frames_proc); if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) { warn_zero_energy = ON; } else { assert(process_utt_return_value == FE_SUCCESS); } if (frames_proc>0) fe_writeblock_feat(P,FE,fp_out,frames_proc,cep); ckd_free_2d((void **)cep); curr_block++; total_frames += frames_proc; if (spdata!=NULL) { free(spdata); spdata = NULL; } } /* process last (or only) block */ if (spdata!=NULL) { free(spdata); } splen=last_blocksize; if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){ E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen); return(FE_MEM_ALLOC_ERROR); } if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){ E_ERROR("error reading speech data\n"); return(FE_INPUT_FILE_READ_ERROR); } process_utt_return_value = fe_process_utt(FE,spdata,splen,&cep, &frames_proc); if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) { warn_zero_energy = ON; } else { assert(process_utt_return_value == FE_SUCCESS); } if (frames_proc>0) fe_writeblock_feat(P,FE,fp_out,frames_proc,cep); ckd_free_2d((void **)cep); curr_block++; if (P->logspec != ON) last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->NUM_CEPSTRA,sizeof(float32)); else last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->MEL_FB->num_filters,sizeof(float32)); process_utt_return_value = fe_end_utt(FE, last_frame_cep[0], &last_frame); if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) { warn_zero_energy = ON; } else { assert(process_utt_return_value == FE_SUCCESS); } if (last_frame>0){ fe_writeblock_feat(P,FE,fp_out,last_frame,last_frame_cep); frames_proc++; } total_frames += frames_proc; fe_closefiles(fp_in,fp_out); free(spdata); spdata = 0; ckd_free_2d((void **)last_frame_cep); } else{ E_ERROR("fe_start_utt() failed\n"); return(FE_START_ERROR); } } fe_close(FE); if (ON == warn_zero_energy) { E_WARN("File %s has some frames with zero energy. Consider using dither\n", infile); } } else if (P->is_single){ fe_build_filenames(P,fileroot,&infile,&outfile); if (P->verbose) printf("%s\n",infile); return_value = fe_openfiles(P,FE,infile,&fp_in,&total_samps,&nframes,&nblocks,outfile,&fp_out); if (return_value != FE_SUCCESS){ return(return_value); } warn_zero_energy = OFF; if (nblocks*P->blocksize>=total_samps) last_blocksize = total_samps - (nblocks-1)*P->blocksize; if (!fe_start_utt(FE)){ curr_block=1; total_frames=frames_proc=0; /*execute this loop only if there are more than 1 block to be processed */ while(curr_block < nblocks){ splen = P->blocksize; if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){ E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen); return(FE_MEM_ALLOC_ERROR); } if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){ E_ERROR("Error reading speech data\n"); return(FE_INPUT_FILE_READ_ERROR); } process_utt_return_value = fe_process_utt(FE,spdata,splen,&cep, &frames_proc); if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) { warn_zero_energy = ON; } else { assert(process_utt_return_value == FE_SUCCESS); } if (frames_proc>0) fe_writeblock_feat(P,FE,fp_out,frames_proc,cep); ckd_free_2d((void **)cep); curr_block++; total_frames += frames_proc; if (spdata!=NULL) { free(spdata); spdata = NULL; } } /* process last (or only) block */ if (spdata!=NULL) {free(spdata);} splen =last_blocksize; if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){ E_ERROR("Unable to allocate memory block of %d shorts for input speech\n",splen); return(FE_MEM_ALLOC_ERROR); } if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){ E_ERROR("Error reading speech data\n"); return(FE_INPUT_FILE_READ_ERROR); } process_utt_return_value = fe_process_utt(FE,spdata,splen,&cep, &frames_proc); if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) { warn_zero_energy = ON; } else { assert(process_utt_return_value == FE_SUCCESS); } if (frames_proc>0) fe_writeblock_feat(P,FE,fp_out,frames_proc,cep); ckd_free_2d((void **)cep); curr_block++; if (P->logspec != ON) last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->NUM_CEPSTRA,sizeof(float32)); else last_frame_cep = (float32 **)ckd_calloc_2d(1,FE->MEL_FB->num_filters,sizeof(float32)); process_utt_return_value = fe_end_utt(FE, last_frame_cep[0], &last_frame); if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) { warn_zero_energy = ON; } else { assert(process_utt_return_value == FE_SUCCESS); } if (last_frame>0){ fe_writeblock_feat(P,FE,fp_out,last_frame,last_frame_cep); frames_proc++; } total_frames += frames_proc; fe_closefiles(fp_in,fp_out); free(spdata); ckd_free_2d((void **)last_frame_cep); } else{ E_ERROR("fe_start_utt() failed\n"); return(FE_START_ERROR); } fe_close(FE); if (ON == warn_zero_energy) { E_WARN("File %s has some frames with zero energy. Consider using dither\n", infile); } } else{ E_ERROR("Unknown mode - single or batch?\n"); return(FE_UNKNOWN_SINGLE_OR_BATCH); } P->splen=splen; P->nframes=nframes; P->spdata=spdata; return(FE_SUCCESS); }int16 * fe_convert_files_to_spdata(param_t *P, fe_t *FE, int32 *splenp, int32 *nframesp){ char *infile, fileroot[MAXCHARS]; int32 splen,total_samps,frames_proc,nframes,nblocks; int32 fp_in, last_blocksize=0,curr_block,total_frames; int16 *spdata; spdata = NULL; /* 20040917: ARCHAN The fe_copy_str and free pair is very fishy here. If the number of utterance being processed is larger than 1M, there may be chance, we will hit out of segment problem. */ if (P->is_single){ fe_build_filenames(P,fileroot,&infile,NULL); if (P->verbose) printf("%s\n",infile); if (ep_fe_openfiles(P,FE,infile,&fp_in,&total_samps,&nframes,&nblocks) != FE_SUCCESS){ printf("fe_openfiles exited!\n"); exit(0); } if (nblocks*P->blocksize>=total_samps) last_blocksize = total_samps - (nblocks-1)*P->blocksize; if (!fe_start_utt(FE)){ curr_block=1; total_frames=frames_proc=0; //printf("Total frames %d, last_blocksize: %d\n",total_frames, last_blocksize); /* process last (or only) block */ if (spdata!=NULL) free(spdata); splen =last_blocksize; if ((spdata = (int16 *)calloc(splen,sizeof(int16)))==NULL){ fprintf(stderr,"Unable to allocate memory block of %d shorts for input speech\n",splen); exit(0); } if (fe_readblock_spch(P,fp_in,splen,spdata)!=splen){ fprintf(stderr,"Error reading speech data\n"); exit(0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -