// File: fe_enhance.h
// (Code-viewer residue: "Font size:" control label; not part of the header.)
///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
#ifndef _FE_ENHANCE_H_
#define _FE_ENHANCE_H_
#include "FE_common.h"
/*---------------------------------------------------------------*/
/* Recording limits (section originally titled "endpoint detectors") */
/*---------------------------------------------------------------*/
#define NR_MAX_RECORD_TIME  10                           /* longest recording: 10 seconds */
#define NR_WAV_BUF_SIZE     (NR_MAX_RECORD_TIME*16000)   /* sample count for 10 s of 16 kHz mono audio */
#define NR_MAX_WIN_SIZE     512                          /* largest supported window size */

/*---------------------------------------------------------------*/
/* Noise-reduction buffer dimensions                              */
/*---------------------------------------------------------------*/
#define NR_MAX_FRAME_SHIFT  320                          /* largest supported frame shift */
#define NR_MAX_SPEC_LENGTH  257                          /* FFT_LENGTH_1/2+1 (= 512/2+1) */
#define NR_OUT_BUF_SIZE     (5*NR_MAX_WIN_SIZE)          /* five maximum-size windows */

/* NR_BUF_SIZE: non-debug builds keep only 2 seconds of speech to save
   memory; builds with _DEBUG defined keep the full NR_WAV_BUF_SIZE. */
#ifndef _DEBUG
#define NR_BUF_SIZE         (2*16000)
#else
#define NR_BUF_SIZE         NR_WAV_BUF_SIZE
#endif

/*---------------------------------------------------------------*/
/* Wiener-filter dimensions                                       */
/*---------------------------------------------------------------*/
#define NR_NUM_CHANNELS     23   /* sizes the mel filter-bank arrays of class Wiener */
#define NR_FL               17   /* length of Wiener::m_hWFw and Wiener::m_HanningWin2 */
/*
 * WfMelFB -- descriptor of one mel filter-bank channel used by the
 * noise-reduction stage. The field meanings below are inferred from the
 * names only; the code that fills them is not part of this header.
 */
struct WfMelFB {
    int   m_lowX;       /* presumably the lower-edge index of the channel -- TODO confirm */
    int   m_centerX;    /* presumably the centre index of the channel -- TODO confirm */
    int   m_highX;      /* presumably the upper-edge index of the channel -- TODO confirm */
    float m_sumWeight;  /* presumably the sum of the channel's weights -- TODO confirm */
};
/*
 * Wiener -- state and entry points of the noise-reduction stage.
 *
 * Every working buffer is a fixed-size member array dimensioned by the NR_*
 * macros, so this header declares no dynamically sized storage. Only
 * declarations live here; the member functions are defined elsewhere.
 *
 * NOTE(review): the members and methods guarded by _DEBUG exist only when
 * that macro is defined, so the object layout differs between translation
 * units compiled with and without _DEBUG.
 */
class Wiener {
public:
    /* ---- basic settings ---- */
    int m_isWiener;     /* presumably selects the Wiener path over spectral subtraction -- TODO confirm */
    int m_sampleRate;
    int m_winSize;
    int m_shiftSize;
    int m_fftSize;

    /* ---- values derived from the basic settings ---- */
    int   m_specLength;
    float m_scaleFactor;

    /* ---- audio/file interface ---- */
    short m_inputSpeech[NR_BUF_SIZE];   /* input speech, kept as a ring buffer */
    long  m_inputEndX;                  /* end sample position of the input speech */

    /* ---- spectrum estimation ---- */
    float m_HanningWin[NR_MAX_WIN_SIZE];
    float m_buf_in[4 * NR_MAX_FRAME_SHIFT];    /* holds frames 0, 1, 2 and 3 */
    float m_buf_out[4 * NR_MAX_FRAME_SHIFT];   /* holds frames 0, 1, 2 and 3 */
    float m_spec[NR_MAX_SPEC_LENGTH];
    float m_spec_re[NR_MAX_WIN_SIZE];
    float m_spec_im[NR_MAX_WIN_SIZE];

    /* ---- mean of the power spectral density ---- */
    float m_sqrtInPSD[NR_MAX_SPEC_LENGTH];
    float m_lastSpectrum[NR_MAX_SPEC_LENGTH];
    float m_lastSpectrum2[NR_MAX_SPEC_LENGTH];
    float m_sqrtNoisePSD[NR_MAX_SPEC_LENGTH];
    long  m_nbFrameX;

    /* ---- Wiener filter design ---- */
    float m_wienerFilter[NR_MAX_SPEC_LENGTH];
    float m_sqrtDen3PSD[NR_MAX_SPEC_LENGTH];

    /* ---- spectral subtraction design ---- */
    float m_ssFilter[NR_MAX_SPEC_LENGTH];
    float m_oversubGain;                         /* over-subtraction gain fg (usually 4) */
    float m_oversubCutoffFreq;                   /* over-subtraction cutoff frequency fc (usually 800 Hz) */
    float m_oversubFactor[NR_MAX_SPEC_LENGTH];   /* oversubFac(f) = fg/(1+f/fc) */

    /* ---- voice activity detection used for noise estimation ---- */
    int   m_nbSpeechFrame;
    int   m_flagVADNest;
    int   m_hangOver;
    float m_meanEn;
    long  m_nbFrameVADNest;

#if defined(_DEBUG)
    short m_denSpeech[NR_BUF_SIZE];   /* denoised speech, kept as a ring buffer */
    long  m_denEndX;                  /* end sample position of the denoised speech */
#endif

    float m_outSpeech[NR_OUT_BUF_SIZE];
    long  m_localFrameX;              /* noise-reduction time-frame index (internal use) */

    /* ---- mel filter bank ---- */
    int     m_NumChannels;
    WfMelFB m_MelFB[NR_NUM_CHANNELS + 2];
    float   m_MelWeight[NR_MAX_SPEC_LENGTH];
    float   m_H2mel[NR_NUM_CHANNELS + 2];

    /* ---- mel IDCT ---- */
    float m_hWFmirr[2 * (NR_NUM_CHANNELS + 1) + 1];
    float m_melIdctMatrix[(NR_NUM_CHANNELS + 2) * (NR_NUM_CHANNELS + 2)];

    /* ---- filter application ---- */
    int   m_bufStartX;
    float m_hWFw[NR_FL];
    float m_HanningWin2[NR_FL];

    /*------------------*/
    /* Member functions */
    /*------------------*/
    Wiener();
    virtual ~Wiener();

    /* Public entry points. Their definitions are not in this header, so the
       descriptions are limited to what the signatures show. */
    int Init(int samplingRate, int isWiener);           /* takes the sampling rate and the Wiener on/off flag */
    FeReturnCode InitNewUtterance(const char* fname);   /* presumably resets per-utterance state -- TODO confirm */
    FeReturnCode OneFrame(short* sample, int sampleN, float* out, int frameX);   /* presumably handles one frame of sampleN samples -- TODO confirm */
    void Close();

#if defined(_DEBUG)
    /* Debug-only helpers; presumably write the buffered input / denoised
       speech to the file fname -- TODO confirm against the definitions. */
    int SaveInput(const char* fname, int offsetX);
    int SaveDenoised(const char* fname, int offsetX);
#endif

private:
    /* Internal processing steps; see the definitions for exact behaviour. */
    int GetSample(short* sample, int sampleN);
    FeReturnCode OneFrameWiener(float* si, float* out);
    FeReturnCode OneFrameSS(float* in, float* out);
    void EstimateSpectrum(float* s, float* spectrum, float* re, float* im, int subSample);
    void ComputeMeanPSD(float* spectrum, float* lastSpectrum, float* lastSpectrum2, int flagVADNest, float* sqrtInPSD);
    void DesignWiener(int t, int flagVADNest, const float* in, const float* inPSD, float* noisePSD, float* den3PSD, float* filter);
    void DesignSpecsub(int t, int flagVADNest, const float* in, const float* inPSD, float* noisePSD, float* den3PSD, float* filter);
    void VADNest(int t, const float* s);
    void ApplyFilter(float* re, float* im, float* h, float* out);

    /* Table set-up helpers. */
    void InitHanning(float* win, int len);
    void InitMelFilterBanks(float startingFrequency, float samplingRate, int fftLength, int numChannels);
    int InitMelIDCTMatrix(float* idctMatrix, int numChannels);

    /* Mel-domain steps. */
    void MelFilterBank(float* h2, float* h2mel);
    void MelIDCT(float* h2mel, float* hWFmirr);
    void ApplyWiener(float* s, float* hWFmirr, float* hWFw, float* out);
};
#endif
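/*
 * Code-viewer residue (not part of the header) -- keyboard shortcut legend:
 *   Copy code:           Ctrl + C
 *   Search code:         Ctrl + F
 *   Full-screen mode:    F11
 *   Toggle theme:        Ctrl + Shift + D
 *   Show shortcuts:      ?
 *   Increase font size:  Ctrl + =
 *   Decrease font size:  Ctrl + -
 */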