📄 fe_enhance.cpp
字号:
///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
#include "StdAfx.h"
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include "FE_feature.h"
/*------------------------------------------
* Definition of Noise Reduction Parameters
*-----------------------------------------*/
/* general */
#ifndef DEFAULT_SAMPLING_RATE
#define DEFAULT_SAMPLING_RATE 16000
#endif
/* Wiener filter design */
#define NR_NB_FRAME_THRESHOLD_NSE 100
#define NR_LAMBDA_NSE 0.99
#define NR_EPS 1e-16 /* owkwon: originally 1e-16 */
#define NR_BETA 0.98
#define NR_ETA_TH 0.01 /* owkwon: reduced to -40 dB for kWaves, originally 0.079432823 corresponding to -22 dB */
#define NR_STARTING_FREQ 0 /* 0Hz */
/* Spectral subtraction design */
#define NR_SS_TH 0.01
/* VAD for noise estimation (VADNest) */
#define NR_NB_FRAME_THRESHOLD_LTE 10
#define NR_LAMBDA_LTE 0.97
#define NR_SNR_THRESHOLD_UPD_LTE 20
#define NR_ENERGY_FLOOR 80
#define NR_MIN_FRAME 10
#define NR_SNR_THRESHOLD_VAD 24 /* owkwon: increased, originally 15 */
#define NR_MIN_SPEECH_FRAME_HANGOVER 4
#define NR_HANGOVER 5
#define NR_SNR_THRESHOLD_NO_UPD_LTE 80 /* owkwon: there sometimes exist only a few silence frames less than NR_MIN_FRAME */
/*----------------------
* Constant Definitions
*----------------------*/
#ifndef M_PI
#define M_PI ((float)3.14159265358979323846)
#endif
/*----------------------
* Macro Definitions
*----------------------*/
#define NR_T(z) ((z)>0 ? (z) : 0)
#define NR_SQ(z) ((z)*(z))
#define Round(x) ((int)((x)+0.5))
/*---------------------
* Function Prototypes
*---------------------*/
extern void PRFFT_NEW(float *a, float *b, int m, int n_pts, int iff);
int Fe::EnhanceMain(const char *infile, const char *outfile, int sampleRate, int isWiener)
{
FILE *fpin=fopen(infile,"rb");
if(fpin==0){
fprintf(stderr, "File open error (%s)\n",infile);
assert(0);
return 0;
}
fseek(fpin,0L,SEEK_END);
int fsize = ftell(fpin);
rewind(fpin);
int sampleN=fsize/sizeof(short);
vector<short> data(sampleN);
fread(&data[0], sizeof(short), sampleN, fpin);
fclose(fpin);
enhance_basic(&data[0], sampleN, sampleRate, isWiener);
FILE *fpout=fopen(outfile,"wb");
if(fpout==0){
fprintf(stderr, "File open error (%s)\n",outfile);
assert(0);
return 0;
}
fwrite(&data[0], sizeof(short), sampleN, fpout);
fclose(fpout);
return 1;
}
void Fe::enhance_basic(short *sample, int sampleN, int samplingRate, int isWiener)
{
int t;
Wiener wiener;
wiener.Init(samplingRate, isWiener);
float out[NR_MAX_WIN_SIZE];
wiener.InitNewUtterance(0);
int frameN=sampleN/wiener.m_shiftSize;
int outN=0;
for(t=0; ;t++){
int i;
int n=my_min(wiener.m_shiftSize,my_max(0,sampleN-t*wiener.m_shiftSize));
FeReturnCode status;
if(n>=wiener.m_shiftSize){
status=wiener.OneFrame(sample+t*wiener.m_shiftSize, n, &out[0], t);
}
else{ /* feed in dummy frames */
short tmpA[NR_MAX_WIN_SIZE];
for(i=0;i<n;i++) tmpA[i]=sample[t*wiener.m_shiftSize+i];
for(i=n;i<wiener.m_shiftSize;i++) tmpA[i]=0;
status=wiener.OneFrame(tmpA, wiener.m_shiftSize, &out[0], t);
}
if(status==FE_NULL) continue;
int r=my_min(wiener.m_shiftSize,my_max(0,sampleN-outN));
assert(outN+r <= sampleN);
for(i=0;i<r;i++){
sample[outN+i]=(short)out[i];
}
outN+=r;
if(outN>=sampleN) break;
}
#if 0
wiener.SaveInput("test.input.raw",0);
wiener.SaveDenoised("test.output1.raw",0);
#endif
wiener.Close();
}
Wiener::Wiener()
{
m_sampleRate=DEFAULT_SAMPLING_RATE;
m_NumChannels=NR_NUM_CHANNELS;
m_localFrameX=0;
}
Wiener::~Wiener()
{
}
void Wiener::Close()
{
}
int Wiener::GetSample(short *sample, int sampleN)
{
long startX=(m_inputEndX)%NR_BUF_SIZE;
if(startX+sampleN <= NR_BUF_SIZE){
memmove((char*)(m_inputSpeech+startX), sample, 2*(sampleN));
}
else{
memmove((char*)(m_inputSpeech+startX), sample, 2*(NR_BUF_SIZE-startX));
memmove((char*)(m_inputSpeech), sample+(NR_BUF_SIZE-startX), 2*(sampleN-NR_BUF_SIZE+startX));
}
m_inputEndX += sampleN;
return sampleN;
}
int Wiener::Init(int samplingRate, int isWiener)
{
int i, kHz;
m_isWiener=isWiener;
m_sampleRate=samplingRate;
/*-------------------------------------------------------------------*/
/* Set parameters FrameLength, FrameShift and FFTLength */
/*-------------------------------------------------------------------*/
kHz=(int)(m_sampleRate/1000.0+0.5);
if (kHz == SAMPLING_FREQ_1) { /* 8 kHz */
m_winSize = FRAME_LENGTH_1;
m_shiftSize = FRAME_SHIFT_1;
m_fftSize = FFT_LENGTH_1;
}
else if (kHz == SAMPLING_FREQ_2) { /* 11 kHz */
m_winSize = FRAME_LENGTH_2;
m_shiftSize = FRAME_SHIFT_2;
m_fftSize = FFT_LENGTH_2;
}
else if (kHz == SAMPLING_FREQ_3) { /* 16 kHz */
m_winSize = FRAME_LENGTH_3;
m_shiftSize = FRAME_SHIFT_3;
m_fftSize = FFT_LENGTH_3;
}
else {
float shiftMs=10, winMs=25;
m_winSize = (int)(winMs/1000*m_sampleRate);
m_shiftSize = (int)(shiftMs/1000*m_sampleRate);
m_fftSize = 1; while (m_fftSize<m_winSize) m_fftSize *= 2;
}
if(m_shiftSize>NR_MAX_FRAME_SHIFT){
fprintf(stderr, "ERROR: Too large frame shift size '%d'!\r\n", m_shiftSize);
assert(0);
return 0;
}
if(m_isWiener==1){
m_specLength=m_fftSize/4+1;
}
else{
m_specLength=m_fftSize/2+1;
}
m_inputEndX=0;
#ifdef _DEBUG
m_denEndX = 0;
#endif
m_localFrameX=0;
/*-------------------------------------------------------*/
/* Initialize Hanning window and scaling factor */
/*-------------------------------------------------------*/
InitHanning(m_HanningWin, m_winSize);
if(m_isWiener==1){
m_bufStartX = m_shiftSize-20;
InitHanning(m_HanningWin2, NR_FL);
InitMelFilterBanks ((float)NR_STARTING_FREQ, (float)m_sampleRate, 2*(m_specLength-1), m_NumChannels);
InitMelIDCTMatrix (m_melIdctMatrix, m_NumChannels);
}
/* amplitude scaling factor */
m_scaleFactor=0;
for(i=0;i<m_shiftSize;i++){
float sum=0;
for(int n=0;n+i<m_winSize;n+=m_shiftSize) sum += m_HanningWin[i+n];
if(sum>m_scaleFactor) m_scaleFactor=sum;
}
/* over-subtraction factor */
if(m_isWiener==0){
m_oversubGain = 8;
m_oversubCutoffFreq = 800;
float fc=m_fftSize*m_oversubCutoffFreq/(float)m_sampleRate;
for(i=0;i<m_specLength;i++){
m_oversubFactor[i] = m_oversubGain/(1+i/fc);
}
}
return 1;
}
FeReturnCode Wiener::InitNewUtterance(const char *fname)
{
int i;
m_inputEndX = 0;
#ifdef _DEBUG
m_denEndX = 0;
#endif
m_localFrameX=0;
/* Wiener filter design */
m_nbFrameX=1;
/* VADNest */
m_nbSpeechFrame=0;
m_meanEn=0;
m_flagVADNest=0;
m_hangOver=0;
m_nbFrameVADNest=0;
for(i=0;i<m_specLength;i++){
m_lastSpectrum2[i]=0;
m_lastSpectrum[i]=0;
m_sqrtNoisePSD[i]=(float)NR_EPS;
m_sqrtDen3PSD[i]=0;
}
for(i=0;i<4*m_shiftSize;i++){
m_buf_in[i]=0;
m_buf_out[i]=0;
}
return FE_OK;
}
FeReturnCode Wiener::OneFrame(short *sample, int sampleN, float *out, int frameX)
{
int i, k, extraFrameN;
long startX=m_shiftSize*m_localFrameX;
long validX;
k=(m_winSize-m_shiftSize)/m_shiftSize;
extraFrameN=((m_winSize-m_shiftSize)%m_shiftSize == 0 ? k : k+1);
sampleN=GetSample(sample,sampleN);
assert(m_localFrameX-frameX <= 4+extraFrameN);
float si[NR_MAX_WIN_SIZE], so[NR_MAX_WIN_SIZE], den[NR_MAX_WIN_SIZE];
FeReturnCode status;
/* shift down one frame in the first buffer and insert a new frame */
for(i=0;i<3*m_shiftSize;i++) m_buf_in[i]=m_buf_in[i+m_shiftSize];
for(i=0;i<m_shiftSize;i++) m_buf_in[3*m_shiftSize+i]=(float)m_inputSpeech[(startX+i)%NR_BUF_SIZE];
for(i=0;i<m_winSize;i++) si[i]=m_buf_in[i];
if(m_isWiener==1){
status=OneFrameWiener(si, so);
}
else{
status=OneFrameSS(si, so);
}
if(status==FE_NULL){
m_localFrameX++;
return FE_NULL;
}
if(m_isWiener==0){
/* overlap and save */
for(i=0;i<3*m_shiftSize;i++) m_buf_out[i]=m_buf_out[i+m_shiftSize];
for(i=0;i<m_shiftSize;i++) m_buf_out[3*m_shiftSize+i]=0;
for(i=0;i<m_winSize;i++) m_buf_out[i]+=so[i];
/* get the result */
for(i=0;i<m_shiftSize;i++) den[i]=m_buf_out[i]/m_scaleFactor;
}
else{
for(i=0;i<m_shiftSize;i++) den[i]=so[i];
}
/* save the denoised speech at the ring buffer */
for(i=0;i<m_shiftSize;i++){
m_outSpeech[(startX+i)%NR_OUT_BUF_SIZE]=den[i];
}
#ifdef _DEBUG
/* save the denoised speech at the ring buffer */
validX=startX-2*m_shiftSize;
for(i=0;i<m_shiftSize;i++){
m_denSpeech[(validX+i)%NR_BUF_SIZE]=(short)(den[i]);
}
#endif
/* NR has 5 frame delay due to one buffer and extraFrameN delay to make one frame for feature extraction */
if(m_localFrameX<5+extraFrameN){
m_localFrameX++;
return FE_NULL;
}
validX=startX-extraFrameN*m_shiftSize-((m_isWiener==1) ? 2*m_shiftSize : 0);
for(i=0;i<m_winSize;i++) {
out[i]=m_outSpeech[(validX+i)%NR_OUT_BUF_SIZE];
}
#ifdef _DEBUG
m_denEndX += m_shiftSize;
#endif
m_localFrameX++;
return FE_SPEECH;
}
FeReturnCode Wiener::OneFrameWiener(float *si, float *out)
{
VADNest(m_localFrameX, si);
EstimateSpectrum(si, m_spec, m_spec_re, m_spec_im, 1);
ComputeMeanPSD(m_spec, m_lastSpectrum, m_lastSpectrum2, m_flagVADNest, m_sqrtInPSD);
if(m_localFrameX<2) {
int i;
for(i=0;i<m_winSize;i++) out[i]=si[i];
return FE_NULL;
}
DesignWiener(m_localFrameX, m_flagVADNest, m_spec, m_sqrtInPSD, m_sqrtNoisePSD, m_sqrtDen3PSD, m_wienerFilter);
if(m_isWiener==0){
ApplyFilter(m_spec_re, m_spec_im, m_wienerFilter, out);
}
else{
MelFilterBank(m_wienerFilter, m_H2mel);
MelIDCT(m_H2mel, m_hWFmirr);
assert(m_shiftSize-m_bufStartX > (NR_FL-1)/2);
ApplyWiener(si+m_shiftSize-m_bufStartX, m_hWFmirr, m_hWFw, out);
}
return FE_SPEECH;
}
FeReturnCode Wiener::OneFrameSS(float *si, float *out)
{
EstimateSpectrum(si, m_spec, m_spec_re, m_spec_im, 0);
ComputeMeanPSD(m_spec, m_lastSpectrum, m_lastSpectrum2, m_flagVADNest, m_sqrtInPSD);
if(m_localFrameX<2) {
int i;
for(i=0;i<m_winSize;i++) out[i]=si[i];
return FE_NULL;
}
DesignSpecsub(m_localFrameX, m_flagVADNest, m_spec, m_sqrtInPSD, m_sqrtNoisePSD, m_sqrtDen3PSD, m_ssFilter);
ApplyFilter(m_spec_re, m_spec_im, m_ssFilter, out);
return FE_SPEECH;
}
void Wiener::InitHanning (float *win, int len)
{
int i;
for (i = 0; i < len; i++){
win[i] = (float)(0.5 - 0.5 * cos (2 * M_PI * (i + 0.5)/len));
}
}
void Wiener::EstimateSpectrum(float *s, float *spectrum, float *re, float *im, int subSample)
{
int i, n, m;
float x[NR_MAX_WIN_SIZE];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -