⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fe_enhance.cpp

📁 这是一个语音特征提取的程序源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////

#include "StdAfx.h"
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include "FE_feature.h"


/*------------------------------------------
* Definition of Noise Reduction Parameters 
*-----------------------------------------*/

/* general */
#ifndef DEFAULT_SAMPLING_RATE
#define DEFAULT_SAMPLING_RATE        16000
#endif

/* Wiener filter design */
#define NR_NB_FRAME_THRESHOLD_NSE      100
#define NR_LAMBDA_NSE                 0.99
#define NR_EPS                       1e-16 /* owkwon: originally 1e-16 */
#define NR_BETA                       0.98
#define NR_ETA_TH                     0.01 /* owkwon: reduced to -40 dB for kWaves, originally 0.079432823 corresponding to -22 dB */
#define NR_STARTING_FREQ                 0   /* 0Hz  */

/* Spectral subtraction design */
#define NR_SS_TH                      0.01

/* VAD for noise estimation (VADNest) */
#define NR_NB_FRAME_THRESHOLD_LTE       10
#define NR_LAMBDA_LTE                 0.97
#define NR_SNR_THRESHOLD_UPD_LTE        20
#define NR_ENERGY_FLOOR                 80
#define NR_MIN_FRAME                    10
#define NR_SNR_THRESHOLD_VAD            24 /* owkwon: increased, originally 15 */
#define NR_MIN_SPEECH_FRAME_HANGOVER     4
#define NR_HANGOVER                      5

#define NR_SNR_THRESHOLD_NO_UPD_LTE     80 /* owkwon: there sometimes exist only a few silence frames less than NR_MIN_FRAME */


/*----------------------
 * Constant Definitions
 *----------------------*/
#ifndef M_PI
#define M_PI        ((float)3.14159265358979323846)
#endif


/*----------------------
 * Macro Definitions
 *----------------------*/
#define NR_T(z)          ((z)>0 ? (z) : 0)
#define NR_SQ(z)                 ((z)*(z))
#define Round(x)    ((int)((x)+0.5))


/*---------------------
 * Function Prototypes
 *---------------------*/
extern void PRFFT_NEW(float *a, float *b, int m, int n_pts, int iff);


int Fe::EnhanceMain(const char *infile, const char *outfile, int sampleRate, int isWiener)
{
	FILE *fpin=fopen(infile,"rb");
	if(fpin==0){
		fprintf(stderr, "File open error (%s)\n",infile);
		assert(0);
		return 0;
	}
	fseek(fpin,0L,SEEK_END);
	int fsize = ftell(fpin);
	rewind(fpin);
	int sampleN=fsize/sizeof(short);
	vector<short> data(sampleN);
	fread(&data[0], sizeof(short), sampleN, fpin);
	fclose(fpin);

	enhance_basic(&data[0], sampleN, sampleRate, isWiener);

	FILE *fpout=fopen(outfile,"wb");
	if(fpout==0){
		fprintf(stderr, "File open error (%s)\n",outfile);
		assert(0);
		return 0;
	}
	fwrite(&data[0], sizeof(short), sampleN, fpout);
	fclose(fpout);
	return 1;
}


void Fe::enhance_basic(short *sample, int sampleN, int samplingRate, int isWiener)
{
	int t;
	Wiener wiener;
	wiener.Init(samplingRate, isWiener);
	float out[NR_MAX_WIN_SIZE];
	wiener.InitNewUtterance(0);
	int frameN=sampleN/wiener.m_shiftSize;
	int outN=0;
	for(t=0; ;t++){
		int i;
		int n=my_min(wiener.m_shiftSize,my_max(0,sampleN-t*wiener.m_shiftSize));
		FeReturnCode status;
		if(n>=wiener.m_shiftSize){
			status=wiener.OneFrame(sample+t*wiener.m_shiftSize, n, &out[0], t);
		}
		else{ /* feed in dummy frames */
			short tmpA[NR_MAX_WIN_SIZE];
			for(i=0;i<n;i++) tmpA[i]=sample[t*wiener.m_shiftSize+i];
			for(i=n;i<wiener.m_shiftSize;i++) tmpA[i]=0;
			status=wiener.OneFrame(tmpA, wiener.m_shiftSize, &out[0], t);
		}
		if(status==FE_NULL) continue;
		int r=my_min(wiener.m_shiftSize,my_max(0,sampleN-outN));
		assert(outN+r <= sampleN);
		for(i=0;i<r;i++){
			sample[outN+i]=(short)out[i];
		}
		outN+=r;
		if(outN>=sampleN) break;
	}
#if 0
	wiener.SaveInput("test.input.raw",0);
	wiener.SaveDenoised("test.output1.raw",0);
#endif
	wiener.Close();
}


Wiener::Wiener()
{
	m_sampleRate=DEFAULT_SAMPLING_RATE;
	m_NumChannels=NR_NUM_CHANNELS;
	m_localFrameX=0;
}


Wiener::~Wiener()
{
}


void Wiener::Close()
{
}


int Wiener::GetSample(short *sample, int sampleN)
{
	long startX=(m_inputEndX)%NR_BUF_SIZE;
	if(startX+sampleN <= NR_BUF_SIZE){		
		memmove((char*)(m_inputSpeech+startX), sample, 2*(sampleN));
	}
	else{
		memmove((char*)(m_inputSpeech+startX), sample, 2*(NR_BUF_SIZE-startX));
		memmove((char*)(m_inputSpeech), sample+(NR_BUF_SIZE-startX), 2*(sampleN-NR_BUF_SIZE+startX));
	}
	m_inputEndX += sampleN;
	return sampleN;
}


int Wiener::Init(int samplingRate, int isWiener)
{
	int i, kHz;
	m_isWiener=isWiener;
	m_sampleRate=samplingRate;

	/*-------------------------------------------------------------------*/
	/* Set parameters FrameLength, FrameShift and FFTLength              */
	/*-------------------------------------------------------------------*/
	kHz=(int)(m_sampleRate/1000.0+0.5);
	if (kHz == SAMPLING_FREQ_1) { /* 8 kHz */
		m_winSize = FRAME_LENGTH_1;
		m_shiftSize = FRAME_SHIFT_1;
		m_fftSize = FFT_LENGTH_1;
	}
	else if (kHz == SAMPLING_FREQ_2) { /* 11 kHz */
		m_winSize = FRAME_LENGTH_2;
		m_shiftSize = FRAME_SHIFT_2;
		m_fftSize = FFT_LENGTH_2;
	}
	else if (kHz == SAMPLING_FREQ_3) { /* 16 kHz */
		m_winSize = FRAME_LENGTH_3;
		m_shiftSize = FRAME_SHIFT_3;
		m_fftSize = FFT_LENGTH_3;	
	}
	else {
		float shiftMs=10, winMs=25;
		m_winSize = (int)(winMs/1000*m_sampleRate);
		m_shiftSize = (int)(shiftMs/1000*m_sampleRate);
		m_fftSize = 1; while (m_fftSize<m_winSize) m_fftSize *= 2;
	}

	if(m_shiftSize>NR_MAX_FRAME_SHIFT){
		fprintf(stderr, "ERROR:   Too large frame shift size '%d'!\r\n", m_shiftSize);
		assert(0);
		return 0;
	}

	if(m_isWiener==1){
		m_specLength=m_fftSize/4+1;
	}
	else{
		m_specLength=m_fftSize/2+1;
	}

	m_inputEndX=0;
#ifdef _DEBUG
	m_denEndX = 0;
#endif
	m_localFrameX=0;

	/*-------------------------------------------------------*/
	/* Initialize Hanning window and scaling factor          */
	/*-------------------------------------------------------*/
	InitHanning(m_HanningWin, m_winSize);

	if(m_isWiener==1){
		m_bufStartX = m_shiftSize-20;
		InitHanning(m_HanningWin2, NR_FL);
		InitMelFilterBanks ((float)NR_STARTING_FREQ, (float)m_sampleRate, 2*(m_specLength-1), m_NumChannels);
		InitMelIDCTMatrix (m_melIdctMatrix, m_NumChannels);
	}

	/* amplitude scaling factor */
	m_scaleFactor=0;
	for(i=0;i<m_shiftSize;i++){
		float sum=0;
		for(int n=0;n+i<m_winSize;n+=m_shiftSize) sum += m_HanningWin[i+n];
		if(sum>m_scaleFactor) m_scaleFactor=sum;
	}

	/* over-subtraction factor */
	if(m_isWiener==0){
		m_oversubGain = 8;
		m_oversubCutoffFreq = 800;
		float fc=m_fftSize*m_oversubCutoffFreq/(float)m_sampleRate;
		for(i=0;i<m_specLength;i++){
			m_oversubFactor[i] = m_oversubGain/(1+i/fc);
		}
	}

    return 1;
}


FeReturnCode Wiener::InitNewUtterance(const char *fname)
{
	int i;
	m_inputEndX = 0;
#ifdef _DEBUG
	m_denEndX = 0;
#endif
	m_localFrameX=0;

	/* Wiener filter design */
	m_nbFrameX=1;

	/* VADNest */
	m_nbSpeechFrame=0;
	m_meanEn=0;
	m_flagVADNest=0;
	m_hangOver=0;
	m_nbFrameVADNest=0;

	for(i=0;i<m_specLength;i++){
		m_lastSpectrum2[i]=0;
		m_lastSpectrum[i]=0;
		m_sqrtNoisePSD[i]=(float)NR_EPS;
		m_sqrtDen3PSD[i]=0;
	}

	for(i=0;i<4*m_shiftSize;i++){
		m_buf_in[i]=0;
		m_buf_out[i]=0;
	}

	return FE_OK;
}


FeReturnCode Wiener::OneFrame(short *sample, int sampleN, float *out, int frameX)
{
    int i, k, extraFrameN;
	long startX=m_shiftSize*m_localFrameX;
	long validX;
	k=(m_winSize-m_shiftSize)/m_shiftSize;
	extraFrameN=((m_winSize-m_shiftSize)%m_shiftSize == 0 ? k : k+1);

	sampleN=GetSample(sample,sampleN);
	assert(m_localFrameX-frameX <= 4+extraFrameN);
	
	float si[NR_MAX_WIN_SIZE], so[NR_MAX_WIN_SIZE], den[NR_MAX_WIN_SIZE];
	FeReturnCode status;
	
	/* shift down one frame in the first buffer and insert a new frame */
	for(i=0;i<3*m_shiftSize;i++) m_buf_in[i]=m_buf_in[i+m_shiftSize];
	for(i=0;i<m_shiftSize;i++) m_buf_in[3*m_shiftSize+i]=(float)m_inputSpeech[(startX+i)%NR_BUF_SIZE];
	for(i=0;i<m_winSize;i++) si[i]=m_buf_in[i];
	
	if(m_isWiener==1){
		status=OneFrameWiener(si, so);
	}
	else{
		status=OneFrameSS(si, so);
	}
	if(status==FE_NULL){
		m_localFrameX++;
		return FE_NULL;
	}
	
	if(m_isWiener==0){
		/* overlap and save */
		for(i=0;i<3*m_shiftSize;i++) m_buf_out[i]=m_buf_out[i+m_shiftSize];
		for(i=0;i<m_shiftSize;i++) m_buf_out[3*m_shiftSize+i]=0;
		for(i=0;i<m_winSize;i++) m_buf_out[i]+=so[i];
		
		/* get the result */
		for(i=0;i<m_shiftSize;i++) den[i]=m_buf_out[i]/m_scaleFactor;
	}
	else{
		for(i=0;i<m_shiftSize;i++) den[i]=so[i];
	}
	
	/* save the denoised speech at the ring buffer */
	for(i=0;i<m_shiftSize;i++){
		m_outSpeech[(startX+i)%NR_OUT_BUF_SIZE]=den[i];
	}
#ifdef _DEBUG
	/* save the denoised speech at the ring buffer */
	validX=startX-2*m_shiftSize;
	for(i=0;i<m_shiftSize;i++){
		m_denSpeech[(validX+i)%NR_BUF_SIZE]=(short)(den[i]);
	}
#endif
	
	/* NR has 5 frame delay due to one buffer and extraFrameN delay to make one frame for feature extraction */
	if(m_localFrameX<5+extraFrameN){
		m_localFrameX++;
		return FE_NULL;
	}
	
	validX=startX-extraFrameN*m_shiftSize-((m_isWiener==1) ? 2*m_shiftSize : 0);
	for(i=0;i<m_winSize;i++) {
		out[i]=m_outSpeech[(validX+i)%NR_OUT_BUF_SIZE];
	}
	
#ifdef _DEBUG
	m_denEndX += m_shiftSize;
#endif
	m_localFrameX++;
	return FE_SPEECH;			
	
}


FeReturnCode Wiener::OneFrameWiener(float *si, float *out)
{
	VADNest(m_localFrameX, si);
	EstimateSpectrum(si, m_spec, m_spec_re, m_spec_im, 1);
	ComputeMeanPSD(m_spec, m_lastSpectrum, m_lastSpectrum2, m_flagVADNest, m_sqrtInPSD);
	if(m_localFrameX<2)	{
		int i;
		for(i=0;i<m_winSize;i++) out[i]=si[i];
		return FE_NULL;
	}
	DesignWiener(m_localFrameX, m_flagVADNest, m_spec, m_sqrtInPSD, m_sqrtNoisePSD, m_sqrtDen3PSD, m_wienerFilter);
	if(m_isWiener==0){	
		ApplyFilter(m_spec_re, m_spec_im, m_wienerFilter, out);
	}
	else{
		MelFilterBank(m_wienerFilter, m_H2mel);
		MelIDCT(m_H2mel, m_hWFmirr);
		assert(m_shiftSize-m_bufStartX > (NR_FL-1)/2);
		ApplyWiener(si+m_shiftSize-m_bufStartX, m_hWFmirr, m_hWFw, out);
	}
	return FE_SPEECH;
}


FeReturnCode Wiener::OneFrameSS(float *si, float *out)
{
	EstimateSpectrum(si, m_spec, m_spec_re, m_spec_im, 0);
	ComputeMeanPSD(m_spec, m_lastSpectrum, m_lastSpectrum2, m_flagVADNest, m_sqrtInPSD);
	if(m_localFrameX<2)	{
		int i;
		for(i=0;i<m_winSize;i++) out[i]=si[i];
		return FE_NULL;
	}
	DesignSpecsub(m_localFrameX, m_flagVADNest, m_spec, m_sqrtInPSD, m_sqrtNoisePSD, m_sqrtDen3PSD, m_ssFilter);
	ApplyFilter(m_spec_re, m_spec_im, m_ssFilter, out);
	return FE_SPEECH;
}


void Wiener::InitHanning (float *win, int len)
{
    int i;
    for (i = 0; i < len; i++){
		win[i] = (float)(0.5 - 0.5 * cos (2 * M_PI * (i + 0.5)/len));
	}
}


void Wiener::EstimateSpectrum(float *s, float *spectrum, float *re, float *im, int subSample)
{
	int i, n, m;
	float x[NR_MAX_WIN_SIZE];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -