📄 fe_enhance.cpp
字号:
/* apply Hanning window */
for(n=0;n<m_winSize;n++){
re[n] = s[n]*m_HanningWin[n];
im[n] = 0;
}
/* pad zeros */
for(n=m_winSize;n<m_fftSize;n++){
re[n]=0;
im[n]=0;
}
/* do FFT */
m = (int) (log10 (m_fftSize) / log10 (2));
PRFFT_NEW(re, im, m, m_fftSize, 1);
/* take power spectrum, P(bin)=|X(bin)|^2 */
x[0] = re[0] * re[0]; /* DC */
for (n = 1; n < m_fftSize / 2; n++) {
x[n] = re[n] * re[n] + im[n] * im[n];
}
x[m_fftSize / 2] = re[m_fftSize / 2] * re[m_fftSize / 2]; /* pi/2 */
if(subSample==1){
/* smooth and subsample spectrum */
assert(m_specLength==m_fftSize/4+1);
for(i=0;i<m_fftSize/4;i++){
spectrum[i]=(x[2*i]+x[2*i+1])/2;
}
spectrum[m_fftSize/4]=x[m_fftSize/2];
}
else{
for(i=0;i<m_fftSize/2+1;i++){
spectrum[i]=x[i];
}
}
}
void Wiener::ComputeMeanPSD(float *spectrum, float *lastSpectrum, float *lastSpectrum2, int flagVADNest, float *sqrtInPSD)
{
int i;
for(i=0;i<m_specLength;i++){
sqrtInPSD[i]=(float)sqrt((double)(lastSpectrum2[i]+lastSpectrum[i]+spectrum[i])/(double)3.0);
lastSpectrum2[i]=lastSpectrum[i];
lastSpectrum[i]=spectrum[i];
}
}
void Wiener::DesignWiener(int t, int flagVADNest, const float *in, const float *sqrtInPSD, float *sqrtNoisePSD, float *sqrtDen3PSD, float *filter)
{
int i;
float lambdaNSE;
float sqrtDenPSD[NR_MAX_SPEC_LENGTH], sqrtDen2PSD[NR_MAX_SPEC_LENGTH];
float sqrtEta[NR_MAX_SPEC_LENGTH], sqrtEta2[NR_MAX_SPEC_LENGTH], h[NR_MAX_SPEC_LENGTH];
/* set the forgetting factor */
if(t<NR_NB_FRAME_THRESHOLD_NSE)
lambdaNSE=1-1/(float)(m_nbFrameX);
else
lambdaNSE=(float)NR_LAMBDA_NSE;
if(t>=2) m_nbFrameX++;
if(flagVADNest==0){ /* this is the result of the previous frame */
for(i=0;i<m_specLength;i++){
/* Update. Pnoise(tn)^0.5 = max(a * Pnoise(tn-1)^0.5 + (1-a) * Pin_PSD(tn)^0.5, EPS) */
sqrtNoisePSD[i]=(float)my_max(lambdaNSE*sqrtNoisePSD[i]+(1-lambdaNSE)*sqrtInPSD[i], NR_EPS);
}
}
else{
/* No change. Pnoise(t)^0.5 = Pnoise(tn)^0.5 */
}
if(t>=2){
/* Be careful about sqrt and square! */
for(i=0;i<m_specLength;i++){
sqrtDenPSD[i]=(float)(NR_BETA*sqrtDen3PSD[i]+(1-NR_BETA)*NR_T(sqrtInPSD[i]-sqrtNoisePSD[i]));
sqrtEta[i]=sqrtDenPSD[i]/sqrtNoisePSD[i];
h[i]=sqrtEta[i]/(1+sqrtEta[i]);
sqrtDen2PSD[i]=h[i]*(sqrtInPSD[i]);
sqrtEta2[i]=my_max(sqrtDen2PSD[i]/sqrtNoisePSD[i], (float)NR_ETA_TH);
filter[i]=sqrtEta2[i]/(1+sqrtEta2[i]);
sqrtDen3PSD[i]=(float)(filter[i]*sqrt(in[i]));
}
}
}
void Wiener::DesignSpecsub(int t, int flagVADNest, const float *in, const float *sqrtInPSD, float *sqrtNoisePSD, float *sqrtDen3PSD, float *filter)
{
int i;
float lambdaNSE;
float s, n;
float globalFac;
/* set the forgetting factor */
if(t<NR_NB_FRAME_THRESHOLD_NSE)
lambdaNSE=1-1/(float)(m_nbFrameX);
else
lambdaNSE=(float)NR_LAMBDA_NSE;
if(t>=2) m_nbFrameX++;
if(flagVADNest==0 && t<NR_NB_FRAME_THRESHOLD_NSE){ /* this is the result of the previous frame */
for(i=0;i<m_specLength;i++){
sqrtNoisePSD[i]=(float)sqrt(my_max(lambdaNSE*NR_SQ(sqrtNoisePSD[i])
+(1-lambdaNSE)*(in[i]-NR_SQ(sqrtDen3PSD[i])), NR_EPS*NR_EPS));
}
}
else{
/* No change. Pnoise(t)^0.5 = Pnoise(tn)^0.5 */
}
if(t<2) return;
/* compute filter with over-subtraction */
s=0, n=0;
for(i=0;i<m_specLength;i++){
s+=NR_SQ(sqrtInPSD[i]); n+=NR_SQ(sqrtNoisePSD[i]);
}
s/=m_specLength; n/=m_specLength;
globalFac=my_max(n/s,(float)NR_SS_TH);
for(i=0;i<m_specLength;i++){
float instSNR = NR_SQ(sqrtNoisePSD[i])/(NR_SQ(sqrtNoisePSD[i])+NR_SQ(sqrtInPSD[i]));
float instSN = NR_SQ(sqrtNoisePSD[i])/NR_SQ(sqrtInPSD[i]+(float)1e-20);
filter[i] = (float)sqrt(my_max(1-(1+m_oversubFactor[i]*instSNR)*instSN, (float)NR_SS_TH*NR_SS_TH*instSN));
sqrtDen3PSD[i]=(float)(filter[i]*sqrt(in[i]));
}
return;
}
void Wiener::VADNest(int t, const float *s)
{
int i;
float lambdaLTE, lambdaLTEhigherE=(float)0.99;
float frameEn=0, mean=0;
int M=2*m_shiftSize; /* changed to get smooth estimate */
if(t<NR_NB_FRAME_THRESHOLD_LTE)
lambdaLTE=1-1/(float)(m_nbFrameVADNest+1);
else
lambdaLTE=(float)NR_LAMBDA_LTE;
for(i=0;i<M;i++) {
frameEn += s[i]*s[i];
mean += s[i];
}
frameEn = frameEn - mean*mean/M; /* owkwon: Use the AC energy only */
frameEn = frameEn*80/(float)M; /* owkwon: normalize to a value for 80 samples */
if(t>=2 && frameEn>0) { /* Start updating after only valid non-zero data */
m_nbFrameVADNest++;
}
frameEn = (float)(0.5+16/log(2)*log((64+frameEn)/64)); /* 16/log(2) ~ 23.08 */
if(frameEn<NR_ENERGY_FLOOR) frameEn=NR_ENERGY_FLOOR; /* to avoid the worst case of all-zero data */
if(m_nbFrameVADNest==1){
m_meanEn=frameEn; /* first non-zero frame */
}
if(frameEn-m_meanEn<NR_SNR_THRESHOLD_NO_UPD_LTE && (frameEn-m_meanEn < NR_SNR_THRESHOLD_UPD_LTE || t < NR_MIN_FRAME)){
if(frameEn<m_meanEn || t<NR_MIN_FRAME)
m_meanEn = m_meanEn + (1-lambdaLTE)*(frameEn-m_meanEn);
else
m_meanEn = m_meanEn + (1-lambdaLTEhigherE)*(frameEn-m_meanEn);
if(m_meanEn<NR_ENERGY_FLOOR) m_meanEn=NR_ENERGY_FLOOR;
}
if(m_nbFrameVADNest>0 && frameEn-m_meanEn>=NR_SNR_THRESHOLD_NO_UPD_LTE){ /* owkwon: */
m_flagVADNest=1;
m_nbSpeechFrame++;
}
else if(frameEn-m_meanEn > NR_SNR_THRESHOLD_VAD && t>=NR_MIN_FRAME){ /* owkwon: changed the inquality sign to '>' */
m_flagVADNest=1;
m_nbSpeechFrame++;
}
else{
if((m_nbSpeechFrame < NR_MIN_SPEECH_FRAME_HANGOVER) && (m_nbSpeechFrame >= 2)){ /* owkwon: added the condition '&& (m_nbSpeechFrame >= 2)' */
m_hangOver=NR_HANGOVER;
}
m_nbSpeechFrame=0;
if(m_hangOver != 0){
m_hangOver=m_hangOver-1;
m_flagVADNest=1;
}
else{
m_flagVADNest=0;
}
}
}
void Wiener::ApplyFilter(float *re, float *im, float *h, float *out)
{
int i;
re[0] *= h[0]; im[0] = 0;
for(i=1;i<m_fftSize/2;i++){
re[i] *= h[i]; im[i] *= h[i];
re[m_fftSize-i] = re[i]; im[m_fftSize-i] = -im[i];
}
re[m_fftSize/2] = 0;
int m = (int) (log10 (m_fftSize) / log10 (2));
PRFFT_NEW(re,im,m,m_fftSize,-1);
for(i=0;i<m_winSize;i++){
out[i]=re[i];
}
}
#ifdef _DEBUG
int Wiener::SaveInput(const char *fname, int offsetX)
{
int i, sampleN;
int endX=my_max(0,m_inputEndX-offsetX*m_shiftSize);
FILE *fp=fopen(fname,"wb");
if(fp==0){
assert(0);
}
sampleN=my_min(NR_BUF_SIZE,endX);
for(i=endX-sampleN;i<endX;i++){
fwrite((char*)(&m_inputSpeech[i%NR_BUF_SIZE]), sizeof(short), 1, fp);
}
fclose(fp);
return 1;
}
int Wiener::SaveDenoised(const char *fname, int offsetX)
{
int i, sampleN;
FILE *fp=fopen(fname,"wb");
if(fp==0){
assert(0);
}
int endX=m_inputEndX-offsetX*m_shiftSize;
sampleN=my_min(NR_BUF_SIZE,endX);
for(i=endX-sampleN;i<endX;i++){
short s=m_denSpeech[i%NR_BUF_SIZE];
fwrite((char*)&s, sizeof(short), 1, fp);
}
fclose(fp);
return 1;
}
#endif
void Wiener::MelFilterBank(float *h2, float *h2mel)
{
int i,k;
for(k=0;k<=m_NumChannels+1;k++){
WfMelFB *melFB=&m_MelFB[k];
float sum = h2[melFB->m_centerX]; /* The center weight is always 1 */
for(i=melFB->m_lowX;i<melFB->m_centerX;i++){
sum += m_MelWeight[i]*h2[i];
}
for(i=melFB->m_centerX+1;i<=melFB->m_highX;i++){
sum += (1-m_MelWeight[i])*h2[i];
}
h2mel[k]=sum/melFB->m_sumWeight;
}
}
void Wiener::MelIDCT(float *h2mel, float *hWFmirr)
{
int n,k;
float hWF[NR_NUM_CHANNELS+2];
for(n=0;n<=NR_NUM_CHANNELS+1;n++){
float sum=0;
for(k=0;k<=NR_NUM_CHANNELS+1;k++)
sum += h2mel[k]*m_melIdctMatrix[k*(NR_NUM_CHANNELS+2)+n];
hWF[n]=sum;
}
for(n=0;n<=NR_NUM_CHANNELS+1;n++)
hWFmirr[n]=hWF[n];
for(n=NR_NUM_CHANNELS+2;n<=2*(NR_NUM_CHANNELS+1);n++)
hWFmirr[n]=hWF[2*(NR_NUM_CHANNELS+1)+1-n];
}
void Wiener::ApplyWiener(float *s, float *hWFmirr, float *hWFw, float *out)
{
int i,n;
float hWFcaus[2*(NR_NUM_CHANNELS+1)+1];
float hWFtrunc[NR_FL];
int FL2=(NR_FL-1)/2;
float fac=0;
for(n=0;n<=NR_NUM_CHANNELS;n++)
hWFcaus[n]=hWFmirr[n+NR_NUM_CHANNELS+1];
for(n=NR_NUM_CHANNELS+1;n<=2*(NR_NUM_CHANNELS+1);n++)
hWFcaus[n]=hWFmirr[n-NR_NUM_CHANNELS-1];
for(n=0;n<=NR_FL-1;n++)
hWFtrunc[n]=hWFcaus[n+NR_NUM_CHANNELS+1-FL2];
for(n=0;n<=NR_FL-1;n++){
hWFw[n]=m_HanningWin2[n]*hWFtrunc[n];
fac += m_HanningWin2[n];
}
for(n=0;n<m_shiftSize;n++){
float sum=0;
for(i=-FL2; i<=FL2; i++) sum += hWFw[i+FL2]*s[n-i];
out[n]=sum;
}
}
/*---------------------------------------------------------------------------
Initialize data structure for FFT windows (mel filter bank).
Computes start (bin[k-1]), center (bin[k]) and end (bin[k+1]) points
of each mel filter to FFT index.
Input:
m_sampleRate Sampling frequency
m_FFTLength FFT length
m_NumChannels Number of channels
Output:
Initialized weights and mel filters
*---------------------------------------------------------------------------*/
void Wiener::InitMelFilterBanks (float startingFrequency, float samplingRate, int fftLength, int numChannels)
{
int i, k;
float freq[NR_NUM_CHANNELS+2];
int bin[NR_NUM_CHANNELS+2];
float start_mel, fs_per_2_mel;
/* Constants for calculation */
freq[0] = startingFrequency;
start_mel = (float)(2595.0 * log10 (1.0 + startingFrequency / 700.0));
bin[0] = Round(fftLength * freq[0] / samplingRate);
freq[numChannels+1] = (float)(samplingRate / 2.0);
fs_per_2_mel = (float)(2595.0 * log10 (1.0 + (samplingRate / 2.0) / 700.0));
bin[numChannels+1] = Round(fftLength * freq[numChannels+1] / samplingRate);
/* Calculating mel-scaled frequency and the corresponding FFT-bin */
/* number for the lower edge of the band */
for (k = 1; k <= numChannels; k++) {
freq[k] = (float)(700 * (pow (10, (start_mel + (float) k / (numChannels + 1) * (fs_per_2_mel - start_mel)) / 2595.0) - 1.0));
bin[k] = Round(fftLength * freq[k] / samplingRate);
}
/* This part is never used to compute MFCC coefficients */
/* but initialized for completeness */
for(i = 0; i<bin[0]; i++){
m_MelWeight[i]=0;
}
m_MelWeight[fftLength/2]=1;
/* Initialize low, center, high indices to FFT-bin */
m_MelFB[0].m_lowX=0;
m_MelFB[0].m_centerX=bin[0];
m_MelFB[0].m_highX=bin[1]-1;
for (k = 1; k <= numChannels; k++) {
m_MelFB[k].m_lowX=bin[k-1]+1;
m_MelFB[k].m_centerX=bin[k];
m_MelFB[k].m_highX=bin[k+1]-1;
}
m_MelFB[numChannels+1].m_lowX=bin[numChannels]+1;
m_MelFB[numChannels+1].m_centerX=bin[numChannels+1];
m_MelFB[numChannels+1].m_highX=bin[numChannels+1];
for (k = 1; k <= numChannels+1; k++) {
for(i = bin[k-1]; i<bin[k]; i++){
m_MelWeight[i]=(i-bin[k-1])/(float)(bin[k]-bin[k-1]);
}
}
m_MelWeight[bin[numChannels+1]]=0;
m_MelFB[0].m_sumWeight=(bin[1]-bin[0]+1)/(float)2;
for (k=1;k<=numChannels;k++){
m_MelFB[k].m_sumWeight=(bin[k+1]-bin[k-1])/(float)2;
}
m_MelFB[numChannels+1].m_sumWeight=(bin[numChannels+1]-bin[numChannels]+1)/(float)2;
return;
}
/*---------------------------------------------------------------------------
Initializes matrix for IDCT computation (IDCT is implemented
as matrix-vector multiplication). The DCT matrix is of size
(numChannels+2)-by-(numChannels+2).
*---------------------------------------------------------------------------*/
int Wiener::InitMelIDCTMatrix (float *idctMatrix, int numChannels)
{
int k, n;
float center[NR_NUM_CHANNELS+2], df[NR_NUM_CHANNELS+2];
float fac=m_sampleRate/(float)(2*(m_specLength-1));
center[0]=0;
center[NR_NUM_CHANNELS+1]=m_sampleRate/(float)2;
for(k=1;k<=NR_NUM_CHANNELS;k++){
int i;
WfMelFB *melFB=&m_MelFB[k];
float sum = (float)melFB->m_centerX; /* The center weight is always 1 */
for(i=melFB->m_lowX;i<melFB->m_centerX;i++) sum += m_MelWeight[i]*i;
for(i=melFB->m_centerX+1;i<=melFB->m_highX;i++) sum += (1-m_MelWeight[i])*i;
center[k]=sum*fac/melFB->m_sumWeight; /* owkwon: Use normalized W(k,i) */
}
df[0]=(center[1]-center[0])/(float)m_sampleRate;
for(k=1;k<=NR_NUM_CHANNELS;k++){
df[k]=(center[k+1]-center[k-1])/(float)m_sampleRate;
}
df[NR_NUM_CHANNELS+1]=(center[NR_NUM_CHANNELS+1]-center[NR_NUM_CHANNELS])/(float)m_sampleRate;
for (k = 0; k < numChannels+2; k++){
for (n = 0; n < numChannels+2; n++)
idctMatrix[k*(numChannels+2)+n] = (float) cos ((2*M_PI*n*center[k])/(float)m_sampleRate) * df[k];
}
return 1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -