📄 se2_zh.m
字号:
%%% @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ %%%
% This algorithm is used to enhance the noisy speech especially poluted by broadband white noise.
%
% Last modified Feb 2004
%
% Southeast University
%%% @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ %%%
clear all;
[y1,fs,bits]=wavread('E:\noise enhancing\wav\5.wav'); % input clean speech
y1=y1/max(abs(y1));
%wavwrite(y1,8000,8,'F:\noise enhancing\voise\5.wav');
figure(1);
plot(y1);
[noise,fs1,bits1]=wavread('E:\noise enhancing\wav\5_noise.wav'); % input noise
y=mixsig(y1,noise,10); % mix clean speech with noise according to definite SNR
y=y/max(abs(y));%归一化
% wavwrite(y,8000,8,'F:\noise enhancing\wav\mymasking_s&w(-5).wav');
figure(2);
plot(y);
%% [y,fs,bits]=wavread('F:\noise enhancing\noise\denoise\出租车隧道.wav');
frame = 256; % Defining frame size
shift=64;
win=hamming(256);
for j1 = 1:length(y),
signal(j1) = y(j1);
end;
ps_noise=zeros(length(signal)/shift,frame);
frame_temp = zeros(length(signal)/frame,frame);
%%%% estimation of noise energy using the first five frames %%%%
hh = 0;
for k = 1 : 5,
for l = 1 : frame,
b(l) = signal(hh+l);
end;
hh = hh + frame;
frame_temp(k,1:frame) = abs(fft(b)); %fft for the first 5 frames
ps_noise(k,1:frame) = (frame_temp(k,1:frame).*conj(frame_temp(k,1:frame)))/frame;
end;
ps_noise(1,1:frame)= (sum(ps_noise(1:k,1:frame))/5);
frame_temp_initial(1:frame)=sum(frame_temp(k,1:frame))/k;
%%%%%%% START OF THE NOISE ELIMINATION THROUGH SPECTRAL SUBTRACTION BASED ON THE THRESHOLD SET %%%%%%%%%
head = 0;
for k = 1 :( length(signal)/shift-3),
for m = 1 : frame,
abc1(m) = signal(head+m);
end;
abc1=abc1.*win';
head = head +shift;
frame_temp(k,1:frame) = abs(fft(abc1));% FFT OF THE SIGNAL + NOISE FRAME BY FRAME
frame_angle(k,1:frame) = angle(fft(abc1));% ANGLE OF FFT OF THE SIGNAL + NOISE FRAME BY FRAME
ps_signal(k,1:frame) = (frame_temp(k,1:frame).*conj(frame_temp(k,1:frame)))./frame;
ps_temp(k,1:frame)=ps_signal(k,1:frame);
%%%%%%%% end detection: ceptrum distance coefficient %%%%%%%%%%
if k==1
framenoise_temp(k,1:frame)=frame_temp_initial(1:frame);
else framenoise_temp(k,1:frame)=framenoise_temp(k-1,1:frame);
end
dd(k)=sum(ifft(log(abs(framenoise_temp(k,1:frame)))))/frame; %% noise ceptrum
ddi=sum(ifft(log(abs(framenoise_temp(1,1:frame)))))/frame;
d(k)=sum(ps_signal(k,1:frame).*exp(-j*2*pi*(1:frame)*k/frame));%% signal ceptrum
di=sum(ps_signal(k,1:frame));
if k>1
dd(k)=0.8*dd(k)+0.2*d(k-1);
end
cep_disp(k)=4.3429*sqrt((di-ddi).^2+2.*((d(k)-dd(k)).^2));%% ceptrum distance
if k>1
cep_disp(k)=0.8*cep_disp(k)+0.2*cep_disp(k-1); % flat
end
if cep_disp(k)<5
if k>1
ps_noise(k,1:frame)=0.9*ps_noise(k-1,1:frame)+0.1*ps_signal(k,1:frame);
framenoise_temp(k,1:frame)=0.9*framenoise_temp(k-1,1:frame)+0.1*frame_temp(k,1:frame);
end
end
frame_ps(1,k) = (sum(ps_signal(k,1:frame)));
frame_pn(1,k)=sum(ps_noise(k,1:frame));
ps_final(1,k) = frame_ps(1,k)- 0.8*frame_pn(1,k);
%%% Wiener filtering to estimate power spectrum of signal
aa=0.7;bb=2;
h(1,k)=power(ps_final(1,k)/(ps_final(1,k)+aa*frame_pn(1,k)),bb);
frame1(k,1:frame) = h(1,k).*(frame_temp(k,1:frame));
ps_signal(k,1:frame) = (frame1(k,1:frame).*conj(frame1(k,1:frame)))./frame;
%%%%%%%%%%%%%%%%%% calculate masking value %%%%%%%%%%%%%%%%%%%%
T=zeros(1,129);
b=zeros(1,18);c=zeros(1,18);o=zeros(1,18);
sf=zeros(18,18);
%%% calculate power spectrum of every bark band
for i1=1:3,
b(1)=b(1)+ps_signal(k,i1);
end;
for i1=4:6,
b(2)=b(2)+ps_signal(k,i1);
end;
for i1=7:10,
b(3)=b(3)+ps_signal(k,i1);
end;
for i1=11:13,
b(4)=b(4)+ps_signal(k,i1);
end;
for i1=14:16,
b(5)=b(5)+ps_signal(k,i1);
end;
for i1=17:20,
b(6)=b(6)+ps_signal(k,i1);
end;
for i1=21:25,
b(7)=b(7)+ps_signal(k,i1);
end;
for i1=26:29,
b(8)=b(8)+ps_signal(k,i1);
end;
for i1=30:35,
b(9)=b(9)+ps_signal(k,i1);
end;
for i1=36:41,
b(10)=b(10)+ps_signal(k,i1);
end;
for i1=42:47,
b(11)=b(11)+ps_signal(k,i1);
end;
for i1=48:55,
b(12)=b(12)+ps_signal(k,i1);
end;
for i1=56:64,
b(13)=b(13)+ps_signal(k,i1);
end;
for i1=65:74,
b(14)=b(14)+ps_signal(k,i1);
end;
for i1=75:86,
b(15)=b(15)+ps_signal(k,i1);
end;
for i1=87:101,
b(16)=b(16)+ps_signal(k,i1);
end;
for i1=102:118,
b(17)=b(17)+ps_signal(k,i1);
end;
for i1=119:129,
b(18)=b(18)+ps_signal(k,i1);
end;
% for i1=142:170,
% b(19)=b(19)+ps_signal(k,i1);
% end;
% for i1=171:205,
% b(20)=b(20)+ps_signal(k,i1);
% end;
% for i1=206:246,
% b(21)=b(21)+ps_signal(k,i1);
% end;
% for i1=247:256,
% b(22)=b(22)+ps_signal(k,i1);
% end;
%%% calculate the spread function
for i1=1:18,
sf(i1)=15.81+7.5*(i1+0.474)-17.5*sqrt(1+(i1+0.474)*(i1+0.474));
end;
%%% apply the spread function to the critical band spectrum
% for j1=1:22,
% c(j1)=0;
% for i1=1:22,
% c(j1)=c(j1)+b(i1)*sf(i1,j1); %$ (19)
% end;
% end;
cc_temp=conv2(b,sf);
for i1=1:18
for j1=1:22
c(i1)=c(i1)+sum(cc_temp(i1,j1));
end;
end;
%%% calculate the spread masking threshold
temp_value=0.0;
for i1=1:18,
temp_value=temp_value+b(i1);
end;
ua=temp_value/256.0;
temp_value=0;
for i1=1:129,
temp_value=temp_value+log10(ps_signal(k,i1));
end;
temp_value=temp_value/129.0;
uj=power(10,temp_value);
sfm=-10*log10(uj/ua); %% spectrual flatness measurement
u=min(sfm/(-60),1); %% tonality of signal
for i1=1:18,
O(i1)=u*(14.5+i1)+(1-u)*5.5;
T(i1)=power(10,log10(c(i1))-O(i1)/10);
c(i1)=T(i1);%c[i]暂存T[i]
end;
for i1=1:3,
T(i1)=c(1);
end;
for i1=4:6,
T(i1)=c(2);
end;
for i1=7:10,
T(i1)=c(3);
end;
for i1=11:13,
T(i1)=c(4);
end;
for i1=14:16,
T(i1)=c(5);
end;
for i1=17:20,
T(i1)=c(6);
end;
for i1=21:25,
T(i1)=c(7);
end;
for i1=26:29,
T(i1)=c(8);
end;
for i1=30:35,
T(i1)=c(9);
end;
for i1=36:41,
T(i1)=c(10);
end;
for i1=42:47,
T(i1)=c(11);
end;
for i1=48:55,
T(i1)=c(12);
end;
for i1=56:64,
T(i1)=c(13);
end;
for i1=65:74,
T(i1)=c(14);
end;
for i1=75:86,
T(i1)=c(15);
end;
for i1=87:101,
T(i1)=c(16);
end;
for i1=102:118,
T(i1)=c(17);
end;
for i1=119:129,
T(i1)=c(18);
end;
% for i1=142:170,
% T(i1)=c(19);
% end;
% for i1=171:205,
% T(i1)=c(20);
% end;
% for i1=206:246
% T(i1)=c(21);
% end;
% for i1=247:256,
% T(i1)=c(22);
% end;
%%% calculate the absolute threshold
mm=0.0;
for i1=1:129,
f(i1)=mm;
mm=mm+8/129;
end;
f(1)=f(2);
for i1=1:128,
f(i1)=3.64*(f(i1).^(-0.8))- 6.5*exp(-0.6*((f(i1)-3.3).^2))+0.001*(f(i1).^4);
end;
for i1=1:129,
T(i1)=max(T(i1),f(i1));
end;
tmax=0;
tmin=0;
for i1=1:129,
if(T(i1)>tmax)
tmax=T(i1);
end;
if(T(i1)<tmin)
tmin=T(i1);
end;
end;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for k1=1:129,
alfa(k1)=(tmax*6-T(k1)*6+1*T(k1)-1*tmin)/(tmax-tmin);
beita(k1)=(tmax*0.02-T(k1)*0.02+0*T(k1)-0*tmin)/(tmax-tmin);
temppow=abs(ps_noise(k,k1))/abs(ps_temp(k,k1));
if(power(temppow,2)<(1.0/(alfa(k1)+beita(k1))))
frame1( k,k1)=power((1-alfa(k1)*power(temppow,2)),0.5)*ps_temp(k,k1);
else
frame1(k,k1)=power((beita(k1)*power(temppow,2)),0.5)*ps_temp(k,k1);
end;
end;
for i1=130:256
frame1(k,i1)=conj(frame1(k,258-i1));
end
%%% plus phase to the signal
frame1(k,1:frame) = frame1(k,1:frame).*(exp(i*frame_angle(k,1:frame)));
%%% back to time domain
frame2(k,1:frame)=ifft(frame1(k,1:frame));
%%% Retriving back the signal
if k==1
signal(1,1:shift)=frame2(k,1:shift);
else if k==2
signal(1,(shift+1):(2*shift))=(frame2(k,1:shift)+frame2(k-1,(shift+1):(shift*2)))/2;
else if k==3
signal(1,(shift*2+1):(3*shift))=(frame2(k,1:shift)+frame2(k-1,(shift+1):(shift*2))+frame2(k-2,(shift*2+1):(shift*3)))/3;
else signal(1,(((k-1)*shift+1):(k*shift)))=(frame2(k,1:shift)+frame2(k-1,(shift+1):(shift*2))+frame2(k-2,(shift*2+1):(shift*3))+frame2(k-3,(shift*3+1):frame))/4;
end;
end;
end;
end
signal((length(y)-1000):(length(y)))=[]; %give up 4 frames in the end
y1((length(y1)-1000):(length(y1)))=[];
figure(3);
signal=signal';
signal=signal/max(abs(signal));
plot(1:length(signal),signal);
overall_snr = 10*log10(sum(abs(y1).^2)/sum((abs(y1-signal)).^2)) %%% estimate the SNR after processing
wavwrite(signal,8000,8,'E:\noise enhancing\wav\result2(10).wav');
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -