📄 tito_speech.m
字号:
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TITO_Speech.m
% This program gives an example of the mixed speech signal example.
% Two source speech were mixed by a Two Input Two Output dynamic channel
% Frequency domain method was used to estimate the corresponding channel
% response. After that, wiener filter is used to recover the source
% speech signal.
%
% Reference:
% K. Diamantaras, A. Petropulu and B. Chen, "Blind Two-Input-Two-Output
% FIR Channel Identification Based on Second-Order Statistics,"
% IEEE Transactions on Signal Processing, vol. 48, No. 2, pp. 534-542,
% Feb. 2000.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
thres=0.1
i=sqrt(-1);
n = 2,
N = 1024*4
L=5
NF = 128,
window = kaiser(NF,8);
winmat = window*window';
%Fourier matrix
F = exp(-i*2*pi*(0:NF-1)'*(0:NF-1)/NF);
% Load the voice signal from file
s(1,:)= wavread('dog4.wav')';
s(2,:)= wavread('greet4.wav')';
s = s-(mean(s'))'*ones(1,N);
s(1,:) = s(1,:)/std(s(1,:));
s(2,:) = s(2,:)/std(s(2,:));
h = zeros(L,n,n); %Filter taps
h(:,1,1) = [1, zeros(1,L-1)];
h(:,2,2) = [1, zeros(1,L-1)];
%Cross Channel Response
h(:,2,1)=[1.0000 0.615 0.5 0.25 0.43];
h(:,1,2)= [1.0000 -0.6020 -0.1748 0.2082 -0.4049];
%
%Multichannel mixing
%===================
x = zeros(size(s)); %Observed mixture
%H(frequency, signalIndex, signalIndex)
for (ii=1:n)
for (jj=1:n)
x(ii,:) = x(ii,:) + filter(h(:,ii,jj),1,s(jj,:));
H(:,ii,jj) = fft(h(:,ii,jj), NF);
end
end
%
% rs(t) estimation (t = time lag, rs = diagonal matrix)
%Since rs = diagonal we keep only the diagonal part in memory
%===================================================================
rs = zeros(n,NF);
for (lag=0:NF-1)
for (ii=1:n)
rs(ii,lag+1) = s(ii,1:N-lag)*s(ii,lag+1:N)'/(N-lag);
end
end
%
%Rs(w) estimation (w = frequency, Rs = matrix)
%=================================================
Rs = zeros(NF,n); % = Rs(w,w)
Rs1 = zeros(NF/2+1,n); % = Rs(w,w+1)
for (ii=1:n)
Ts = toeplitz(rs(ii,:));
Ts = winmat.*Ts;
for (w=1:NF)
Rs(w,ii) = real(F(w,:)*Ts*F(w,:)');
end
for (w=1:NF/2+1)
Rs1(w,ii) = F(w,:)*Ts*F(w+1,:)';
end
end
%
%rx(t) estimation (t = time lag, rx = matrix)
%=================================================
rx = zeros(2*NF-1,n,n);
for (ii=1:n)
for (jj=1:n)
for (lag=-NF+1:-1)
rx(lag+NF,ii,jj) = x(ii,-lag+1:N)*x(jj,1:N+lag)'/(N+lag);
end
for (lag=0:NF-1)
rx(lag+NF,ii,jj) = x(ii,1:N-lag)*x(jj,lag+1:N)'/(N-lag);
end
end
end
%
%Rx(w) estimation (w = frequency, Rx = matrix)
%=================================================
Rx = zeros(n,n,NF); % = Rx(w,w)
rx1 = zeros(n,n,NF/2+1); % = Rx(w,w+1)
for (ii=1:n)
for (jj=1:n)
RR = toeplitz(rx(NF:-1:1,ii,jj),rx(NF:2*NF-1,ii,jj));
RR = winmat.*RR;
for (w=1:NF)
if (jj==ii)
Rx(ii,jj,w) = real(F(w,:)*RR*F(w,:)');
else
Rx(ii,jj,w) = F(w,:)*RR*F(w,:)';
end
end
for (w=1:NF/2+1)
Rx1(ii,jj,w) = F(w,:)*RR*F(w+1,:)';
end
end
end
%
%Prewhitening
%==============
V = zeros(n,n,NF); %V(signalIndex, signalIndex, frequency)
for (w=1:NF)
V(:,:,w) = inv(sqrtm(Rx(:,:,w)));
end
%
% Compute perfect W = V*H*Rs^{1/2}
%===================================
Wperf = zeros(n,n,NF); %Wperf = V(w)*H(w)*Rs(w)^{1/2}
M = zeros(n,n); %Temporary matrix
for (w=1:NF)
for (ii=1:n)
for (jj=1:n)
M(ii,jj) = H(w,ii,jj);
end
end
Wperf(:,:,w) = V(:,:,w) * M * sqrtm(diag(Rs(w,:)));
end
%
% Compute Ry(w,w+1), Ry(w,w+2) (w = frequency, Ry = matrix)
%============================================================
Ry1 = zeros(n,n,NF/2+1);
for (w=1:NF/2+1)
Ry1(:,:,w) = V(:,:,w)*Rx1(:,:,w)*V(:,:,w+1)';
end
%
% Computing W = svd(Ry1*Ry1')
%===================================
III=[];
W = zeros(n,n,NF/2+1); %W(signalIndex, signalIndex, frequency)
for (w=1:NF/2+1)
[W(:,:,w),Sdummy,Vdummy] = svd(Ry1(:,:,w)*Ry1(:,:,w)');
S(:,w) = diag(Sdummy);
if min(abs(diff(S(:,w)))) >thres*max(S(:,w));
III=[III w];
end
end
%
% Estimate H
%================================
Hest = zeros(n,n,NF/2+1);
for (w=1:NF/2+1)
M = inv(V(:,:,w)) * W(:,:,w);
Hest(:,:,w) = M * inv(diag(diag(M)));
end
%
% Test ratio H_{21}(w), H_{12}(w)
%==================================
ratest = zeros(1,NF/2+1);
rat = zeros(1,NF/2+1);
for (w=1:NF/2+1)
rat(w) = H(w,2,1) / H(w,1,2);
ratest(w) = Hest(2,1,w) / Hest(1,2,w);
rat2(w) = H(w,2,1) +1./ H(w,1,2);
rat2est(w) = Hest(2,1,w) +1./ Hest(1,2,w);
end
RATIO=ratest;
h21=h(:,2,1)'; H21=fft(h21,NF);
h12=h(:,1,2)'; H12=fft(h12,NF);
RATIO=[RATIO fliplr(conj(RATIO(2:NF/2)))];
Hr=RATIO;
look=conv(h21,fliplr(h12));
look=look/max(abs(look))
clf; LOOK=fft(look,NF);
tmp=phase(Hr);
NN=2*length(h12)-1;
[look_rec ls_err1]=rec_fromphase2(tmp,NF,NN,III,look);
plot(look,'m'); hold on; stem(look,'m');
plot([look_rec],'g')
LOOK_REC=fft(look_rec,NF);
M12=sqrt(abs(LOOK_REC)./abs(Hr));
M21=M12.*abs(Hr);
NN=length(h21);
H21R=[]; H12R=[];
for w=1:NF/2+1
ALT1=0.5*(rat2est(w)+sqrt(rat2est(w)^2-4*ratest(w)));
ALT2=0.5*(rat2est(w)-sqrt(rat2est(w)^2-4*ratest(w)));
if abs(abs(ALT1)-M21(w)) < abs(abs(ALT2)-M21(w))
H21R(w)=ALT1;
else
H21R(w)=ALT2;
end
end
H12R=H21R./ratest;
H12R=[H12R fliplr(conj(H12R(2:NF/2)))];
H21R=[H21R fliplr(conj(H21R(2:NF/2)))];
g12=real(ifft(H12R));g12=g12(1:NN);
g21=real(ifft(H21R));g21=g21(1:NN);
figure(1); clf
subplot(211),hold on;grid;
title('h12 True:Blue; Estimation:Red');
plot(h12(1:NN)/max(abs(h12)),'*')
plot(h12(1:NN)/max(abs(h12)))
plot(g12/max(abs(g12)),'r');
subplot(212),hold on;grid;
title('h21 True:Blue; Estimation:Red');
plot(h21(1:NN)/max(abs(h21)),'*')
plot(h21(1:NN)/max(abs(h21)));
plot(g21/max(abs(g21)),'r');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Speech recover begin
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Fs = 22050
Bits = 8
G12=fft(g12(1:L),NF);
G21=fft(g21(1:L),NF);
epsilon=0.00001;
clear Hinv TMP G srec
HREC(:,1,1)=ones(NF,1);
HREC(:,2,2)=ones(NF,1);
HREC(:,1,2)=G12;
HREC(:,2,1)=G21;
for (w=1:NF)
TMP = shiftdim(HREC(w,:,:),1);
TMP = inv(TMP'*TMP + epsilon*eye(n,n)) * TMP';
for (ii=1:n)
for (jj=1:n)
Hinv(w,ii,jj) = TMP(ii,jj);
end
end
end
for (ii=1:n)
for (jj=1:n)
hinv(:,ii,jj) = fftshift((ifft(Hinv(:,ii,jj))));
end
end
hinv=real(hinv);
srec=zeros(n,N);
for (ii=1:n)
for (jj=1:n)
srec(ii,:) = srec(ii,:) + filter(hinv(:,ii,jj)',1,x(jj,:));
end
end
srec=real(srec);
srec = srec-(mean(srec'))'*ones(1,N);
srec(1,:) = srec(1,:)/std(srec(1,:));
srec(2,:) = srec(2,:)/std(srec(2,:));
for w=1:NF
ALL(:,:,w)=reshape(Hinv(w,:,:),n,n)*reshape(H(w,:,:),n,n);
end
for (ii=1:n)
for (jj=1:n)
hgall(:,ii,jj) = fftshift(ifft(reshape(ALL(ii,jj,:),NF,1),NF));
end
end
hgall=real(hgall);
figure(2);clf
subplot(321);plot(s(1,:));title('Dog Barking');axis([1 N -3 3]);%grid;
subplot(322);plot(s(2,:));title('greeting');axis([1 N -4 4]);%grid;
subplot(323);plot(x(1,:));title('Mixed Speech 1');axis([1 N -5 5]);%grid;
subplot(324);plot(x(2,:));title('Mixed Speech 2');axis([1 N -5 5]);%grid;
subplot(325);plot(srec(1,:));title('Recovered Dog Barking');axis([1 N -3 3]);%grid;
subplot(326);plot(srec(2,:));title('Recovered greeting');axis([1 N -4 4]);%grid;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -