📄 main.m
字号:
%%% Underdetimed source separation
%%% The algorithm is described in "Michael Syskind Pedersen, DeLiang Wang,
%%% Jan Larsen and Ulrik Kjems: Two-microphone Separation of Speech
%%% Mixtures", 2006, Submitted.
clear all
close all
format compact %输出格式
dis=1;%%% display information while algorithm is running
if dis
disp('Initialisation...')
end
rand('state',1)%统一分布伪随机数
y={[]};
M=2; %number of sensors
u=0.5;%cardioid pattern controller 心形模式控制
N=3;% number of signals in mixture %%% only used in mixing process
th=1; %mask threshold control (tau)?
stopthresholdini=3000; %One source if condition number is above this value
TC1=0.1; %Merge finalstereo signals if correlation is above TC1
TC2=0.03; %Merge finalstereo and enerstereo if correlation is above TC2
numlags=1; %%% when correlation between two signals are found, how many lags should be considered?
thepow=20; %%% tau_E
minpow=30; %%% included to save a few calculations.
%evalu=0%%% source signals as input, separated signals as output
%evalu=1%%% separation and evaluation
evalu=1;
winnumber=3; %%Selects window function
NFFT=2048;%%%DFT length
k=4; %%% window length is NFFT/k
lastfmasklength=1;
switch winnumber
case 1
WINDOW=hanning(NFFT/k);
case 2
WINDOW=hann(NFFT/k);
case 3
WINDOW=hamming(NFFT/k);
case 4
WINDOW=bartlett(NFFT/k);%(triangular)
case 5
WINDOW=triang(NFFT/k);
case 6
WINDOW=blackman(NFFT/k);
case 7
WINDOW=rectwin(NFFT/k);
end
noverlapfactor=0.75;% overlap between consecutive windows
NOVERLAP=length(WINDOW)*noverlapfactor;
if evalu %%%read source signals signals
s=[];
[s(:,1),fs]=wavread('ukma.wav');%A
[s(:,2),fs]=wavread('frma.wav');%B
[s(:,3),fs]=wavread('itfe.wav');%C
[s(:,4),fs]=wavread('cnfe.wav');%D
[s(:,5),fs]=wavread('rufe.wav');%E
[s(:,6),fs]=wavread('gema.wav');%F
[s(:,7),fs]=wavread('nlma.wav');%G
[s(:,8),fs]=wavread('jpfe.wav');%H
[s(:,9),fs]=wavread('brfe.wav');%I
[s(:,10),fs]=wavread('esma.wav');%J
[s(:,11),fs]=wavread('dkma.wav');%K
[s(:,12),fs]=wavread('ukfe.wav');%L
labelvec={' A',' B',' C',' D',' E',' F',' G',' H',' I',' J',' K',' L'};
% s=s(10001:50000,:);%%%%if signal duration should be made shorter
%%% randomly pick N source signal from the pool of size(s,2) source signals.
NS=1:size(s,2);
for i=1:N
rnd=ceil(rand(1)*(size(s,2)-i+1));
Ns(i)=NS(rnd);
NS(rnd)=[];%actually we pick by removing size(s,2)-N sources from the pool
end
N=length(Ns);
S=s(:,Ns)';
Npos=max(N,7); % at least 7 different directions
all_theta=linspace(0,pi,Npos); %directions equally spaced between 0 and 180 degrees
for i=1:N
rnd=ceil(rand(1)*(Npos-i+1));
theta(i)=all_theta(rnd);
all_theta(rnd)=[];
end
%theta*180/pi %%%% vector of direction angles
%two cardioids pointing in two directions varying from 1-u to 1+u,0<u<=1
A=calcA(theta,u); %calculate elements in mixing matrix
X=A*S; %%%Create 2 mixed signals
wavwrite(X',fs,'stereomix');%save stereo mixture, not scaled!!!
else
[X,fs]=wavread('stereomix.wav');
X=X';
end
%%estimate energy stop thresholds
powpow=10*log10((sum(X(1,:).^2)+sum(X(2,:).^2))/(2*length(X)));%power of mixture(used to determine thresholds)
thE=powpow-thepow; %power threshold
minpower=powpow-minpow;%do not save signals with very little power (makes cumputations faster)
%%% create ideal masks for evaluation purpose
if evalu
for i=1:N
vd=zeros(1,N);
vd(i)=1;
ivd=(vd-1)*(-1);
[imaskL{i},imaskR{i},SNRiL(i),SNRiR(i)]=idealmask(A*diag(vd)*S,A*diag(ivd)*S,fs,NFFT,WINDOW,NOVERLAP);
end
[cmL,cmR]=colorimask(imaskL,imaskR,fs); %%% draw each ideal BM with a certain color
end
x={X}; %initialize buffer of signals to be separated
mask={[]}; %initialize mask buffer
fmask={[]};
enermask={[]};
delete_me_again=sg(X(1,:),NFFT,fs,WINDOW,NOVERLAP);%get dimensions of mask
lastremmask=zeros(size(delete_me_again));
clear delete_me_again
%%% Start algorithm
%%%Initialize counters
countmax=30; %the maximum iteration number (to be absolutely sure that the algorithm stops)
finalcnt=1; %counts the number of separated output signals
enercnt=1; %counts the number of low energy masks
exitcnt=1; % counts the number of times the mask buffer is emptied and the remaining mask is estimated
cnt=0;%number of iterations
if dis
disp('Starting algorithm...')
end
while cnt<countmax
sx=size(x,2); %%%%size of the signal vector. the signals are grouped in sx/2 grops. When there is no more signals left in x, the separation task is over.
cnt=cnt+1;
% stopthreshold=max(stopthresholdini/cnt,minstopthreshold)%enable if stop threshold should change during iterations
stopthreshold=stopthresholdini;
%%% make empty buffers to store signal pairs and corresponding masks for evaluation
xbuffer={};
maskbuffer={};
for n=1:sx
%%% 2 by 2 ICA algorithm
[y{n},Aest] = icaML(x{n});
%%% alternative 2 by 2 ICA algorithm
%%% [Aest,y{n}]=jade(x{n});%%% available from ICA central
%%%normalization
for m=1:2%%%normalization
y{n}(m,:)=y{n}(m,:)/(10*sqrt(var(y{n}(m,:))));
end
%[Aest,W,y{n},theta_est]=normmix(Aest,x{n},u); %normalization based on kmnown microphone responses
%%%normalization
%%% ICA evaluation
if(0)%%% plots ICA output beampatterns
evalICAdB(W,theta_est,theta,N,Ns,labelvec,u)
end
%%% ICA evaluation
%%% estimate mask
if dis
disp('Finding mask...')
end
if evalu
[newX{1}(1,:),newX{1}(2,:),newX{2}(1,:),newX{2}(2,:),msk{1},msk{2}]=applymasks(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,NFFT,WINDOW,NOVERLAP,cmR,cmL);
else
[newX{1}(1,:),newX{1}(2,:),newX{2}(1,:),newX{2}(2,:),msk{1},msk{2}]=applymasks(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,NFFT,WINDOW,NOVERLAP);
end
%% binaural stopping criteria
for m=1:2
condi=oneortwo_cond(newX{m}(1,:),newX{m}(2,:),fs);%%% one or two signals based on condition number
if(enerstop(newX{m}(1,:),newX{m}(2,:),thE,minpower)==2)
if dis
disp('Not a speech signal-too low energy')
end
elseif(enerstop(newX{m}(1,:),newX{m}(2,:),thE,minpower)==1)
if dis
disp('Not a good quality speech signal')
end
[L,R,enermask{enercnt}]=getfinalmask(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,m,NFFT,WINDOW,NOVERLAP,1);%to save memory, enermask is a sparse matrix
stestr=strcat('enerstereo',int2str(enercnt));%not scaled
if dis
disp(strcat('enerstereo',int2str(enercnt)))
end
wavwrite([L,R],fs,stestr);
enercnt=enercnt+1;
elseif(condi>stopthreshold)
if dis
disp('Save as final signal...')
end
[L,R,fmask{finalcnt}]=getfinalmask(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,1,m,NFFT,WINDOW,NOVERLAP,0);%notice that in the last step, the threshold is set to 1.
%str=strcat('y',int2str(m),'iter',int2str(cnt),'n',int2str(n),'final',int2str(finalcnt))
%wavwrite(y{n}(m,:),fs,str);%%%save files
stestr=strcat('finalstereo',int2str(finalcnt));%not scaled
if dis
disp(strcat('finalstereo',int2str(finalcnt)))
end
wavwrite([L,R],fs,stestr);
finalcnt=finalcnt+1;%%%save files
else %continue
xbuffer={xbuffer{1:end},newX{m}};
maskbuffer={maskbuffer{1:end},msk{m}};
% str=strcat('y',int2str(m),'iter',int2str(cnt),'n',int2str(n))
% wavwrite(y{n}(m,:),fs,str);
end
end
end
clear x
x=xbuffer;
clear mask
mask=maskbuffer;
if(isempty(xbuffer))%%% all branches in the algorithm has ended, no more signal pairs to which ICA should be applied
if dis
disp('Stopping separation algorithm')
end
if length(fmask)~=lastfmasklength; %check if one source appears more than once in the final signals
fmask=multisigcheck(fmask,X(1,:)',X(2,:)',TC1,fs,NFFT,WINDOW,NOVERLAP,numlags);
lastfmasklength=length(fmask);
finalcnt=lastfmasklength+1;
end
if isempty(enermask{1})
else %check if poor quality signal is correlated with one of the final signals
fmask=nosigcorr(fmask,enermask,X(1,:)',X(2,:)',TC2,fs,NFFT,WINDOW,NOVERLAP,numlags);
end
enercnt=1;
enermask={[]};
if isempty(fmask{1})
if dis
disp('No signals segregated. Stopping separation procedure.')'
end
exitcnt=4;
break
else
if dis
disp('Finding remaining mask...')
end
if evalu
[L,R,remainingmask]=getremainingmask(X(1,:)',X(2,:)',fmask,fs,NFFT,WINDOW,NOVERLAP,cmL,cmR);
else
[L,R,remainingmask]=getremainingmask(X(1,:)',X(2,:)',fmask,fs,NFFT,WINDOW,NOVERLAP);
end
end
if lastremmask==remainingmask %%% Should not continue if background mask have not changed.
if dis
disp('No changes. Stopping separation procedure.')
end
exitcnt=4;
wavwrite([L,R],fs,'remaining');
break
else
lastremmask=remainingmask;%%% to see if the next background mask have changed.
end
wavwrite([L,R],fs,'remaining');
condi=oneortwo_cond(newX{m}(1,:),newX{m}(2,:),fs);
if(enerstop(L',R',thE,minpower,fs)>0) %% enough energy in remaining mask?
if dis
disp('Not a speech signal')
end
break
elseif(condi>stopthreshold)%%%more than one signal in remaining?
stestr=strcat('finalstereo',int2str(finalcnt));%not scaled
wavwrite([L,R],fs,stestr);
if dis
disp('save remaing')
disp(strcat('finalstereo',int2str(finalcnt)))
end
fmask{finalcnt}=remainingmask;
break
else
mask={remainingmask};
x={[L';R']};
end
if exitcnt>=4
break
end
exitcnt=exitcnt+1;
end
end
if isempty(fmask)
flag=1;
mEL=0
mNR=0
mSNRi=0
mSNRo=0
mSNRx=0
else
if length(fmask)~=lastfmasklength;
fmask=multisigcheck(fmask,X(1,:)',X(2,:)',TC1,fs,NFFT,WINDOW,NOVERLAP,numlags);
end
if isempty(enermask{1});
else
fmask=nosigcorr(fmask,enermask,X(1,:)',X(2,:)',TC2,fs,NFFT,WINDOW,NOVERLAP,numlags);
end
end
if dis
disp('Separation done.')
end
if evalu
if dis
disp('Evaluation of outputs')
end
%%%clear x and mask if algorithm should be run again on another mixture
clear x
clear mask
%% sort masks with corresponding ideal mask
[valL,valR,e1L,e1R,e2L,e2R,q,lbl,cflag]=comparemasks(fmask,imaskL,imaskR,labelvec(Ns),fs,length(s));
%standalone signals
for i=1:N
Xalone(:,:,i)=A(:,i)*S(i,:);
end
%%% calculate snr according to hu-wang 1999 paper
[PLEL,PREL,PLNR,PRNR,SNRL,SNRR,SNRiLi,SNRiRi,SNRxL,SNRxR]=calcELNR(e1L,e1R,e2L,e2R,imaskL,imaskR,q,NFFT,WINDOW,NOVERLAP,Xalone);
lbl{1:end};
datafile=strcat('data.mat');
save(datafile, 'PLEL', 'PREL', 'PLNR' ,'PRNR', 'SNRL', 'SNRR', 'SNRiL', 'SNRiR','lbl', 'th', 'stopthresholdini', 'TC1' ,'TC2', 'thepow', 'minpow', 'NFFT', 'winnumber', 'k', 'NOVERLAP', 'Ns', 'theta')
mEL=100*(mean(PLEL)+mean(PREL))/2
mNR=100*(mean(PLNR)+mean(PRNR))/2
mSNRi=(mean(SNRiL)+mean(SNRiR))/2
mSNRo=(mean(SNRL)+mean(SNRR))/2
mSNRx=(mean(SNRxL)+mean(SNRxR))/2
if length(fmask)~=N
flag=1;%%%not same number of segregated signals as input signals
elseif cflag==1%%% one of the segregated signals appears to be the same source signal.
flag=1;
else
flag=0;
end
mSNR=mSNRo-mSNRi
save mydata mEL mNR mSNRi mSNRo mSNR mSNRx flag
end
if dis
disp('Done')
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -