📄 00002.txt
字号:
function output=MMSESTSA85(signal,fs,IS)
% output=MMSESTSA85(signal,fs,IS)
% Shor time Spectral Amplitude Minimum Mean Square Error Method for
% Denoising noisy speech. based on log-Spectral MMSE Ephraim et al (1985)
% paper under the same title. signal is the input noisy speech, fs is its
% sampling frequency and IS (which is optional) is the initial silence
% estimate (The default value is 0.25 which means that it is assumed that
% the first 0.25 seconds of the signal is speech inactive period and may be
% used for initial noise parameter estimation. IS may also be used as a
% structure for compatibility with my other functions but please do not try
% to use IS it in that way as its functionality is not reliable.
% The output is the restored estimate of clean speech.
% Author: Esfandiar Zavarehei
% Created: Jan-04
% Last Modified: 24-01-05
if (nargin<3 | isstruct(IS))
IS=.25; %Initial Silence or Noise Only part in seconds
end
W=fix(.025*fs); %Window length is 25 ms
SP=.4; %Shift percentage is 40% (10ms) %Overlap-Add method works good with this value(.4)
wnd=hamming(W);
%IGNORE FROM HERE ...............................
if (nargin>=3 & isstruct(IS))%This option is for compatibility with another programme
W=IS.windowsize
SP=IS.shiftsize/W;
%nfft=IS.nfft;
wnd=IS.window;
if isfield(IS,'IS')
IS=IS.IS;
else
IS=.25;
end
end
% ......................................UP TO HERE
pre_emph=0;
signal=filter([1 -pre_emph],1,signal);
NIS=fix((IS*fs-W)/(SP*W) +1);%number of initial silence segments
y=segment(signal,W,SP,wnd); % This function chops the signal into frames
Y=fft(y);
YPhase=angle(Y(1:fix(end/2)+1,:)); %Noisy Speech Phase
Y=abs(Y(1:fix(end/2)+1,:));%Specrogram
numberOfFrames=size(Y,2);
FreqResol=size(Y,1);
N=mean(Y(:,1:NIS)')'; %initial Noise Power Spectrum mean
LambdaD=mean((Y(:,1:NIS)').^2)';%initial Noise Power Spectrum variance
alpha=.99; %used in smoothing xi (For Deciesion Directed method for estimation of A Priori SNR)
NoiseCounter=0;
NoiseLength=9;%This is a smoothing factor for the noise updating
G=ones(size(N));%Initial Gain used in calculation of the new xi
Gamma=G;
X=zeros(size(Y)); % Initialize X (memory allocation)
h=waitbar(0,'Wait...');
for i=1:numberOfFrames
%%%%%%%%%%%%%%%%VAD and Noise Estimation START
if i<=NIS % If initial silence ignore VAD
SpeechFlag=0;
NoiseCounter=100;
else % Else Do VAD
[NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(Y(:,i),N,NoiseCounter); %Magnitude Spectrum Distance VAD
end
if SpeechFlag==0 % If not Speech Update Noise Parameters
N=(NoiseLength*N+Y(:,i))/(NoiseLength+1); %Update and smooth noise mean
LambdaD=(NoiseLength*LambdaD+(Y(:,i).^2))./(1+NoiseLength); %Update and smooth noise variance
end
%%%%%%%%%%%%%%%%%%%VAD and Noise Estimation END
gammaNew=(Y(:,i).^2)./LambdaD; %A postiriori SNR
xi=alpha*(G.^2).*Gamma+(1-alpha).*max(gammaNew-1,0); %Decision Directed Method for A Priori SNR
Gamma=gammaNew;
nu=Gamma.*xi./(1+xi); % A Function used in Calculation of Gain
G= (xi./(1+xi)).*exp(.5*expint(nu)); % Log spectral MMSE [Ephraim 1985]
X(:,i)=G.*Y(:,i); %Obtain the new Cleaned value
waitbar(i/numberOfFrames,h,num2str(fix(100*i/numberOfFrames)));
end
close(h);
output=OverlapAdd2(X,YPhase,W,SP*W); %Overlap-add Synthesis of speech
output=filter(1,[1 -pre_emph],output); %Undo the effect of Pre-emphasis
function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen);
%Y=OverlapAdd(X,A,W,S);
%Y is the signal reconstructed signal from its spectrogram. X is a matrix
%with each column being the fft of a segment of signal. A is the phase
%angle of the spectrum which should have the same dimension as X. if it is
%not given the phase angle of X is used which in the case of real values is
%zero (assuming that its the magnitude). W is the window length of time
%domain segments if not given the length is assumed to be twice as long as
%fft window length. S is the shift length of the segmentation process ( for
%example in the case of non overlapping signals it is equal to W and in the
%case of %50 overlap is equal to W/2. if not givven W/2 is used. Y is the
%reconstructed time domain signal.
%Sep-04
%Esfandiar Zavarehei
if nargin<2
yphase=angle(XNEW);
end
if nargin<3
windowLen=size(XNEW,1)*2;
end
if nargin<4
ShiftLen=windowLen/2;
end
if fix(ShiftLen)~=ShiftLen
ShiftLen=fix(ShiftLen);
disp('The shift length have to be an integer as it is the number of samples.')
disp(['shift length is fixed to ' num2str(ShiftLen)])
end
[FreqRes FrameNum]=size(XNEW);
Spec=XNEW.*exp(j*yphase);
if mod(windowLen,2) %if FreqResol is odd
Spec=[Spec;flipud(conj(Spec(2:end,:)))];
else
Spec=[Spec;flipud(conj(Spec(2:end-1,:)))];
end
sig=zeros((FrameNum-1)*ShiftLen+windowLen,1);
weight=sig;
for i=1:FrameNum
start=(i-1)*ShiftLen+1;
spec=Spec(:,i);
sig(start:start+windowLen-1)=sig(start:start+windowLen-1)+real(ifft(spec,windowLen));
end
ReconstructedSignal=sig;
function Seg=segment(signal,W,SP,Window)
% SEGMENT chops a signal to overlapping windowed segments
% A= SEGMENT(X,W,SP,WIN) returns a matrix which its columns are segmented
% and windowed frames of the input one dimentional signal, X. W is the
% number of samples per window, default value W=256. SP is the shift
% percentage, default value SP=0.4. WIN is the window that is multiplied by
% each segment and its length should be W. the default window is hamming
% window.
% 06-Sep-04
% Esfandiar Zavarehei
if nargin<3
SP=.4;
end
if nargin<2
W=256;
end
if nargin<4
Window=hamming(W);
end
Window=Window(:); %make it a column vector
L=length(signal);
SP=fix(W.*SP);
N=fix((L-W)/SP +1); %number of segments
Index=(repmat(1:W,N,1)+repmat((0:(N-1))'*SP,1,W))';
hw=repmat(Window,1,N);
Seg=signal(Index).*hw;
function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCounter,NoiseMargin,Hangover)
%[NOISEFLAG, SPEECHFLAG, NOISECOUNTER, DIST]=vad(SIGNAL,NOISE,NOISECOUNTER,NOISEMARGIN,HANGOVER)
%Spectral Distance Voice Activity Detector
%SIGNAL is the the current frames magnitude spectrum which is to labeld as
%noise or speech, NOISE is noise magnitude spectrum template (estimation),
%NOISECOUNTER is the number of imediate previous noise frames, NOISEMARGIN
%(default 3)is the spectral distance threshold. HANGOVER ( default 8 )is
%the number of noise segments after which the SPEECHFLAG is reset (goes to
%zero). NOISEFLAG is set to one if the the segment is labeld as noise
%NOISECOUNTER returns the number of previous noise segments, this value is
%reset (to zero) whenever a speech segment is detected. DIST is the
%spectral distance.
%Saeed Vaseghi
%edited by Esfandiar Zavarehei
%Sep-04
if nargin<4
NoiseMargin=3;
end
if nargin<5
Hangover=8;
end
if nargin<3
NoiseCounter=0;
end
FreqResol=length(signal);
SpectralDist= 20*(log10(signal)-log10(noise));
SpectralDist(find(SpectralDist<0))=0;
Dist=mean(SpectralDist);
if (Dist < NoiseMargin)
NoiseFlag=1;
NoiseCounter=NoiseCounter+1;
else
NoiseFlag=0;
NoiseCounter=0;
end
% Detect noise only periods and attenuate the signal
if (NoiseCounter > Hangover)
SpeechFlag=0;
else
SpeechFlag=1;
end
***********************************8
基于统计。。。。的谱减法
function [ss,po]=specsubm(s,fs,p)
%SPECSUBM performs speech enhancement using spectral subtraction [SS,PO]=(S,FS,P)
%
% implementation of spectral subtraction algorithm by R Martin (rather slow)
% algorithm parameters: t* in seconds, f* in Hz, k* dimensionless
% 1: tg = smoothing time constant for signal power estimate (0.04): high=reverberant, low=musical
% 2: ta = smoothing time constant for signal power estimate
% used in noise estimation (0.1)
% 3: tw = fft window length (will be rounded up to 2^nw samples)
% 4: tm = length of minimum filter (1.5): high=slow response to noise increase, low=distortion
% 5: to = time constant for oversubtraction factor (0.08)
% 6: fo = oversubtraction corner frequency (800): high=distortion, low=musical
% 7: km = number of minimisation buffers to use (4): high=waste memory, low=noise modulation
% 8: ks = oversampling constant (4)
% 9: kn = noise estimate compensation (1.5)
% 10:kf = subtraction floor (0.02): high=noisy, low=musical
% 11:ko = oversubtraction scale factor (4): high=distortion, low=musical
%
if nargin<3 po=[0.04 0.1 0.032 1.5 0.08 400 4 4 1.5 0.02 4].'; else po=p; end
ns=length(s);
ts=1/fs;
ss=zeros(ns,1);
ni=pow2(nextpow2(fs*po(3)/po(8)));
ti=ni/fs;
nw=ni*po(8);
nf=1+floor((ns-nw)/ni);
nm=ceil(fs*po(4)/(ni*po(7)));
win=0.5*hamming(nw+1)/1.08;win(end)=[];
zg=exp(-ti/po(1));
za=exp(-ti/po(2));
zo=exp(-ti/po(5));
px=zeros(1+nw/2,1);
pxn=px;
os=px;
mb=ones(1+nw/2,po(7))*nw/2;
im=0;
osf=po(11)*(1+(0:nw/2).'*fs/(nw*po(6))).^(-1);
imidx=[13 21]';
x2im=zeros(length(imidx),nf);
osim=x2im;
pnim=x2im;
pxnim=x2im;
qim=x2im;
for is=1:nf
idx=(1:nw)+(is-1)*ni;
x=rfft(s(idx).*win);
x2=x.*conj(x);
pxn=za*pxn+(1-za)*x2;
im=rem(im+1,nm);
if im
mb(:,1)=min(mb(:,1),pxn);
else
mb=[pxn,mb(:,1:po(7)-1)];
end
pn=po(9)*min(mb,[],2);
%os= oversubtraction factor
os=zo*os+(1-zo)*(1+osf.*pn./(pn+pxn));
px=zg*px+(1-zg)*x2;
q=max(po(10)*sqrt(pn./x2),1-sqrt(os.*pn./px));
ss(idx)=ss(idx)+irfft(x.*q);
end
if nargout==0
soundsc([s; ss],fs);
end
**********************************************
谱减发原型
% Spectral Subtraction in time domain ================
%
% This is an implementation of the Spectral Subtraction method described in
% Boll 79.
%
% Usage:
% x_hat = SpecSubTD_Boll79(y);
% Example:
% y = wavread_aurora(/aurora/speechdata/TESTA/N1_SNR5/FAK_1B.08);
% x_hat = SpecSubTD_Boll79(y);
%
% input:
% y: a Nx1 matrix of samples (assume 8kHz)
%
% output:
% x_hat: a Nx1 matrix of samples
%
% Updates and other algorithms available at: http://www.RobustASR.net
%
% Released under GNU GPL license
% Copyright 2001, Trausti Kristjansson
% ===================================================
% Overview of algorithm:
%
% take fft of overlapping frames of time domain signal (assume 8kHz)
% 1) Smooth data with simple 3 tap filter
% Estimate avarage noise power
% Find residual noise power
% 2) Spectral subtraction
% 3) Half wave rectification
% 4) Residual Noise Reduction
% 5) Additional Signal Attenuation During Nonspeech Activity
% take ifft and do overlap add to convert back into time domain
function [x_hat] = SpecSubTD_Boll79(y);
n_fl = ones(256,1);
% Take the FFT of overlapping frames
wind = hanning(256);
i =1;
for t = 1:128:(length(y)-256);
sp(:,i) = fft(wind.*y(t:(t+255)));
abs_sp(:,i) = abs(sp(:,i));
angle_sp(:,i) = angle(sp(:,i));
i = i+1;
end;
Y_FB = abs_sp;
% 1) Magnitude Averaging ===========================
% simple 3 tap lowpass filtering
Y_FBav = Y_FB;
for i = 2:(size(Y_FB,2)-1)
Y_FBav(:,i) = mean(Y_FB(:,(i-1):(i+1)),2);
end;
Y_FB = Y_FBav;
% use first 20 frames to estimate the noise mean
N_FB_mean = mean(Y_FB(:,1:20),2);
% Find maximum deviation for use in Residual Noise Reduction step
N_R = max(Y_FB(:,1:20)-N_FB_mean*ones(1,20),[],2);
% 2) Spectral Subtraction ==========================
X_hat_FB = Y_FB - (N_FB_mean-n_fl)*ones(1,size(Y_FB,2));
% 3) Half wave rectification =======================
% set values under a threshold to threshold
n_flooro = n_fl*ones(1,size(Y_FB,2));
[I,J] = find(X_hat_FB < n_flooro);
X_hat_FB(sub2ind(size(X_hat_FB),I,J)) = n_flooro(sub2ind(size(X_hat_FB),I,J));
% 4) Residual Noise Reduction ======================
% A type of median filtering:
% if noise is less than N_R, then use min value over adjacent frames
X_hat_FBrn = X_hat_FB;
for i = 2:(size(Y_FB,2)-1)
I = find( X_hat_FB(:,i) < (N_R + n_fl) );
X_hat_FBrn(I,i) = min(X_hat_FB(I,(i-1:i+1)),[],2);
end;
X_hat_FB = X_hat_FBrn;
% 5) Additional Signal Attenuation During Nonspeech Activity
% This step classifies frames into speech/non-speech,
% and replaces non-speech frames by attenuaated versions of the
% original frame. The replaced fames have power equivalent to
% the noise floor.
% find attenuation constant
c = 10^(-30/20);
% find non-speech frames
T = 20*log10(mean(X_hat_FB./(N_FB_mean*ones(1,size(X_hat_FB,2)))));
noiseFrames = find(T<-12);
% replace them with attenuated noisy frames.
X_hat_FB(:,noiseFrames) = c*Y_FB(:,noiseFrames);
% convert back to time domain, by overlap-add
x_hat = zeros(1,length(y));
j = sqrt(-1);
i = 1;
for t = 1:128:(length(y)-256);
x_hat(:,(t:t+255)) = x_hat(:,(t:t+255)) + real(ifft( X_hat_FB(:,i) .* exp(j*angle_sp(:,i)) ))';
i = i+1;
end;
****************************************************
卡尔曼滤波器
function output=KalmanSignalDenoiser(Noisy,Clean,fs)
% OUTPUT=KALMANSIGNALDENOISER(NOISY,CLEAN,FS)
% this purpose of this function is to demonstrate the capability of kalman
% filter for denoising noisy speech (corrupted by white noise). Kalman
% filtering of noisy speech usually have two steps:
% 1 . Estimating the AR parameters of speech segment
% 2 . Filtering the segment
% There are different approaches for extracting AR parameters of noisy
% speech in the literature, however, in this function none is implemented.
% Clean speech signal should be provided for this purpose.
%
% ARGUMENTS
% NOISY : noise contaminated speech
% CLEAN : clean speech signal
% FS : Sampling frequency which should be the same for both signals
%
% Output is the denoised speech signal
%
% Required functions:
% SEGMENT
%Sep-04
%Esfandiar Zavarehei
W=fix(.025*fs); %Window length is 25 ms
SP=1; %Shift percentage is 40% (10ms) %Overlap-Add method works good with this value(.4)
SpecP=13;
Window=ones(W,1);
x=segment(Clean,W,SP,Window);
y=segment(Noisy,W,SP,Window);
n=segment(Noisy-Clean,W,SP,Window);
R=var(n);
H=[zeros(1,SpecP-1) 1];
G=H'; GGT=G*H;
FUpper=[zeros(SpecP-1,1) eye(SpecP-1)];
I=eye(SpecP);
[A Q]=lpc(x,SpecP);
P=diag(repmat(R(1),1,SpecP));
o=zeros(1,W*size(x,2));% allocating memory to the output in advance save a lot of computation time
o(1:SpecP)=y(1:SpecP,1)';
hwb = waitbar(0,'Please wait...','Name','Processing');
start=SpecP+1;
Sp=o(1:SpecP)';
t=SpecP+1;
for n=1:size(x,2)
waitbar(n/size(x,2),hwb,['Please wait... ' num2str(fix(100*n/size(x,2))) ' %'])
F=[FUpper; fliplr(-A(n,2:end))];
for i=start:W
S_=F*Sp;
e=y(i,n)-S_(end);%innovation
P_=F*P*F'+GGT*Q(n);
K=(P_*H')/(H*P_*H' + R(n));
SOut=S_+K*e;
o(t-SpecP+1:t)=SOut'; %Notice that the previous SpecP-1 output samples are updated again
P=(I-K*H)*P_;
Sp=SOut;
t=t+1;
end
start=1;
end
close(hwb)
output=o;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -