📄 speaker.m
字号:
%File to cut and paste parts of a wav file in reverse order
%Author = E. Darren Ellis 05/01
%
%Reads s11.wav, plays and plots it, swaps the first two 15000-sample
%segments by round-tripping them through the ASCII file "darren", and
%then plots and plays the swapped signal underneath the original.
[y, fs, nbits] = wavread('s11.wav'); %read in the wav file
sound(y,fs) %play back the wav file
t = (0:length(y)-1)/fs; %time vector with exactly one entry per sample
subplot(211) %create a subplot
plot(t,y) %plot the original waveform
yfirst=y(1:15000); %partition the vector into two parts
ysecond=y(15001:30000);
%ysecond is written before yfirst, so the file holds the two segments in
%reverse order
save darren ysecond yfirst -ascii
load darren -ascii %read the file back in as the variable "darren"
%darren only holds the saved segments, so it needs its own time axis;
%reusing t would fail whenever length(y) differs from length(darren)
tswap = (0:length(darren)-1)/fs;
subplot(212) %prepare a new subplot
plot(tswap,darren) %plot the new file to compare it to the original
pause(2) %create a 2 second pause
sound(darren,fs); %play back the new sound file
%APPENDIX B
%Adds gaussian noise to a speech signal and draws three stacked plots:
%  1) the original signal in the time domain,
%  2) the magnitude of the shifted FFT of the original signal,
%  3) the magnitude of the shifted FFT of the signal with noise added.
%Author = E. Darren Ellis 05/01

%---- load the recording and build its time axis ----
[y, fs, nbits] = wavread('a18.wav');
t = 0:1/fs:length(y)/fs-1/fs;

%---- gaussian noise (generator provided by Dr. Qi) ----
sigma = 0.02; %standard deviation of the noise
mu = 0;       %mean of the noise
n = mu + sigma*randn(size(y));
signal = y + n; %noisy version of the recording

%---- spectra of the clean and the noisy signal ----
yfft = fft(y);
xfft = fft(signal);
ysfft = fftshift(yfft); %shifted FFT of the original signal
xsfft = fftshift(xfft); %shifted FFT of the noisy signal
f = -length(y)/2:length(y)/2-1; %one frequency index per FFT bin

%---- plots ----
subplot(311)
plot(t,y) %time-domain view of the original signal
subplot(312)
plot(f,abs(ysfft)); %spectrum magnitude, original signal
subplot(313)
plot(f,abs(xsfft)); %spectrum magnitude, signal with noise added
%APPENDIX C
%Code to plot a noisy signal, take the shifted FFT of the noisy signal
%and apply a Butterworth filter to it. The filtered signal is then
%scaled and plotted to compare to the original signal.
%Author = E. Darren Ellis 05/01
[y, fs, nbits] = wavread('a71.wav'); %read in the wav file
t = 0:1/fs:length(y)/fs-1/fs; %generate the correct time vector
subplot(311) %create a subplot
plot(t,y) %plot the signal in the time domain
sound(y,fs) %play back the wav file
yfft=fft(y); %take the FFT of the original signal
f = -length(y)/2:length(y)/2-1; %create the appropriate
                                %frequency vector
ysfft=fftshift(yfft); %shift the FFT of the original signal
subplot(312)
plot(f,abs(ysfft)); %plot the shifted FFT of the original signal
%%%%%code provided by Dr. Qi to generate and apply the Butterworth
%filter%%%%%
order = 3;  %filter order
cut = 0.05; %normalized cutoff passed to butter
[B, A] = butter(order, cut);
%NOTE(review): the filter is run over the shifted FFT samples, not over
%the time-domain signal y -- confirm this is the intended operation.
filtersignal = filter(B, A, ysfft);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
subplot(313)
%plot the filtered result, scaled by 21 so it can be compared with the
%plot above
plot(f,21*abs(filtersignal));
%APPENDIX D
%Code for pitch analysis of a wav file. This code needs the pitch.m
%and pitchacorr.m files to be in the same directory. A plot of pitch
%contour versus time frame is created and the average pitch of the wav
%file is displayed.
%Author = E. Darren Ellis 05/01
[y, fs, nbits] = wavread('a17.wav'); %read in the wav file
%call the pitch.m routine; the semicolon keeps the full t and f0 vectors
%from being dumped to the console
[t, f0, avgF0] = pitch(y,fs);
plot(t,f0) %plot pitch contour versus time frame
avgF0 %display the average pitch
sound(y,fs) %play back the sound file at the rate it was recorded at
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function:
%   Extract pitch information from speech files.
%   The signal is cut into overlapping frames, the pitch of each frame
%   is estimated by pitchacorr (peak of the autocorrelation), and the
%   per-frame estimates form the pitch contour.
%
% Input:
%   y:  original speech samples
%   fs: sampling rate in Hz
%
% Output:
%   t:     time-frame axis, one entry per frame
%   f0:    pitch contour, 1 x nFrames (0 marks a frame with no pitch)
%   avgF0: average pitch frequency (0 if no frame contributed)
%
% Acknowledgement:
%   this code is based on Philipos C. Loizou's colea Copyright (c) 1995
%
function [t, f0, avgF0] = pitch(y, fs)
% get the number of samples
ns = length(y);

% remove the DC bias
y = y - mean(y);

% use a 120 msec segment, choose a segment every 110 msec,
% that means the overlap between segments is 10 msec
fRate = floor(120*fs/1000);   % frame length in samples
updRate = floor(110*fs/1000); % hop between frame starts in samples
nFrames = floor(ns/updRate)-1;

% the pitch contour is then a 1 x nFrames vector
f0 = zeros(1, nFrames);  % contour returned to the caller
f01 = zeros(1, nFrames); % raw per-frame estimates before smoothing

% get the pitch from each segmented frame
k = 1;         % first sample of the current frame
pitchSum = 0;  % running sum of the pitch values that enter the average
nCounted = 0;  % how many values were added to pitchSum
for i = 1:nFrames
    xseg = y(k:k+fRate-1);
    f01(i) = pitchacorr(fRate, fs, xseg);

    if nFrames > 3
        % 3-point median filtering, less affected by noise; the result
        % is written two frames back, so the last two entries of f0
        % keep their initial value of 0
        if i > 2
            md = median(f01(i-2:i));
            f0(i-2) = md;
            if md > 0 % only frames with a pitch enter the average
                pitchSum = pitchSum + md;
                nCounted = nCounted + 1;
            end
        end
    else
        % too few frames to median-filter: use the raw estimate
        % (the original referenced an undefined variable "a" here)
        f0(i) = f01(i);
        pitchSum = pitchSum + f01(i);
        nCounted = nCounted + 1;
    end

    k = k + updRate;
end

% NOTE(review): the axis is scaled by 20 per frame although frames start
% every 110 msec -- confirm which unit callers expect before changing it
t = 20 * (1:nFrames);

if nCounted == 0
    avgF0 = 0;
else
    avgF0 = pitchSum/nCounted;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Pitch estimation using the autocorrelation method
% Modified based on colea Copyright (c) 1995 Philipos C. Loizou
%
% Input:
%   len:  number of samples in the frame
%   fs:   sampling rate in Hz
%   xseg: the frame of speech samples
%
% Output:
%   f0: pitch estimate in Hz, or 0 when the frame is judged unvoiced
%
function [f0] = pitchacorr(len, fs, xseg)
% LPF at 900Hz
[bf0, af0] = butter(4, 900/(fs/2));
xseg = filter(bf0, af0, xseg);

% find the clipping level, CL: 68% of the smaller of the peak amplitudes
% in the first and in the last third of the frame.
% The third boundaries are rounded down so they are valid indices even
% when len is not a multiple of 3.
i13 = max(1, floor(len/3));
i23 = max(1, floor(2*len/3));
maxi1 = max(abs(xseg(1:i13)));
maxi2 = max(abs(xseg(i23:len)));
if maxi1 > maxi2
    CL = 0.68*maxi2;
else
    CL = 0.68*maxi1;
end

% Center clip waveform, and compute the autocorrelation
clip = zeros(len,1);
ind1 = find(xseg >= CL);
clip(ind1) = xseg(ind1) - CL;
ind2 = find(xseg <= -CL);
clip(ind2) = xseg(ind2) + CL;
engy = norm(clip,2)^2;
RR = xcorr(clip);
m = len; % xcorr returns 2*len-1 values; index len is lag 0

% Find the max autocorrelation in the range 60 <= f <= 320 Hz
LF = floor(fs/320); % shortest lag searched
HF = floor(fs/60);  % longest lag searched
Rxx = abs(RR(m+LF:m+HF));
[rmax, imax] = max(Rxx);
% Rxx(1) is lag LF, so entry imax is lag LF+imax-1
% (the original used LF+imax, one sample too long)
lag = LF + imax - 1;
f0 = fs/lag;

% Check max RR against V/UV threshold
silence = 0.4*engy;
if ~((rmax > silence) && (f0 > 60) && (f0 <= 320))
    f0 = 0; % -- its unvoiced segment ---------
end
%APPENDIX E
%Code to compute and plot the power spectral density of a speech file,
%in which the formants show up as peaks, and to obtain the positions of
%the first five peaks (returned in I by formant.m).
%This code requires formant.m and pickmax.m to be in the same directory
%Author = E. Darren Ellis 05/01
[y, fs, nbits] = wavread('a17.wav'); %read in my speech file.
[P,F,I] = formant(y); %apply formant routine and
                      %return P, F, and I.
sound(y,fs) %play the speech file at the rate it was recorded at.
plot(F,P) %plot the PSD; its peaks are the formants.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function:
%   Return the first five formants of a speech file
%
% Input:
%   The speech file "y"
%
% Output:
%   P: the PSD in dB
%   F: the normalized frequency axis (0..1) to plot P against
%   I: the normalized positions of the first five peaks of P
%      (entries stay 0 when fewer than five peaks are found)
%
% Author:
%   Hairong Qi
%
% Date:
%   04/25/01
%
function [P, F, I] = formant(y)
% calculate the PSD using Yule-Walker's method
order = 12;
% An empty nfft selects pyulear's default FFT length; the axis below
% assumes that default yields nBins+1 = 129 PSD points.
nBins = 128;
P = pyulear(y,order,[]);
P = 10*log10(P); % convert to DB
F = (0:nBins)/nBins; % normalized frequency axis

% call pickmax to pick the peaks in the PSD
% Pm is the value of the peaks, I is the index of the peaks
[Pm,I] = pickmax(P);
I = I/nBins; % normalize the index onto the same axis as F

% you should use plot(F, P) to plot the PSD
% and I tells you the location of those formant lines.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%The following is also code provided by Dr. Qi
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function: pick the index of local maxima
%
% Input:
%   y: vector to search
%
% Output:
%   Y: 5x1, value of y at each of the first five local maxima
%   I: 5x1, zero-based position of each maximum (element k of y is
%      reported as k-1)
%   Entries of Y and I stay 0 when fewer than five maxima exist.
%
function [Y, I] = pickmax(y)
% pick the first 5 peaks
maxPeaks = 5;
Y = zeros(maxPeaks,1);
I = zeros(maxPeaks,1);

% get the difference: xd(i) = y(i+1) - y(i)
xd = diff(y);

% a local maximum is where the difference goes from + to non-positive
found = 0;      % number of peaks stored so far
rising = false; % true while the most recent slope was positive
for i = 1:length(xd)
    if xd(i) > 0
        rising = true;
    elseif rising
        rising = false;
        found = found + 1;
        % the slope into y(i) was positive and the slope out of it is
        % not, so y(i) is the peak; store its value (the original
        % stored the difference xd(i) instead)
        Y(found) = y(i);
        I(found) = i-1;
        if found >= maxPeaks
            return
        end
    end
end
%APPENDIX F.
%Code to sort and compare voice files. This code first compares the
%reference wav file to all others based on average pitch. The top 12
%most likely matches are then compared by the differences in their
%formant peak vectors. The resulting closest matches are then
%displayed. This code needs pitch.m, pitchacorr.m, formant.m, and
%pickmax.m in the same directory in order to run.
%Author = E. Darren Ellis 05/01
nFiles = 83; %files a01.wav ... a83.wav are compared
nBest = 12;  %number of closest pitch matches passed on to the formant test
results=zeros(nBest,1);     %create a vector for results.
%differences in pitch, one entry per file (not named "diff", which would
%shadow the built-in diff function in this workspace)
pitchdiff=zeros(nFiles,1);
formantdiff=zeros(nBest,1); %create a vector for diff in formant vector
[y17, fs17, nbits17] = wavread('a17.wav'); %read in the wav file to
                                           %compare all others to.
[t17, f017, avgF017] = pitch(y17,fs17); %call the pitch routine for
                                        %ref. wav file.
[P17,F17,I17] = formant(y17); %call the formant routine
                              %for ref. wav file.
plot(t17,f017) %plot the pitch contour of the ref. file
avgF17 = avgF017 %set avgF17 to the reference average pitch and display it
sound(y17,fs17) %play the reference file at its own sample rate
pause(3) %pause for 3 seconds

%This code was provided by Dr. Qi
%file name based on the index, i
%NOTE: the reference file a17.wav is itself in this range, so it is
%compared against itself with a pitch difference of 0.
for i=1:nFiles
    if i<10
        filename = sprintf('a0%i.wav', i);
    else
        filename = sprintf('a%i.wav', i);
    end
    [y, fs, nbits] = wavread(filename);
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    [t, f0, avgF0] = pitch(y,fs); %call the pitch.m routine for the
                                  %current wav file.
    plot(t,f0) %plot the current wav file contour plot.
    %absolute avg. pitch difference between the current wav file and the
    %reference wav file.
    pitchdiff(i,1)=abs(avgF0-avgF17);
    i %display the index to see where the comparison is.
end
[Y,H]=sort(pitchdiff) %sort the pitch differences in ascending order.

for j=1:nBest %take the 12 smallest pitch differences to compare formants.
    p=H(j) %set p equal to jth position of vector H.
    if p<10
        filename = sprintf('a0%i.wav', p);
    else
        filename = sprintf('a%i.wav', p);
    end
    filename %display the filename of the wav file being compared.
    [y, fs, nbits] = wavread(filename);
    [P,F,I] = formant(y); %call the formant.m routine for the current wav
    sound(y,fs) %play back the wav file being compared.
    plot(F,P) %plot the formants for the comparison wav file.
    pause(3) %pause for 3 seconds so sound will finish playing back.
    formantdiff(j,1)=norm(I17-I); %create a vector of formant peak
                                  %differences.
end
[Y1,H1]=sort(formantdiff) %sort the vector in ascending order

for k=1:nBest
    %H1(k) is a position within the 12 pitch candidates; H maps it back
    %to the file number of that candidate.
    results(k,1)=H(H1(k));
end
H %display the vector H.
H1 %display the vector H1.
results %display the file numbers of the closest wav file matches.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -