% Source file: endpointdetect.m
function [endPoint, volume, zcr, soundSegment] = endPointDetect(y, fs, nbits, plotOpt, epdParam)
% endPointDetect: End point detection based on volume and zero-crossing rate
%	Usage: [endPoint, volume, zcr, soundSegment] = endPointDetect(y, fs, nbits, plotOpt, epdParam)
%
%	Inputs:
%		y: signal vector. Its range (max-min) must be larger than 2, otherwise
%			an error is raised (the signal is expected in integer format).
%		fs: sample rate (default: 8000)
%		nbits: bit resolution; only used to scale the plots and the playback
%			callbacks (default: 8)
%		plotOpt: nonzero to plot waveform, volume, ZCR and the detected end
%			points (default: 0)
%		epdParam: parameter struct with fields frameSize, overlap, volRatio1,
%			volRatio2, zcrShiftGain, zcrRatio, volRatio4zcr
%			(default: frameSize=round(fs/31.25), overlap=frameSize/2, ...)
%	Outputs:
%		endPoint: [beginSample, endSample] spanning the first to the last
%			detected segment, or [] if no segment is found
%		volume: per-frame volume as returned by frame2volume
%		zcr: per-frame zero-crossing rate as returned by frame2zcr
%		soundSegment: array of detected segments with fields begin, end,
%			beginSample, endSample, beginFrame, endFrame
%
%	Requires (not defined in this file): buffer2, frame2sampleIndex,
%	frame2volume, frame2zcr, findSegment.
%
%	Roger Jang, 20041118

% NOTE: selfdemo is not defined in this file (its definition at the bottom is
% commented out), so a call without arguments only works if a selfdemo
% function is available on the path.
if nargin<1, selfdemo; return; end
if nargin<2, fs=8000; end
if nargin<3, nbits=8; end
if nargin<4, plotOpt=0; end
if nargin<5
	epdParam.frameSize = round(fs/31.25);	% fs=8000 ===> frameSize=256;
	epdParam.overlap = round(epdParam.frameSize/2);
	epdParam.volRatio1=0.1;
	epdParam.volRatio2=0.2;
	epdParam.zcrShiftGain=4;
	epdParam.zcrRatio=0.25;
	epdParam.volRatio4zcr=0.05;
end
frameSize=epdParam.frameSize;
overlap=epdParam.overlap;
zcrRatio=epdParam.zcrRatio;

if max(y)-min(y)<=2
	error('The range of the given y vector is too small! Perhaps it should be converted to integer format...');
end

% ====== Zero adjusted
y = y-round(mean(y));

% ====== Take frames
framedY = buffer2(y, frameSize, overlap);
frameNum = size(framedY, 2);	% Number of frames
frameTime = frame2sampleIndex(1:frameNum, frameSize, overlap)/fs;

% ====== Compute volume/zcr
volume=frame2volume(framedY);
[minVol, index]=min(volume);
% The shift used for ZCR is zcrShiftGain times the largest absolute sample
% value within the frame of minimum volume, but never less than 2.
shiftAmount=epdParam.zcrShiftGain*max(abs(framedY(:,index)));
shiftAmount=max(shiftAmount, 2);	% (was assigned to a misspelled variable, so the floor was never applied)
zcr=frame2zcr(framedY, shiftAmount);

% ====== Compute volume/zcr thresholds
maxVol=max(volume);
volTh2=(maxVol-minVol)*epdParam.volRatio2+minVol;
zcrTh=max(zcr)*zcrRatio;	% Only used for plotting in this version
zcrVolTh=(maxVol-minVol)*epdParam.volRatio4zcr+minVol;

% ====== Identify voiced part that's larger than volTh2
voicedIndex = volume>volTh2;
soundSegment=findSegment(voicedIndex);

% ====== Lower volume threshold = mean volume of all frames that are not above
% volTh2 (treated as noise). The segments found above are expanded outwards
% from their known boundaries down to this level.
% (The original loop overwrote the running sum for each noise segment instead
% of accumulating it, so the result was not the mean.)
noiseIndex = volume<=volTh2;
volTh1 = mean(volume(noiseIndex));	% A scaling coefficient may still be needed here

% ====== Expansion 1: Expand end points to volume level 1 (lower level)
% A boundary moves outwards only while the volume keeps decreasing and stays
% above volTh1, i.e. it stops as soon as the volume starts to rise again.
% (The ZCR-based expansion of the original algorithm is disabled here.)
for i=1:length(soundSegment)
	head = soundSegment(i).begin;
	while (head-1)>=1 && volume(head-1)<volume(head) && volume(head-1)>volTh1
		head=head-1;
	end
	soundSegment(i).begin = head;
	tail = soundSegment(i).end;
	% Tail expansion reconstructed as the mirror image of the head expansion
	% (the original line was truncated).
	while (tail+1)<=length(volume) && volume(tail+1)>volTh1 && volume(tail+1)<volume(tail)
		tail=tail+1;
	end
	soundSegment(i).end = tail;
end

% ====== Delete repeated sound segments
index = [];
for i=1:length(soundSegment)-1
	if soundSegment(i).begin==soundSegment(i+1).begin && soundSegment(i).end==soundSegment(i+1).end
		index=[index, i];
	end
end
soundSegment(index) = [];

% ====== Transform frame-based index into sample-point-based index
for i=1:length(soundSegment)
	soundSegment(i).beginSample = frame2sampleIndex(soundSegment(i).begin, frameSize, overlap);
	soundSegment(i).endSample = min(length(y), frame2sampleIndex(soundSegment(i).end, frameSize, overlap));
	soundSegment(i).beginFrame = soundSegment(i).begin;
	soundSegment(i).endFrame = soundSegment(i).end;
end
% Drop the 'duration' field only if it is really there; this also avoids an
% error when no segment was found and soundSegment is not a struct.
if isfield(soundSegment, 'duration')
	soundSegment=rmfield(soundSegment, 'duration');
end

% ====== Delete a segment as noise if it lasts less than 0.3 second and the
% zero-crossing rate of its middle frame exceeds the threshold.
segmentNum=length(soundSegment);
segmentDuration=zeros(1, segmentNum);	% Duration of each segment in seconds
middleZcr=zeros(1, segmentNum);		% ZCR of the middle frame of each segment
for i=1:segmentNum
	middleFrame=soundSegment(i).begin+round((soundSegment(i).end-soundSegment(i).begin)/2);
	middleZcr(i)=zcr(middleFrame);
	segmentDuration(i)=(soundSegment(i).endSample-soundSegment(i).beginSample)/fs;
end
% NOTE(review): a ZCR value is compared against zcrVolTh, which is a volume
% threshold; zcrTh may have been intended. Behaviour kept as is - confirm.
soundSegment(segmentDuration<0.3 & middleZcr>zcrVolTh)=[];

% ====== Merge adjacent segments that are less than 0.1 second apart
% (the threshold was 0.2 second originally; change it back if 0.1 is not suitable)
segmentNum=length(soundSegment);
gap=zeros(1, max(segmentNum-1, 0));
for i=1:segmentNum-1
	gap(i)=(soundSegment(i+1).beginSample-soundSegment(i).endSample)/fs;
end
if isempty(gap)
	group=[];	% Fewer than two segments: nothing to merge
else
	group=findSegment(gap<0.1);
end
deletedIndex=[];
for i=1:length(group)
	first=group(i).begin;	% First segment of the run to be merged
	last=group(i).end+1;	% Last segment of the run to be merged
	soundSegment(first).endSample=soundSegment(last).endSample;
	soundSegment(first).endFrame=soundSegment(last).endFrame;
	soundSegment(first).end=soundSegment(last).end;	% Keep .end consistent with .endFrame
	deletedIndex=[deletedIndex, (first+1):last];
end
soundSegment(deletedIndex)=[];

% ====== Overall end points: from the first segment's head to the last segment's tail
if ~isempty(soundSegment)
	endPoint=[soundSegment(1).beginSample, soundSegment(end).endSample];
else
	endPoint = [];
end

if plotOpt
	% ------ Waveform
	axes1H=subplot(4,1,1);
	time=(1:length(y))/fs;
	plot(time, y);
	axis([min(time), max(time), -2^nbits/2, 2^nbits/2]);
	ylabel('Amplitude'); title('Waveform'); grid on
	% Plot end points
	yBound=[-2^nbits/2, 2^nbits/2];
	for i=1:length(soundSegment)
		line(frame2sampleIndex(soundSegment(i).beginFrame, frameSize, overlap)/fs*[1,1], yBound, 'color', 'm');
		line(frame2sampleIndex(  soundSegment(i).endFrame, frameSize, overlap)/fs*[1,1], yBound, 'color', 'g');
	end
	% ------ Volume and its thresholds
	axes2H=subplot(4,1,2);
	plot(frameTime, volume, '.-');
	line([min(frameTime), max(frameTime)], volTh1*[1 1], 'color', 'r');
	line([min(frameTime), max(frameTime)], volTh2*[1 1], 'color', 'r');
	line([min(frameTime), max(frameTime)], zcrVolTh*[1 1], 'color', 'r');
	axis tight
	ylabel('Volume'); title('Volume'); grid on
	% Plot end points
	yBound = [min(volume) max(volume)];
	for i=1:length(soundSegment)
		line(frame2sampleIndex(soundSegment(i).beginFrame, frameSize, overlap)/fs*[1,1], yBound, 'color', 'm');
		line(frame2sampleIndex(  soundSegment(i).endFrame, frameSize, overlap)/fs*[1,1], yBound, 'color', 'g');
	end
	% ------ Zero-crossing rate and its threshold
	axes3H=subplot(4,1,3);
	plot(frameTime, zcr, '.-');
	line([min(frameTime), max(frameTime)], zcrTh*[1 1], 'color', 'c');
	axis([min(frameTime), max(frameTime), 0, max(zcr)]);
	ylabel('ZCR'); title('Zero crossing rate'); grid on
	% Plot end points
	yBound = [0 max(zcr)];
	for i=1:length(soundSegment)
		line(frame2sampleIndex(soundSegment(i).beginFrame, frameSize, overlap)/fs*[1,1], yBound, 'color', 'm');
		line(frame2sampleIndex(  soundSegment(i).endFrame, frameSize, overlap)/fs*[1,1], yBound, 'color', 'g');
	end
	% ------ Voiced part of the waveform (left empty if nothing was detected)
	axes4H=subplot(4,1,4);
	if isempty(endPoint)
		voicedIndex=[];
	else
		voicedIndex=endPoint(1):endPoint(2);
	end
	voicedTime=time(voicedIndex);
	voicedY=y(voicedIndex);
	voicedH=plot(voicedTime, voicedY);
	% axis() needs strictly increasing limits, so skip it for an empty or
	% single-sample voiced part.
	if ~isempty(endPoint) && endPoint(2)>endPoint(1)
		axis([time(endPoint(1)), time(endPoint(2)), -2^nbits/2, 2^nbits/2]);
	end
	ylabel('Amplitude'); title('Voiced waveform'); grid on
	% ------ Store data in the figure for the playback buttons
	U.y=y; U.fs=fs; U.nbits=nbits;
	U.axes1H=axes1H; U.axes2H=axes2H; U.axes3H=axes3H; U.axes4H=axes4H;
	U.voicedIndex=voicedIndex; U.voicedH=voicedH;
	U.voicedY=voicedY; U.voicedTime=voicedTime;
	set(gcf, 'userData', U);
	uicontrol('string', 'Play all', 'callback', 'U=get(gcf, ''userData''); sound(U.y/(2^U.nbits/2), U.fs);');
	uicontrol('string', 'Play voiced', 'callback', 'U=get(gcf, ''userData''); sound(U.voicedY/(2^U.nbits/2), U.fs);', 'position', [100, 20, 60, 20]);
end
% ====== Self demo
%function selfdemo
%waveFile='清华大学资讯系.wav';
%plotOpt = 1;
%[y, fs, nbits] = wavReadInt(waveFile);
%endPoint = feval(mfilename, y, fs, nbits, plotOpt);
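% [Web code-viewer residue, not part of the program: keyboard-shortcut help
%  (copy code Ctrl+C, search code Ctrl+F, full screen F11, toggle theme
%  Ctrl+Shift+D, show shortcuts ?, increase font size Ctrl+=, decrease font
%  size Ctrl+-)]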