📄 myvoicemark.m
字号:
%%% Voiced / unvoiced speech segment decision
%%% 2004-03, jingjing: endpoint-detection routine; adapts to multiple sample rates
function [T]=myvoicemark(x,Fs)
%MYVOICEMARK  Mark the voiced segments of a speech signal (endpoint detection).
%   T = MYVOICEMARK(x, Fs) returns a 2-by-K matrix. Each column [b; e] holds the
%   start and end sample index of one detected segment, in ascending,
%   non-overlapping order. T is [] when no segment is found or when x is too
%   short to fill a single analysis window.
%
%   Inputs
%     x   speech samples (real vector, row or column)
%     Fs  sample rate in Hz; every window length below scales with it
%
%   Method
%     1. Sliding zero-crossing count over Len samples.
%     2. Sliding energy over 2*FrameLength+1 samples.
%     3. Both curves are smoothed with an FFT-domain low-pass.
%     4. A sample is marked when the smoothed zero-crossing count is below
%        zero_threshold AND the normalised smoothed energy is above
%        energy_threshold.
%     5. Marked runs longer than 8*FrameLength are kept; kept runs separated
%        by at most 8*FrameLength are merged into one segment.

FrameLength=round(Fs*0.0025);      % 2.5 ms, i.e. 40 samples at Fs = 16000
energy_threshold=0.01;             % relative to the maximum smoothed energy
zero_threshold=3.5;                % crossings per Len-sample window
min_span=8*FrameLength;            % 20 ms: minimum segment length / maximum merged gap

x=x(:).';                          % work on a row vector regardless of input orientation
N=length(x);
Len=round(20*Fs/16000);            % zero-crossing analysis window (20 samples at 16 kHz)

N2=N-Len;                          % number of zero-crossing frames
N1=N-2*FrameLength;                % number of energy frames
T=[];
if N2<1 || N1<1
    % Not enough samples for even one analysis window.
    return;
end

%% zero-crossing count
% A crossing is a change of the predicate (x > 0) between neighbouring
% samples. crslt(n) counts the crossings between x(n) and x(n+Len); a running
% sum of the 0/1 change flags gives the same integers as the nested loop.
is_positive=x>0;
sign_change=double(is_positive(2:end)~=is_positive(1:end-1));
running=[0, cumsum(sign_change)];
crslt=running(Len+1:end)-running(1:end-Len);      % 1-by-N2

%% smooth the zero-crossing curve and threshold it
C2=mvm_fft_lowpass(crslt);
C3=C2<zero_threshold;              % true where the crossing rate is low

%% short-time energy
% E(n) = sum of x(m)^2 for m = n .. n+2*FrameLength. The terms are added in
% the same order as a per-frame scalar loop, so the values are bit-identical.
sq=x.^2;
E=zeros(1,N1);
for m=0:2*FrameLength
    E=E+sq(1+m:N1+m);
end

%% smooth, normalise and threshold the energy curve
E3=mvm_fft_lowpass(E);
E3=E3./max(E3);                    % all-zero input gives NaN here, which fails the test below
E4=false(1,N2);                    % frames beyond the energy curve stay unmarked
nE=min(N1,N2);
E4(1:nE)=E3(1:nE)>energy_threshold;

%% combined decision: low crossing rate AND high energy
v=E4&C3;

%% turn the decision vector into [start; end] runs
% pt holds every index n >= 2 where v(n) differs from v(n-1).
pt=find(v(2:end)~=v(1:end-1))+1;
edges=pt;
if v(1)
    edges=[1, edges];              % signal starts inside a marked run
end
if mod(numel(edges),2)==1
    edges=[edges, N2];             % signal ends inside a marked run: close it at N2
end
seg_begin=edges(1:2:end);
seg_end=edges(2:2:end);
long_enough=(seg_end-seg_begin)>min_span;
segs=[seg_begin(long_enough); seg_end(long_enough)];

%% merge runs that are separated by a short gap
K=size(segs,2);
if K==0
    T=[];
    return;
end
merged=zeros(2,K);
count=1;
merged(:,1)=segs(:,1);
for k=2:K
    if segs(1,k)-merged(2,count)>min_span
        % Gap is long enough: start a new output segment.
        count=count+1;
        merged(:,count)=segs(:,k);
    else
        % Gap is short: extend the segment currently being built.
        merged(2,count)=segs(2,k);
    end
end
T=merged(:,1:count);               % each column is the [start; end] of a marked segment

function y=mvm_fft_lowpass(s)
%MVM_FFT_LOWPASS  Smooth a row vector by zeroing the middle of its FFT.
%   Keeps the first floor(n/200) bins and the last floor(n/200)+1 bins of the
%   spectrum, zeroes the rest, and returns the real part of the inverse FFT.
%   NOTE(review): the kept band is not symmetric (one extra bin pair on the
%   negative-frequency side), and for n < 200 only the last bin is kept, so the
%   DC term is dropped. Preserved as-is so existing results do not change;
%   confirm whether that is intended.
n=length(s);
keep=floor(n/200);
S=fft(s);
S_low=zeros(1,n);
S_low(1:keep)=S(1:keep);
S_low(n-keep:n)=S(n-keep:n);
y=real(ifft(S_low));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -