📄 vc2.m
字号:
% Script: vc2.m is an m-file to execute the voice conversion algorithm which
% matches the speaking rate to the source speaker.
%
% vc2.m is a callback script invoked by vcapp.m on figure(vwin_f)
%-----------------------------------------------------------------------
% time-align speech2 to match the pace of speech1
%-----------------------------------------------------------------------
disp('Align the source parameter in time axis...')
% Build the frame-level warping path.
% x_ref collects, in segment order, every frame index covered by the
% segments vcidx11(k)..vcidx12(k).
x_ref = [];
for k = 1:length(vcidx11)
    x_ref = [x_ref, vcidx11(k):vcidx12(k)];
end

% Remove every column of the path whose second-row entry repeats the one
% in the previous column, so the abscissa handed to interp1 below has no
% adjacent duplicates.
tM2 = size(road, 2);
del_idx = find(diff(road(2, :)) == 0) + 1;
road1 = road;
road1(:, del_idx) = [];

% Look up the row-1 value of the path at each frame in x_ref (row 2 is the
% abscissa), round it to a whole frame index, and store the resulting
% frame pairs as the new two-row path.
y_ref = round(interp1(road1(2, :), road1(1, :), x_ref));
road1 = [x_ref; y_ref];
% 1 calculate pitch period (pp1) for each frame for speech1
%
% pp1 has one entry per row of cofa1 and starts out as zeros. For every
% frame kf inside a segment vcidx11(k)..vcidx12(k), the pitch period is the
% mean spacing between the gci1 entries that lie strictly inside the window
% (spoint, epoint), where epoint = spoint + 2*m_len + Order.
% When the window holds fewer than two gci1 entries no spacing can be
% measured and the value of the previous frame is reused.
M1=size(cofa1,1);
pp1=zeros(1,M1);
for k=1:length(vcidx11)
    s_fram=vcidx11(k);
    e_fram=vcidx12(k);
    for kf=s_fram:e_fram
        spoint=(kf-1)*m_len+1;
        epoint=spoint+2*m_len+Order;
        pitchs=diff( gci1( gci1>spoint & gci1<epoint ) );
        if ~isempty(pitchs)
            pp1(kf)=mean(pitchs);
        elseif kf>1
            % Fall back to the previous frame. The kf>1 guard avoids
            % indexing pp1(0), which raised an error when the very first
            % frame had no measurable pitch; in that case the entry keeps
            % its initial value of zero.
            pp1(kf)=pp1(kf-1);
        end
    end
end
% 2 calculate pitch period (pp2) for each frame for speech2
%
% pp2 has one entry per row of cofa2 and starts out as zeros. For every
% frame kf inside a segment vcidx21(k)..vcidx22(k), the pitch period is the
% mean spacing between the gci2 entries that lie strictly inside the window
% (spoint, epoint), where epoint = spoint + 2*m_len + Order.
% When the window holds fewer than two gci2 entries no spacing can be
% measured and the value of the previous frame is reused.
M2=size(cofa2,1);
pp2=zeros(1,M2);
for k=1:length(vcidx21)
    s_fram=vcidx21(k);
    e_fram=vcidx22(k);
    for kf=s_fram:e_fram
        spoint=(kf-1)*m_len+1;
        epoint=spoint+2*m_len+Order;
        pitchs=diff( gci2( gci2>spoint & gci2<epoint ) );
        if ~isempty(pitchs)
            pp2(kf)=mean(pitchs);
        elseif kf>1
            % Fall back to the previous frame. The kf>1 guard avoids
            % indexing pp2(0), which raised an error when the very first
            % frame had no measurable pitch; in that case the entry keeps
            % its initial value of zero.
            pp2(kf)=pp2(kf-1);
        end
    end
end
clear pitchs;
% 1.3 collect the pitch period data ****
% Pitch periods along the warping path: pp1 at the frames in x_ref and pp2
% at the matching frames in y_ref.
pp1_ref=pp1( x_ref );
pp2_ref=pp2( y_ref );
% 1.4 collect the ngm data ****
% One smoothed gain value per path frame. The first four columns of the
% selected ngm rows are laid out frame by frame (4 values per frame) in a
% single row vector. Interior frames average their own 4 values together
% with the last value of the previous frame and the first value of the
% next frame (6 values); the first and last path frames average only their
% own 4 values.
%
% The rows are transposed before flattening because MATLAB linear indexing
% is column-major: flattening the tM2-by-4 matrix directly would list the
% first column of every frame first, and the 4-per-frame windows below
% would then mix values from unrelated frames.
% The outputs are preallocated so that values left in the workspace by an
% earlier run with more frames cannot survive into this one.
tM2=length(x_ref);
ngm1_ref=zeros(1,tM2);
ngm2_ref=zeros(1,tM2);

tmp=ngm1(x_ref,1:4).';
tmp1=tmp(:).';
ngm1_ref(1)=mean( tmp1(1:4) );
for kf=2:tM2-1
    ngm1_ref(kf)=mean( tmp1( kf*4-4 : kf*4+1 ) );
end
ngm1_ref(tM2)=mean( tmp1(4*tM2-3:4*tM2) );

tmp=ngm2(y_ref,1:4).';
tmp2=tmp(:).';
ngm2_ref(1)=mean( tmp2(1:4) );
for kf=2:tM2-1
    ngm2_ref(kf)=mean( tmp2( kf*4-4 : kf*4+1 ) );
end
ngm2_ref(tM2)=mean( tmp2(4*tM2-3:4*tM2) );
clear tmp tmp1 tmp2;
% 1.5 collect the gpcf1 data ****
% Rows of gpcf1 / gpcf2 at the path frames.
gpcf1_ref=gpcf1( x_ref,: );
gpcf2_ref=gpcf2( y_ref,: );
% 1.6 collect the FF1 data ****
% Rows of FF1 / FF2 at the path frames; only collected when vt equals 2.
if vt==2
    FF1_ref=FF1( x_ref, :);
    FF2_ref=FF2( y_ref, :);
end
%-----------------------------------------------------------------------%
% Voice Conversion Algorithm %
% Conversion methods selectable for a feature:
% method=1 bias model %
% method=2 linear model %
% method=3 just copy the value from target %
% method=4 do not modify the value of the source but time-align %
%-----------------------------------------------------------------------%
% The following acoustic features (parameters) are created for the
% converted speech (suffix 3):
% vctyp3 = voice type classifications
% gci3 = gci index
% cofa3 = LP coefficients
% gm3 = voiced gain contour
% gpcf3 = glottal pulse shape
% nidx3 = stochastic codebook index
% ngm3 = whole gain index
% FF3 = formant frequency
% FB3 = formant bandwidth
%--------------------------------------------------------
%************ copy vctyp3, cofa3 and nidx3 **************
% These three features are taken over from speech1 unchanged.
vctyp3=vctyp1;
cofa3=cofa1;
nidx3=nidx1;
%---------------------------------------------------------
%************** convert gci1 to match gci2 **************
% Builds gci3 from a pitch-period contour pp3_ref chosen by the 'Value'
% property of v_pp_pit:
%   1  bias model    pp3_ref = pp1_ref + b
%   2  linear model  pp3_ref = a*pp1_ref + b
%   3  copy          pp3_ref = pp2_ref
%   4  keep source   gci3 = gci1, pp3_ref = pp1_ref
% b and a are taken from out(1) and out(2) of lrest; out(3) is only used
% in the "Normalized Standard Deviation" report.
% NOTE(review): lrest and gci_dis are defined elsewhere. Presumably lrest
% fits pp2_ref against pp1_ref (third argument selecting the model) and
% gci_dis returns GCI positions between spoint and epoint for pitch period
% p00 - confirm against those files.

% find the starting and ending GCI points for the source speech:
% the gci1 entries closest to a quarter frame before the start of the
% segment's first frame and to 5/4 frame after the start of its last frame.
gci_s=zeros(1,length(vcidx11));
gci_e=zeros(1,length(vcidx11));
for k=1:length(vcidx11)
    spoint=(vcidx11(k)-1)*m_len-m_len/4;
    [dum,idx]=min( abs(gci1-spoint) );
    gci_s(k)=gci1(idx); % starting GCI point
    epoint=(vcidx12(k)-1)*m_len+5*m_len/4;
    [dum,idx]=min( abs(gci1-epoint) );
    gci_e(k)=gci1(idx); % ending GCI point
end

gci3=[];
gci_method=get(v_pp_pit,'Value');

% ---- step 1: target pitch-period contour --------------------------------
if gci_method==1
    out=lrest(pp1_ref, pp2_ref, 0, SHOW);
    b=out(1);
    pp3_ref=pp1_ref+b;
elseif gci_method==2
    out=lrest(pp1_ref,pp2_ref,1,SHOW);
    a=out(2);
    b=out(1);
    pp3_ref=a*pp1_ref+b;
elseif gci_method==3
    pp3_ref=pp2_ref;
elseif gci_method==4
    gci3=gci1;
    pp3_ref=pp1_ref;
end

% ---- step 2: place the new GCIs frame by frame (methods 1-3) ------------
% The loop always walks the SOURCE segments vcidx11..vcidx12. pp3_ref has
% one entry per frame of x_ref, gci_s / gci_e hold one entry per source
% segment, and epoint is a position on the source time axis. The linear
% model previously iterated over the target segments vcidx21..vcidx22,
% which misplaced epoint and could run kk past the end of pp3_ref.
if any(gci_method==[1 2 3])
    % NOTE(review): the original code ended a frame window at
    % kf*m_len+2*Order+1 for methods 1 and 2 but at kf*m_len+2*Order for
    % method 3. Both offsets are preserved; confirm whether the difference
    % is intended.
    if gci_method==3
        e_off=2*Order;
    else
        e_off=2*Order+1;
    end
    kk=0;
    for k=1:length(vcidx11)
        s_fram=vcidx11(k);
        e_fram=vcidx12(k);
        spoint=gci_s(k);
        for kf=s_fram:e_fram
            kk=kk+1;
            if kf==e_fram
                % the last frame of a segment ends at its ending GCI point
                epoint=gci_e(k);
            else
                epoint=kf*m_len+e_off;
            end
            p00=pp3_ref( kk );
            gci_insert=gci_dis(p00,0,spoint,epoint);
            gci3=[gci3 gci_insert];
            % the next frame continues from the last GCI just placed; if
            % none was placed, keep the previous starting point
            o_spoint=spoint;
            spoint=max(gci_insert);
            if isempty(spoint)
                spoint=o_spoint;
            end
        end
    end
end

% ---- step 3: report the fitted model ------------------------------------
if gci_method==1
    disp('The bias model is used to convert the pitch contour.');
    disp(b);
    disp('Normalized Standard Deviation=');
    disp(out(3)/mean(pp2_ref));
elseif gci_method==2
    disp('The linear model is used to convert the pitch contour.');
    disp([a b]);
    disp('Normalized Standard Deviation=');
    disp(out(3)/mean(pp2_ref));
end

% Optional plot of the source, converted and target pitch-period contours.
if SHOW==1
    figure(dtwdisplay_f);
    plot(x_ref,pp1_ref,'y',x_ref,pp3_ref,'r',x_ref,pp2_ref,'g');
    title('pitch conversion (source: yellow ; converted: red ; target:green)');
    pause(1);
end
clear gci_insert p00 pp1 pp2 pp1_ref pp2_ref pp3_ref tmp e_off;
%---------------------------------------------------
%************ convert ngm1 to match ngm2 **********
% ngm3 starts as a copy of ngm1.
ngm3=ngm1;
% The gain-conversion method is read from the 'Value' property of v_pp_gai.
ngm_method=get(v_pp_gai,'Value');
if ngm_method==1
out=lrest(ngm1_ref, ngm2_ref, 0, SHOW)
b=out(1);
ngm3_ref=abs(ngm1_ref+b);
x_ref=road1(1,:);
tM2=length(x_ref);
tmp_gm=zeros(tM2,4);
% ---interpolate ngm3---
s_fram=1;
for k=1:length(vcidx11);
e_fram=s_fram+vcidx12(k)-vcidx11(k);
% smoothing the starting frame
kf=s_fram;
ngm3_ref(kf)=0.13*ngm3_ref(kf+1);
gm_x=[1 m_len/2 m_len+m_len];
gm_y=[0 ngm3_ref(kf) ngm3_ref(kf+1)];
gm_z=m_len/4*[0 1 2 3]+m_len/8;
tmp=interp1(gm_x,gm_y,gm_z,'spline');
tmp_gm(kf,1:4)=tmp(:)';
for kf=s_fram+1:e_fram-1
gm_x=[-1*m_len/2 m_len/2 m_len+m_len];
gm_y=[ngm3_ref(kf-1) ngm3_ref(kf) ngm3_ref(kf+1)];
gm_z=m_len/4*[0 1 2 3]+m_len/8;
tmp=interp1(gm_x,gm_y,gm_z);
tmp_gm(kf,1:4)=tmp(:)';
end
% smoothing the ending frame
kf=e_fram;
ngm3_ref(kf)=0.13*ngm3_ref(kf-1);
gm_x=[-1*m_len/2 m_len/2 m_len];
gm_y=[ngm3_ref(kf-1) ngm3_ref(kf) 0];
gm_z=m_len/4*[0 1 2 3]+m_len/8;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -