% vcapp.m
% (From a toolbox for speech signal processing; M code, 211 lines in the
% original listing.)
% Script: vcapp.m is an m-file to execute the voice conversion algorithm.
%
% vcapp is a call function evoked by "apply_pb" on figure(vwin_f)
if apply==0
    % User cancelled the settings dialog: restore every option popup to its
    % first entry, reset the two load-button labels, and bail out.
    set([v_pp_pth v_pp_meas v_pp_pit v_pp_gai v_pp_glo v_pp_ff v_pp_typseg],...
        'Value',1);
    set(speech1_pb,'String','load analyzed file');
    set(speech2_pb,'String','load analyzed file');
    return;
end
%-----------------------------------------------------------------------------
% Use Dynamic Time Warping technique to find the time-aligned warping function
%-----------------------------------------------------------------------------
% 0. get the user defined Dynamic Time Warping settings
% basic=[measure constraint pAth Order fram_len over_len lens]
% 0.1 get the distortion measure method
% 0.1 Distortion-measure choice from the popup:
%     1 = log spectral, 2 = weighted cepstral, 3 = Itakura
val=get(v_pp_meas,'Value');
switch val
    case 1
        measure=1; %==> log spectral
    case 2
        measure=2; %==> weighted cepstral;
    case 3
        measure=3; %==> Itakura measure;
end
% 0.2 Local-path-constraint selection (dispatches to path1..pathita1 below).
pAth=get(v_pp_pth,'Value');
% 0.3 Should the DTW search be visualized while it runs?
SHOW=get(v_pp_sho,'value');
if SHOW==1
    % Bring the DTW display window forward before the search starts.
    set(dtwdisplay_f,'Visible','on');
    figure(dtwdisplay_f);
end
% 0.4 seperate the voiced region from the unvoiced region
% since only the voiced utterance are considered by DTW
%
% Locate the start (vcidx*1) and end (vcidx*2) frame of every voiced run.
% A gap greater than one frame in the sorted voiced-frame index list marks
% the boundary between two separate voiced segments.
vcidx=find(vctyp1>0); % frames of the source speech marked voiced
inspect=find(diff(vcidx)>1); % positions after which a new voiced run begins
if isempty(inspect)
    % A single voiced run spanning the whole index list.
    vcidx11=vcidx(1);
    vcidx12=vcidx(length(vcidx));
else
    vcidx11=[vcidx(1) vcidx(inspect+1)];
    vcidx12=[vcidx(inspect) vcidx(length(vcidx))];
end
vcidx=find(vctyp2>0); % frames of the target speech marked voiced
inspect=find(diff(vcidx)>1);
if isempty(inspect)
    vcidx21=vcidx(1);
    vcidx22=vcidx(length(vcidx));
else
    vcidx21=[vcidx(1) vcidx(inspect+1)];
    vcidx22=[vcidx(inspect) vcidx(length(vcidx))];
end
clear vcidx inspect;
% 0.5 do DTW for each voiced segment
% 0.5 Run DTW on each voiced segment and accumulate the warping path.
% NOTE(review): the loop pairs source segment k with target segment k, so it
% assumes both utterances contain the same number of voiced segments --
% confirm this holds for every analyzed pair.
disp('DTW starts...')
road=[];
for k=1:length(vcidx11)
    % 1. Source data for this segment, with the unvoiced edges removed:
    %    the frame coefficients plus the corresponding raw samples.
    COF1=cofa1(vcidx11(k): vcidx12(k),:);
    SIG1=speech1( (vcidx11(k)-1)*m_len+1: vcidx12(k)*m_len+Order+1);
    % Target frame coefficients for the matching voiced segment.
    COF2=cofa2(vcidx21(k): vcidx22(k),:);
    road1=[];
    % Search the optimal path under the user-selected local path constraint.
    if pAth==1
        road1=path1(COF2,COF1,SIG1,[measure 1 pAth basic(4:7)],SHOW);
    elseif pAth==2
        road1=path2(COF2,COF1,SIG1,[measure 1 pAth basic(4:7)],SHOW);
    elseif pAth==3
        road1=path3(COF2,COF1,SIG1,[measure 1 pAth basic(4:7)],SHOW);
    elseif pAth==4
        road1=path4(COF2,COF1,SIG1,[measure 1 pAth basic(4:7)],SHOW);
    elseif pAth==5
        road1=pathita(COF2,COF1,SIG1,[measure 1 pAth basic(4:7)],SHOW);
    elseif pAth==6
        road1=pathita1(COF2,COF1,SIG1,[measure 1 pAth basic(4:7)],SHOW);
    end
    pause(1);
    % Shift the per-segment (1-based) path indices to absolute frame numbers.
    s1_fram=vcidx11(k);
    s2_fram=vcidx21(k);
    road1(2,:)=road1(2,:)+s1_fram-1; % source
    road1(1,:)=road1(1,:)+s2_fram-1; % target
    road=[road road1];
    % BUG FIX: the original cleared SIG2, a variable that is never created in
    % this loop; SIG1 is the temporary that must be released each iteration.
    clear COF1 COF2 SIG1 road1;
end
% Display the accumulated warping path (target frame -> source frame).
plot(road(1,:),road(2,:),'k+');
xlabel('target');
ylabel('source');
title('warping path')
drawnow;
% 2. Dispatch to the appropriate conversion routine, chosen by the
%    speaking-rate option and the segmentation-type option.
if get(v_pp_srate,'Value')==1
    if get(v_pp_typseg,'Value')==1
        % Target speaking rate, no segmentation: plain frame-wise conversion.
        vc1;
    else
        % Map the threshold popup entry to a numeric segmentation threshold.
        if get(v_pp_thres,'Value')==1
            threshold=0.2;
        elseif get(v_pp_thres,'Value')==2
            threshold=0.5;
        else
            threshold=0.8;
        end
        seg=segment(speech2,basic,threshold);
        if get(v_pp_typseg,'Value')==2
            % Automatic segmentation: overlay the boundaries on the waveform.
            old_window=gcf;
            hola=figure('Unit','normalized',...
                'Position',[0.165 0.4 0.55 0.5],...
                'Resize','on',...
                'Color','white',...
                'Numbertitle','off',...
                'Name','Segmentation');
            figure(hola);
            segmentation=zeros(size(speech2));
            for k=1:length(seg)
                % Impulse of height 10000 at each segment-boundary sample.
                % NOTE(review): assumes seg(k)*(basic(5)-basic(6)) >= 1 for
                % every entry -- confirm against segment()'s return values.
                segmentation(seg(k)*(basic(5)-basic(6)))=10000;
            end
            w=1:length(speech2);
            plot(w,speech2,w,segmentation);
            figure(old_window);
            vcs1;
        else
            % Manual segmentation: let the user check/correct the boundaries.
            segcheck;
        end
    end
else
    if get(v_pp_typseg,'Value')~=1
        % Segmentation is only supported with the target speaking rate;
        % pop up an explanatory window (execution then falls through to the
        % completion message below, as in the original).
        wartyp_f=figure('Units','Normalized',...
            'Numbertitle','off',...
            'Position',[.265 .4 .3 .2 ],...
            'Color','white',...
            'Name','Formant Track Display');
        % FIX: renamed the two handles below from "text", which shadowed the
        % built-in text() function; the handle values themselves are unused.
        txt_line1=uicontrol('Style','text',...
            'Units','normalized',...
            'Position',[0.1 0.61 0.8 0.15],...
            'BackGroundColor','white',...
            'String','Sorry, segmentation is not available');
        txt_line2=uicontrol('Style','text',...
            'Units','normalized',...
            'Position',[0.1 0.5 0.8 0.15],...
            'BackGroundColor','white',...
            'String','with source speaking rate.');
        return_pb=uicontrol('Style','Pushbutton',...
            'Units','normalized','Position',[.2 .3 .6 .2],...
            'Callback','close(wartyp_f);',...
            'String','Return');
    else
        vc2;
    end
end
% Completion notice: pop up a "Done" window once the conversion has finished.
message_window = figure('Name','Done',...
    'Numbertitle','off',...
    'Position',[117 248 560 150],...
    'Color',BACK_COLOR);
% Static text explaining what the user may do next.
message_window_display = uicontrol('Style','Text',...
    'Backgroundcolor','white',...
    'Foregroundcolor','blue',...
    'Position',[50 50 410 70],...
    'String','Voice conversion is completed. You may run the synthesizer after you close this window. Or if you selected manual segmentation and are using the formant tract model, you may correct the number of segments first. Then new synthesis calculations are made, after which you may select synthesize. There is no new message. Please close this window before continuing. ');
% Optional auto-close behavior, kept disabled as in the original:
%pause(15.);
%close(message_window);
%clear message_window;