trk_main.m

来自「这是一个用于语音信号处理的工具箱」· M 代码 · 共 188 行

188 行

%
%  Formant tracking using iterative GIF analysis
%  Modified by D. G. Childers August, 1998

% ---- Initialisation ---------------------------------------------------
% Working copy of the input signal; SPEECH must already exist in the
% workspace when this script is run.
speech1 = SPEECH;

% Three zero-filled buffers with the same size as the signal.
% NOTE(review): presumably filled in by the helper scripts called below -
% confirm against analy_1c / pkpk_1c.
[GVV, DGVV, RESIDUE] = deal(zeros(size(speech1)));

% Output file name built from the workspace variable `name`. It is not used
% again in this script; it is defined before the helper scripts run.
S = sprintf('./track/out/%s_g.mat',name);

% Helper scripts (defined elsewhere). The code below expects `gci` to exist
% once they have run.
analy_1c;
pkpk_1c;

% Take a fresh copy of gci and count its entries.
clear gci_a;
gci_a   = gci;
num_gci = length(gci_a);

% Accumulators for the concatenated per-frame results.
dg_final = [];
g_final  = [];

% Number of extra samples taken to the left/right of each analysis frame.
[ext_left, ext_right] = deal(0);

% 3rd-order FIR low-pass filter, normalised cut-off 0.5.
LPF3 = fir1(3,0.5);

% One row per gci entry, five columns each, written by frm_trk2 in the
% analysis loop (frequencies in ff, bandwidths in fb per the wait-window text).
ff = zeros(num_gci,5);
fb = ff;

% ---- Progress window --------------------------------------------------
% A small figure holding a slider (used as a progress bar, range
% 0..num_gci) and a static text line. BACK_COLOR comes from the workspace.
wait_counter = 0;
wait_window = figure('Numbertitle','off', 'Color',BACK_COLOR, ...
   'Name','Calculating formant contours..Please wait', ...
   'Position',[117 248 560 150]);
pause(0.01);   % short pause so the new figure gets drawn

% Slider acting as the progress indicator.
wait_window_slider = uicontrol('Style','Slider', ...
   'Position',[50 100 410 30], ...
   'Max',num_gci, 'Min',0, 'Value',wait_counter);

% Explanatory text shown underneath the slider.
wait_window_display = uicontrol('Style','Text', ...
   'Position',[50 50 410 30], ...
   'Backgroundcolor','white', 'Foregroundcolor','blue', ...
   'String','Calculating the formant frequencies and bandwidths for each pitch period');

% Move the slider to its first position before the analysis loop starts.
wait_counter = 1;
set(wait_window_slider,'Value',wait_counter);
pause(0.1);
% ---- Frame-by-frame iterative inverse filtering ----------------------
% One analysis frame spans two consecutive entries of gci_a. For every
% frame a two-pass LPC procedure estimates the vocal-tract filter (Hvt2),
% from which frm_trk2 fills row cur_gci of ff and fb, and the
% inverse-filtered frame is stored for later concatenation.
%
% The per-frame outputs are collected in cell arrays and concatenated once
% after the loop instead of growing dg_final/g_final on every iteration,
% which avoids repeatedly re-copying the accumulated signal.
dg_parts = cell(max(num_gci-1,0),1);
g_parts  = cell(max(num_gci-1,0),1);

for cur_gci=1:num_gci-1
   % Frame without (ydata_a) and with (ydata_b) the optional extension
   % of ext_left/ext_right samples on either side.
   ydata_a=SPEECH(gci_a(cur_gci):gci_a(cur_gci+1));
   ydata_b=SPEECH(gci_a(cur_gci)-ext_left:...
         gci_a(cur_gci+1)+ext_right);
   % NOTE(review): ydata_p is not read anywhere in this script; kept
   % because script variables remain visible to other scripts.
   ydata_p=diff(ydata_a);
   ydata_lp=filter(LPF3,1,ydata_b);

   % Estimate glottal effect to speech (1st-order LPC on the windowed frame)
   ydata_w=hamming(length(ydata_a)).*ydata_a;
   Hg1=lpc(ydata_w,1);

   % Eliminate the estimated glottal contribution
   tmp=filter(Hg1, 1, ydata_lp);
   tmp_w=hamming(length(tmp)).*tmp;

   % The first estimate for the vocal tract
   Hvt1=lpc(tmp_w,num_poles);

   % Eliminate the effect of vocal tract
   tmp=filter(Hvt1,1,ydata_lp);

   % The first estimate for the glottal excitation
   % (integ_1a is defined elsewhere; presumably an integrator - confirm)
   g1=integ_1a(tmp);
   g1_w=hamming(length(g1)).*g1;

   % Second iteration begins here: 4th-order model of the first estimate
   Hg2=lpc(g1_w,4);

   % Eliminate the estimated glottal contribution
   tmp=filter(Hg2,1,ydata_lp);
   tmp_w=hamming(length(tmp)).*tmp;

   % The final model for the vocal tract
   Hvt2=lpc(tmp_w,num_poles);
   % NOTE(review): Fs/Ws are not read in this script; kept because other
   % scripts sharing the workspace may rely on them.
   [Fs Ws]=freqz(1,Hvt2,2048);
   [ff(cur_gci,:) fb(cur_gci,:)]=frm_trk2(Hvt2, Ts);

   % Eliminate the effect of vocal tract, then apply the
   % [1 -1]/[1 -.99] filter (zero at DC)
   dg2=filter(Hvt2,1,ydata_lp);
   dg2=filter([1 -1],[1 -.99],dg2);

   % The final estimate for the glottal excitation
   g2=integ_1a(dg2);
   g2=filter([1 -1],[1 -.99],g2);

   % Store this frame with the extension samples trimmed off
   dg_parts{cur_gci}=dg2(ext_left+1:length(dg2)-ext_right);
   g_parts{cur_gci}=g2(ext_left+1:length(g2)-ext_right);

   % Advance the progress slider and flush the graphics queue so the
   % change is actually painted while the loop is still running.
   wait_counter = wait_counter+1;
   set(wait_window_slider,'Value',wait_counter);
   drawnow;
end

% Connect each analysis frame (appended to whatever the accumulators held)
dg_final=[dg_final ; vertcat(dg_parts{:})];
g_final=[g_final ; vertcat(g_parts{:})];
clear dg_parts g_parts;

set(wait_window_display,'String','Thank you for your patience. Now printing the formant contour');
pause(0.01);

% Remove the progress window and its bookkeeping variables
close(wait_window);
clear wait_window wait_counter;

% One more pass of the same filter over the concatenated signal
dg_final = filter([1 -1],[1 -.99],dg_final);

% Position/size and title of the output (analysis) window
PV=[270 44 515 268];
s2 = ' Formant Contours - Output';

% Reuse an existing output window when possible.
% The stored handle pit_out_win_h is probed with a guarded `get`; if that
% fails, the fallback code in `catch2` runs instead. `catch2` and `check`
% are defined outside this script - presumably catch2 sets check to 0;
% confirm against the caller.
if exist('pit_out_win_h')==1
   try1 = 'get(pit_out_win_h,''position'');';
   eval(try1,catch2);
   if check ==0
      % Probe failed: drop the handle and reset the flag
      clear pit_out_win_h;
      check = 1;
   else
      s1 = get(pit_out_win_h,'Name');
      if strcmp(s1,s2)
         % Same kind of window: bring it to the front and reuse it
         figure(pit_out_win_h);
      else
         % Handle belongs to a window with a different title: drop it
         clear pit_out_win_h;
      end
   end
end

% No usable handle left: open a new output window
if exist('pit_out_win_h')~=1;
   pit_out_win_h=figure('Position',PV,...
      'Resize','on',...
      'Numbertitle','off',...
      'Name',s2);
end
% ---- Pitch contour and first-order perturbation ----------------------
% NEW_LEN replaces the older variable LEN used by the previous method.
NEW_LEN = length(SPEECH);
pit_trk = pitchtrk(gci, NEW_LEN, Ts);

% LEFT rescaled from the NEW_LEN range to the pit_trk range; a result of
% exactly 0 is replaced by 1.
l1 = round(length(pit_trk)*LEFT/NEW_LEN);
if l1==0, l1 = 1; end

% Length-5 running median of pit_trk. Entry k is the median of samples
% k..k+4, so the last four output entries keep their initial value 0.
t_pit_trk  = zeros(1,length(pit_trk));
med_filter = zeros(1,5);
for k = 1:length(pit_trk)-4
   med_filter(:) = pit_trk(k+(0:4));
   t_pit_trk(k)  = median(med_filter);
end
pit_trk = t_pit_trk;

% Sample-to-sample difference of the smoothed contour (first entry is 0)
perturb = zeros(1,length(pit_trk));
perturb(2:end) = diff(pit_trk);

% The same length-5 running median, applied to the perturbation
t_perturb  = zeros(1,length(pit_trk));
med_filter = zeros(1,5);
for k = 1:length(pit_trk)-4
   med_filter(:) = perturb(k+(0:4));
   t_perturb(k)  = median(med_filter);
end
perturb = t_perturb;

% RIGHT rescaled from the NEW_LEN range to the pit_trk range
% (NEW_LEN replaces the older variable LEN)
l2=round(length(pit_trk)*RIGHT/NEW_LEN);
%subplot(4,1,1);
%vh=1:length(pit_trk);
%vh=vh/length(pit_trk)*NEW_LEN;
%plot(vh,pit_trk);
%xlabel('time');
%title('Pitch contour');
%subplot(4,1,2), plot(vh,perturb);
%title('Perturbation - order 1');

% Expand the per-frame formant estimates in ff into a formant track
% (frm_trk is defined elsewhere).
formtrk=frm_trk(gci_a, ff, Ts, RIGHT-LEFT);
[M,N]=size(formtrk);
%
%formtrk contains 5 formants for each sample value.
%create a new file with the first four formants sampled
%at every 100th sample so that this file is compatible 
%with the articulatory synthesizer.
%
% Start at row 1. The start index used to be `k`, a value left over from
% the preceding median-filter loop, which made the decimation begin near
% the end of the pitch track (or yield nothing) instead of at the start.
formtrk_4=formtrk(1:100:M,1:4);


% Top panel: decimated formant contours; bottom panel: the speech signal
subplot(2,1,1), plot(formtrk_4);
title('Formant contours');
subplot(2,1,2), plot(SPEECH, 'color',LINE_COLOR);
title('Speech');

trk_main.m - 源码说明

本页面展示了「这是一个用于语音信号处理的工具箱」中的 trk_main.m 源码文件，采用 M 编程语言编写，共 188 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与语音信号处理相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?