vdet5st.m

来自「这是一个用于语音信号处理的工具箱」· M 代码 · 共 256 行
256 行
% Console banner: one blank spacer line, then a line naming this script,
% so the start of this processing stage is easy to find in the output.
disp(' ');
disp('SCRIPT:   vdet5st.m ***********************************************');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 	jmw 	1/17/94
%		1/12/94
%	
% 	playing with Weinstein, et al., algorithm for parsing based
% 	upon RMS values of BP filters
% 	gain calculated from sqrt of res NRG
%
%	this script attempts to distinguish voiced vowels from voiced consonants
%
%	THIS VERSION ALSO ADDS: multiple thresholds to better determine 
%	accurate v/c transition location...
%	
%	AND LOADS RESULTS FROM r1_ratio3st (sonorant score) to
%	eliminate non-sonorant segments being classified as vowels ...
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

		PLT = 1; 	% plot flag: nonzero draws the three-panel results figure near the end of the script

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Load the three input files for the utterance identified by `name`.
% NOTE(review): `name` is not assigned anywhere in this script; it must
% already exist in the workspace as a string that is also a valid variable
% name (it is evaluated as a variable right after the first load).

% 1) waveform file: the variable it defines, named by `name`, becomes `signal`
file_string = ['temp/' name '.mat'];
disp(['loading ./' file_string ' from hard disk ...']);
s = ['load ' file_string];
eval(s);
signal = eval(name);

% 2) per-frame analysis data
%    (presumably supplies cofa, range and VUS_voicetype used further down --
%    confirm against whatever script writes this file)
file_string = ['temp/' name '_Data.mat'];
disp(['loading ./' file_string ' from hard disk ...']);
s = ['load ' file_string];
eval(s);

% 3) sonorant score (presumably supplies son_score -- confirm as above)
file_string = ['temp/' name '_SONscore.mat'];
disp(['loading ./' file_string ' from hard disk ...']);
s = ['load ' file_string];
eval(s);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% compute freq response
%
% For each selected frame, evaluate the magnitude response of the filter
% 1/A(z), where A(z) has the coefficients in row i of cofa, at N frequency
% points, then form the ratio of the RMS-style magnitude in a low band to
% that in a high band.
%
% Outputs left in the workspace (one entry per frame, 0 where not computed):
%	e1   - sqrt of summed squared magnitude over bins start1..stop1
%	e2   - sqrt of summed squared magnitude over bins start2..stop2
%	vc   - e1 ./ e2
%	gain - numerator gain passed to freqz (currently fixed at 1)
N=256;			% number of frequency points requested from freqz

% band edges, expressed as indices into the N-point response
% NOTE(review): the plot title further down labels these bands as
% (0-1000)/(1000-5000); the mapping from bin to Hz depends on a sampling
% rate that is not visible in this script -- confirm.
start1 = 2;
stop1 = 52;
start2 = 53;
stop2=N;

[m,n]=size(cofa);	% m = number of frames (rows of cofa)
vc = zeros(m,1);
e1 = zeros(m,1);
e2 = zeros(m,1);
gain = zeros(m,1);

for i=1:m,

	% A frame is processed when it, or the frame right after it, is voiced.
	% min(i+1,m) clamps the look-ahead at the last frame; the previous
	% 1:m-1 loop never analysed frame m, so a voiced final frame kept
	% vc = 0 and was later scored as a full vowel.
	if(VUS_voicetype(i)=='v' | VUS_voicetype(min(i+1,m))=='v'   )

		% gain used to be sqrt of the residual energy over the frame's
		% samples range(i,1):range(i,2); fixed at unity since 9/30/94 jmw
		gain(i) = 1;

		[h,w]=freqz(gain(i),cofa(i,:),N);
		mag=abs(h);

		e1(i)=sqrt(mag(start1:stop1)' * mag(start1:stop1));
		e2(i) = sqrt(mag(start2:stop2)' * mag(start2:stop2));
		vc(i) = e1(i) /e2(i);
	end;
end;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% smooth vc ratio
%
% MFO is the median filter order handed to median1 (a helper defined
% outside this file). History: changed from 7 back to 5 on 2/21/94.
MFO = 5;
disp(sprintf('smoothing vc ratio with median filter of order %d ...', MFO));
vc = median1(vc, MFO);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% calculate (voiced) vowel score
%
% Rule applied to every voiced frame (VUS_voicetype == 'v'):
%	vc <= v_thresh_LOW                -> score 1
%	v_thresh_LOW < vc < v_thresh_HI   -> (v_thresh_HI - vc) / R, i.e. a
%	                                     straight line from 1 down to 0
%	vc >= v_thresh_HI                 -> score 0
% Frames that are not voiced keep the initial score of 0.

v_thresh_LOW = 8;			% thresholds - empirically determined
v_thresh_HI  = 18;
R = v_thresh_HI - v_thresh_LOW;		% R = "range"
v_score = zeros(1,m);

% Evaluate the line for all voiced frames at once and clamp it to [0,1];
% the clamp reproduces the two flat parts of the rule.
vframes = find(VUS_voicetype(1:m) == 'v');
ramp = (v_thresh_HI - vc(vframes)) / R;
v_score(vframes) = min(1, max(0, ramp));

% this is a script: remove the temporaries so they do not linger
clear vframes ramp

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% eliminate non-sonorant frames classified as vowels
disp('removing non-sonorant frames classified as vowels ...');

NS_thresh = 0.5;	% empirically determined (halfway point)

% any frame whose sonorant score is below the threshold loses its vowel score
v_score(find(son_score(1:m) < NS_thresh)) = 0;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% segment
%
% Group consecutive frames with v_score > 0 into segments.
% Results:
%	seg_cnt         - number of segments found
%	seg_frames(k,:) - [first_frame last_frame] of segment k (inclusive)

seg_cnt = 0;
seg_frames = [];	% start empty so no rows from an earlier run survive
flg = 1;		% 1 = not inside a segment, 0 = inside one

for i = 1:m,
        if ( v_score(i) > 0 )
                if ( flg == 1 )
                        % start new segment
                        seg_cnt = seg_cnt + 1;
                        seg_frames(seg_cnt,1) = i;
                        flg = 0;
                end;
                % Record this frame as the segment's last frame so far.
                % Writing it on every vowel frame, including the first,
                % means a segment that opens on the final frame still
                % gets a valid end index.
                seg_frames(seg_cnt,2) = i;
        else
                % outside a segment, or one just ended on the previous frame
                flg = 1;
        end;
end;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% discard segments that are too short
%
% A segment's extent in samples is taken as range(last_frame,2) minus
% range(first_frame,1); segments whose extent is below VWL_thresh have
% their vowel scores zeroed. `events` counts how many were removed.

VWL_thresh = 150;		% - empirically determined
events = 0;

% the loop body simply never runs when seg_cnt is 0
for i = 1:seg_cnt,
        a = seg_frames(i,1);		% first frame of the segment
        b = seg_frames(i,2);		% last frame of the segment
        if ( (range(b,2) - range(a,1)) < VWL_thresh )
                % discard - too short
                events = events + 1;
                v_score(a:b) = 0;
        end;
end;

s = sprintf('eliminating vowel segments less than %d samples: ... %d events', VWL_thresh, events);
disp(s);
		
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Diagnostic figure (only when PLT is nonzero), three stacked panels that
% share the x-limits of the waveform plot:
%	1) the signal
%	2) the smoothed vc ratio with both thresholds drawn as dashed lines
%	3) the final vowel score
if (PLT)

	titles = 1;	% turn off for diss figs

	disp('plotting results ...');

	% reuse (and clear) the current figure
	h=gcf;
	figure(h);
	clf;

	% panel 1: waveform; its axis limits are captured for the other panels
	subplot(311);
	plot(signal);
	grid on;
	axis_1=axis;
	axis(axis_1);
	s=sprintf('%s', name);
	if (titles)
		title(s);
	end;

	% panel 2: smoothed ratio, one step per frame, placed at the first
	% column of range for each frame
	subplot(312);
	stairs(range(:,1),vc);
	grid on;
	axis([axis_1(1) axis_1(2) 0 40]);
	s=sprintf('smoothed vc ratio (0-1000)/(1000-5000) - order %d median filt', MFO);
	hold on;
	% Threshold lines are drawn from two endpoints spanning the x-limits.
	% Building them with ones(1,axis_1(2)) needed an integer axis limit
	% and a vector as long as the signal just to draw a horizontal line.
	plot([axis_1(1) axis_1(2)], [v_thresh_HI v_thresh_HI], '--');
	plot([axis_1(1) axis_1(2)], [v_thresh_LOW v_thresh_LOW], '--');
	hold off;
	if (titles)
		title(s);
	end;

	% panel 3: vowel score, with some headroom around the 0..1 range
	subplot(313);
	stairs(range(:,1),v_score);
	grid on;
	axis([axis_1(1) axis_1(2) -0.5 1.5]);
	if (titles)
		title('Vowel Score (voiced segments only)');
	end;

	drawnow;
end;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% save to disk
%
% The final score is stored under the variable name vwl_score in
% temp/<name>_VWLscore.
vwl_score = v_score;
disp(sprintf('saving ./%s_VWLscore to disk ...', name));
s = ['save temp/' name '_VWLscore vwl_score'];
eval(s);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% clean up time
%
% This is a script, so everything it created lives in the shared workspace
% and has to be removed by hand. `name` itself is deliberately left alone.

% flags, constants and thresholds
clear PLT titles N MFO R NS_thresh VWL_thresh
clear v_thresh_LOW v_thresh_HI start1 stop1 start2 stop2

% per-frame inputs and other names this script never assigns
% (presumably brought in by the load commands -- not verifiable from here)
clear cofa range VUS_voicetype son_score
clear residue power res_nrg voicetype

% signal copy, intermediate arrays and results
clear signal vc e1 e2 gain mag h w
clear v_score vwl_score seg_frames seg_cnt events flg

% scratch scalars and strings
clear a b i m n axis_1 file_string s s1

% finally remove the waveform variable whose name is stored in `name`
eval(['clear ' name]);
vdet5st.m - 源码说明

本页面展示了「这是一个用于语音信号处理的工具箱」中的 vdet5st.m 源码文件，采用 M 编程语言编写，共 256 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与语音信号处理相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?