📄 fbana.m
字号:
% Function: perform Formant Based Linear Prediction Analysis
function [voicetype,gci,ir,cofa,FF,FB,gm,gpcof,ncidx,ncgm]=fbana(signal,basic);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%% %%
%%%%%%% Fixed-frame linear prediction analysis %%
%%%%%%% %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The first step creates the following variables:
%
% cofa = LP coefficients.
% energy = energy of the signal in the analyzed frame
% rsd = resultant residue signal
% emp = first-order reflection coefficient.
% npow = square root of residue power.
% voicetype = 1 ==> voiced,
% = 0 ==> unvoiced/silence.
% nframe = number of frames.
% ntotal = number of samples.
%retrieve the basic specification
F_len=basic(5);
O_lap=basic(6);
Order=basic(4);
M_len=F_len-O_lap;
if (length(signal)<=Order)
disp('Error in [fbana] function');
error('Sorry, the length of signal must be longer than Order.');
end;
ntotal=length(signal);
nframe=floor(ntotal/M_len);
% allocate the vector space
cofa=zeros(nframe,Order+1);
energy=zeros(1,nframe);
emp=zeros(1,nframe);
compa=1e7; % define compa
sso=signal(1:F_len+Order);
[cofa1,emp(1),energy(1),rsd1,npow(1)]=lpc_h(sso,Order);
rsd1=sqrt(energy(1)/(rsd1*rsd1'))*rsd1;
cofa(1,:)=cofa1;
rsd(Order+1:F_len+Order)=rsd1;
ss1=rsd1(M_len+1:F_len);
for k=2:nframe-1
sso=signal((k-1)*M_len+1:k*M_len+Order+O_lap);
[cofa1,emp(k),energy(k),rsd1,npow(k)]=lpc_h(sso,Order);
%*** backward prediction ***%
%[cofa2,tmp]=lpc_h(rev(sso),Order);
%cofa1=polystab( (cofa1+cofa2)/2 );
%emp(k)=(emp(k)+tmp)/2;
%*** backward prediction ***%
rsd1=sqrt(energy(k)/(rsd1*rsd1'))*rsd1; % Normalize the LP gain
% Set the energy as the geometric mean of the energy terms for two
% individual subframes, each of which is of 100 samples (10ms).
energy1=sum( sso(1+Order:M_len/2+Order).^2 );
energy2=sum( sso(1+M_len/2+Order:M_len+Order).^2 );
energyidx(k)=sqrt(energy1*energy2);
ss2=rsd1(1:O_lap);
ss=(ss1.*(O_lap:-1:1)+ss2.*(1:O_lap))/(O_lap+1);
% Smooth the transition within the overlapped region
rsd((k-1)*M_len+Order+1:k*M_len+O_lap+Order)=[ss rsd1(O_lap+1:F_len)];
ss1=rsd1(1+M_len: M_len+O_lap);
cofa(k,1:length(cofa1))=cofa1;
end;
k=nframe;
sso=signal((k-1)*M_len+1:k*M_len+Order);
[cofa1,emp(k),energy(k),rsd1,npow(k)]=lpc_h(sso,Order);
rsd1=sqrt(energy(k)/(rsd1*rsd1'))*rsd1;
energy1=sum( sso(1+Order:M_len/2+Order).^2 );
energy2=sum( sso(1+M_len/2+Order+1: M_len+Order).^2 );
energyidx(k)=sqrt(energy1*energy2);
ss2=rsd1(1:O_lap);
ss=(ss1.*(O_lap:-1:1)+ss2.*(1:O_lap))/(O_lap+1);
rsd((k-1)*M_len+Order+1:k*M_len+Order)=[ss rsd1(O_lap+1:M_len)];
cofa(k,1:length(cofa1))=cofa1;
%------------------------
% Classify the voice type
%------------------------
voicetype(1)=0;
for kf=2:nframe-1
if emp(kf) > .3 & energyidx(kf) > 1.85*compa
voicetype(kf)=1;
else
voicetype(kf)=0;
end;
end;
voicetype(nframe)=0;
for k=2:nframe-1
if sum(voicetype(k-1:k+1)) > 1
voicetype(k)=1;
else
voicetype(k)=0;
end;
end;
%disp(' First-pass (frame-based) analysis is OK! '); toc;
%
% The first step creates the following variables:
% cofa = LP coefficients.
% energy = energy of the signal in the analyzed frame
% rsd = resultant residue signal
% emp = first-order reflection coefficient.
% npow = square root of residue power.
% voicetype = 1 ==> voiced,
% = 0 ==> unvoiced/silence.
% nframe = number of frames.
% ntotal = number of samples.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%% %%
%%%%%%% Find Glottal closure index %%
%%%%%%% %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This step will create the following variables :
%
% ploc = glottal closure instant located in each frame
% ppa = estimated pitch period for each voiced frame.
% gci = glottal closure index
ploc=zeros(nframe,10);
ppa=zeros(1,nframe);
% Smooth integrated residue.
ir1=integ(rsd);
ir1=filtfilt([1 -1],[1 -.99],ir1);
ir=filtfilt([1 -1],conv([1 -.9],[1 -.7]),ir1);
% Perform frame-based pitch estimation.
% For details, please refer to Section 4.1.3 of Hu's disseration.
for kf=2:nframe-1
if voicetype(kf)
rr=ir(M_len*(kf-1)+Order+1+M_len/2-256:M_len*kf+Order-M_len/2+256);
ss=signal(M_len*(kf-1)+Order+1+M_len/2-256:M_len*kf+Order-M_len/2+256);
r=rr((1:256)*2);
r=hanning(256)'.*r;
rc=real(ifft(abs(fft(r))));
rc=[zeros(1,15) rc(16:128)];
[mm nn]=max(rc(1:125));
[m n]=max(rc(1:nn-12));
if m>.7*mm
nn=n;
end;
b=(rc(nn+1)-rc(nn-1))/2;
a=(rc(nn+1)+rc(nn-1))/2-rc(nn);
devia=-b/(2*a);
ppa(kf)=round((nn-1+devia)*2);
end;
end;
% Smooth the pitch contour to prevent unreasonable excursion.
ppsmooth=1;
if ppsmooth
for kf=3:nframe-1
if ppa(kf-1)&ppa(kf)
ppa1=ppa(kf-2:kf-1);
pplast=round(mean(ppa1(ppa1~=0)));
ppno=max([round(ppa(kf)/pplast) 1]);
if ppno~=1
ppa(kf)=round(ppa(kf)/ppno);
end;
end;
end;
end;
%disp('Coarse pitch estimation is done!');
for kf=2:nframe-1
if voicetype(kf)
ss=signal(M_len*(kf-1)+Order+1-M_len/2: M_len*kf+M_len/2+Order);
rr=rsd(M_len*(kf-1)+Order+1-M_len/2: M_len*kf+M_len/2+Order);
irload=ir(M_len*(kf-1)+Order+1-M_len/2: M_len*kf+M_len/2+Order);
[m nrr]=min(irload(M_len/2+1:3*M_len/2));
% Search for the minimum. The location of this minimum is considered
% the first GCI.
nrr=nrr+M_len/2;
if nrr==M_len/2+1
while irload(nrr-1)<m
m=irload(nrr-1);
nrr=nrr-1;
end;
elseif nrr==3*M_len/2
while irload(nrr+1)<m
m=irload(nrr+1);
nrr=nrr+1;
end;
end;
% Approximate the smoothed curve by two straight lines.
% Choose the intersection of these two lines as the GCI.
irthd=irload(nrr)/3;
nadd=1;
while irload(nrr+nadd)<irthd
nadd=nadd+1;
end;
xn1=0:nadd;
yn1=irload(nrr:nrr+nadd);
aa=polyfit(xn1,yn1,1);
nsub=1;
while irload(nrr-nsub)<irthd
nsub=nsub+1;
end;
x2=-nsub:0;
y2=irload(nrr-nsub:nrr);
bb=polyfit(x2,y2,1);
nrr=nrr+round((bb(2)-aa(2))/(aa(1)-bb(1)));
startp=M_len*(kf-1)+Order;
ptype=ir(startp+nrr-M_len/2-15:startp+nrr+30-M_len/2); % prototype
% Search for the other GCI's by examining the corresponding
% crosscorrelation.
for k=1-10:260
r1=ir(startp+(k-15:k+30));
% rs(k+10)=ptype*r1'/sqrt(r1*r1');
rs(k+10)=ptype*r1';
end;
nrs=nrr+10-M_len/2;
pp=ppa(kf);
pph=floor(pp*.35);
mid=nrs;
pk=mid+pp;
pptmp1=[];
k1=1;
% Search forward and backward within the range from 10 to (F_len+10).
while pk < (F_len+10)
range=pk-pph:min([pk+pph F_len+20]);
[m n]=max(rs(range));
pptmp1(k1)=n-1+pk-pph;
pk= pptmp1(k1)+pp;
k1=k1+1;
end;
pk=mid-pp;
pptmp2=[];
k2=1;
while pk > 10
range=max([1 pk-pph]):pk+pph;
[m n]=max(rs(range));
pptmp2(k2)=n-1+max([1 pk-pph]);
pk= pptmp2(k2)-pp;
k2=k2+1;
end;
pptmp=[rev(pptmp2) mid pptmp1]-10;
pptmp=pptmp(pptmp>=1 & pptmp<=(F_len-20));
pptmp=pptmp(rs(pptmp+10)>rs(nrs)*.2);
ploc(kf,1:length(pptmp))=pptmp;
end;
end;
% Eliminate the redundancy occuring at frame boundaries and smooth the
% pitch period if necessary.
ploc1=zeros(size(ploc));
gci=[];
for kf=2:nframe-1
if ploc(kf,1)
if ploc(kf-1,1)==0
ppup=ploc(kf,:);
else
ppup=ppdown;
end;
ppdown=ploc(kf+1,:);
ppmid=min(ppup(ppup>M_len)-M_len);
ppup=ppup(ppup>0 & ppup<=M_len);
if length(ppmid)==0
ppadd=max(ppup)-M_len;
else
ppadd=ppmid;
end;
ppdown=ppdown(ppdown>0);
ppmid1=max(ppdown(ppdown <= ppadd+25));
if length(ppmid1) & length(ppmid)
ppmid=round((ppmid+ppmid1)/2);
end;
ppdown=[ppmid ppdown(ppdown>ppadd+25)];
if length(ppup)
ploc1(kf,1:length(ppup))=ppup;
end;
Element=ploc1(kf,find(ploc1(kf,:)~=0));
gci=[gci Element+(kf-1)*M_len+Order];
end;
end;
%disp('GCI identification is done!'); toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%% %%%%%%
%%%%%%% Formant Allocation %%%%%%
%%%%%%% %%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This step will create the following variables :
%
% FF = matrix of formant frequency
% FB = matrix of formant bandwidth
% Froot = matrix of formant roots
FF=zeros(nframe,5); % matrix for formant frequency
FB=zeros(nframe,5); % matrix for formant bandwidth
Froot=zeros(nframe,10); % matrix for formant roots
Fmethod=2;
%
% so far, Fmethod==2 produce the best synthetic speech
%
if Fmethod==1
%%----------------------------------------------------------------%
%% Fmethod=1 %
%% Directly split linear prediction frame by frame %
%% coefficients into 5 formant coefficients %
%%----------------------------------------------------------------%
for kf=1:nframe
cofa1=cofa(kf,:);
%[fpoly,bpoly,ff,fb]=splitlp3(cofa1); % without pole modification
[fpoly,bpoly,ff,fb]=splitlp5(cofa1); % with pole modification
fpoles=roots(fpoly)';
bpoles=roots(bpoly)';
Froot(kf,:)=fpoles;
FF(kf,:)=ff;
FB(kf,:)=fb;
end
% smooth the formant track
[FF,FB,Froot]=smfmtk(FF,FB);
elseif Fmethod==2
%%----------------------------------------------------------------%
%% Fmethod=2 %
%% Split linear prediction coefficients into formant and smooth %
%% the formant track at the same time %
%%----------------------------------------------------------------%
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -