lpsyn.m
来自「这是一个用于语音信号处理的工具箱」· M 代码 · 共 352 行
M
352 行
% Function: perform Formant Based Linear Prediction Speech Synthesis.
%
%   [syns,exc12] = lpsyn(voicetype,gci,cofa,gm,gpcof,ncidx,ncgm,basic)
%
% Inputs (described only by how this function uses them):
%   voicetype - per-frame flag; frames with voicetype==1 go through the
%               voiced (glottal pulse) path, frames with 0 through the
%               unvoiced (codebook noise) path.
%   gci       - vector of sample indices; consecutive entries delimit one
%               pitch period (presumably glottal closure instants - confirm
%               against the analysis stage).
%   cofa      - LP coefficients, one row per frame; size(cofa,1) defines
%               the number of frames.
%   gm        - gain term per pitch period; gm(nk)*lenss is used as the
%               target energy of pitch period nk.
%   gpcof     - glottal source parameters, one row per frame.
%   ncidx     - row indices into the stochastic codebook 'nc0', four per
%               frame (one for each quarter-frame segment).
%   ncgm      - gain terms for the unvoiced segments, four per frame;
%               ncgm(kf,kk)*Xlen is used as the segment target energy.
%   basic     - specification vector: basic(3)=glottal source model,
%               basic(4)=LP order, basic(5)=frame length,
%               basic(6)=frame overlap.
%
% Outputs:
%   syns      - synthetic speech.
%   exc12     - synthetic excitation.
%
% External dependencies (not defined in this file): polystab, gen_dgf1,
% lfpuls, filt, and a MAT-file 'nc' that must provide the variable 'nc0'.
%
% NOTE(review): at least two entries in 'gci' are required as soon as a
% voiced frame is synthesised, because the pitch period length is taken
% from the distance between neighbouring entries.
%
function [syns,exc12]=lpsyn(voicetype,gci,cofa,gm,gpcof,ncidx,ncgm,basic)

%retrieve the basic specification
F_len=basic(5);                 % frame length
O_lap=basic(6);                 % overlap between frames
Order=basic(4);                 % LP order
M_len=F_len-O_lap;              % frame advance (hop size)
nframe=size(cofa,1);            % number of analysis frames
ntotal=M_len*nframe+Order;      % total number of output samples

%--- Select Glottal Source model
%---- Smodel==1 --> 6 order polynomial model (sounds better)
%---- Smodel==2 --> LF model
Smodel=basic(3);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%                             %%
%%%%%%% Synthesis (voiced part)     %%
%%%%%%%                             %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function : synthesize speech by interplaying the analysis results
% under the structure of the proposed LP speech production model.
disp('LP Synthesis starts...');

signal1=zeros(1,ntotal);        % voiced synthesis buffer
exc12=zeros(1,ntotal);          % 'exc12' = synthetic excitation.
gcof=zeros(1,7);                % 'gcof' = smoothed polynomial.
pcofa=cofa(1,:);                % 'pcofa' = LP coefficients of the previous frame.
gp=[0 -1 0 0 0 0 0 0 0 0];      % initial pulse, used only as resampling fallback
Newstart=1;                     % 1 = next voiced pulse starts a new voiced segment

numgci=length(gci);
% Initial pitch period length. The original test was 'numgci>2', which left
% 'lenss' undefined (and crashed at 'olens=lenss') for exactly two GCIs.
if numgci>=2
    lenss=gci(2)-gci(1);
end

for nk=1:numgci
    % if(nk==415)
    % disp(nk);
    % end
    startp=gci(nk)+1;                       % starting point of the pitch period
    kf=fix( (startp-Order-1)/M_len )+1;     % frame number

    % last gci
    if nk==numgci
        endp=startp+lenss-1;                % ending point of the pitch period
    else
        endp=gci(nk+1);                     % ending point of the pitch period
        olens=lenss;                        % remember the previous period length
        lenss=gci(nk+1)-gci(nk);
    end

    % voiced-to-unvoiced transition: a period longer than one frame is
    % replaced by the previous period length.
    if lenss > F_len
        if voicetype(kf)==1 & voicetype(kf+1)==0
            lenss=olens;
            endp=startp+lenss-1;
        end
    end

    if voicetype(kf)==1 % construct the voiced excitation wave

        % 0.0 take care of the unvoiced to voiced transition
        %---------------------------------------------------
        % Previous frame unvoiced and no voiced output just before this
        % period: reset the interpolation state.
        if ( voicetype(kf-1) + signal1(startp-2) )==0
            pcofa=cofa(kf-1,:);
            gcof=zeros(1,7);
            Newstart=1;
        end

        % 1.0 Interpolate LP coefficients; for details, please see
        % Section 4.2. of Dr. Hu's dissertation.
        %---------------------------------------------------------
        cf0=cofa(kf-1,:);   % previous frame \
        cf1=cofa(kf,:);     % current frame   > LP coefficients
        cf2=cofa(kf+1,:);   % next frame     /
        % Pitch period end points relative to the start of frame kf.
        pt1=startp+1-(kf-1)*M_len-Order;
        pt2=endp+1-(kf-1)*M_len-Order;
        % Inverse-square weights from the summed distance of both end
        % points to the (previous / current / next) frame position.
        rw0=1/(abs(pt1+M_len-F_len/2+1)+abs(pt2+M_len-F_len/2+1))^2;
        rw1=1/(abs(pt1-F_len/2)+abs(pt2-F_len/2))^2;
        rw2=1/(abs(pt1-F_len/2-M_len)+abs(pt2-F_len/2-M_len))^2;
        rwall=rw0+rw1+rw2;
        w0=rw0/rwall;
        w1=rw1/rwall;
        w2=rw2/rwall;
        ocofa=pcofa;                                % coefficients of the previous pulse
        pcofa=polystab(w0*cf0+w1*cf1+w2*cf2);       % interpolated coefficients for this pulse

        % 2.0 create the glottal pulse
        %-----------------------------
        % interpolate the glottal phase
        if gcof(1)==0
            % no smoothed state yet: mix frame parameters with the global mean
            gcof=0.5*gpcof(kf,:)+0.5*mean(gpcof);
        elseif fix(endp/M_len)==kf % ending point in next frame
            gcof=0.5*gcof+0.5*gpcof(kf+1,:);
        else
            gcof=0.5*gcof+0.5*gpcof(kf,:);
        end

        % generate the pulse by source model
        o_gp=gp;
        if Smodel==1
            gp=gen_dgf1(gcof,lenss,1);
            % gen_dgf1 <--> polym1
        elseif Smodel==2
            % generate LF pulse
            gp=lfpuls(gcof(1),gcof(2),gcof(3),gcof(4),gcof(5),lenss+1,1);
            % differentiate the pulse for LP synthesis
            gp=diff(gp);
            gp=gp-mean(gp);
        end
        % Fallback: if the model returned nothing, resample the previous
        % pulse to the current period length.
        if isempty(gp)
            len1=length(o_gp);
            gp=interp1(0:1/(len1-1):1,o_gp, 0:1/(lenss-1):1 );
            gp=gp(:)';
        end

        % simulate the source-tract interaction
        % (experimental code, disabled in the original source)
        %dcof2=conv(pcofa.*((0.8).^(0:length(ocofa)-1)),[1 -.7]);
        %ncof2=conv(pcofa.*((0.7).^(0:length(pcofa)-1)),[1 -.8]);
        %ncof1=conv(pcofa.*((0.7).^(0:length(pcofa)-1)),[1 -.8]);
        %dcof1=conv(ocofa.*((0.8).^(0:length(pcofa)-1)),[1 -.7]);
        %len1=floor(lenss/2)-1;
        %gp=cshift(gp,len1);
        %gp1=gp.*hanning(lenss)';
        %gp(len1+1:lenss)=gp(len1+1:lenss)-gp1(len1+1:lenss);
        %gp=cshift(gp,-len1);
        %gp1=[zeros(1,lenss-len1) gp1 zeros(1,len1)];
        %gp2=filter(ncof2,dcof2,gp1);
        %gp3=rev(filter(ncof1,dcof1,rev(gp1)));
        %gp=gp+gp2(lenss+1:2*lenss);
        %gp=gp+gp2(lenss+1:2*lenss)+gp3(1:lenss);
        %gp1=[gp(lenss) gp];
        %gp1=filter([1 -autoc2(gp1)],1,gp1);
        %gp=gp1(2:lenss+1);
        %gp=gp-mean(gp);

        % 3.0 filter the pulse and determine the excitation gain
        %-------------------------------------------------------
        filt_len=length(pcofa)-1;
        % Most recent output samples, newest first (filter memory).
        Ziy=signal1(startp-1:-1:startp-filt_len);

        Smethod=1; %%%% Smethod==1 sounds a little better!!
        if Smethod==1
            %****************************
            % Smethod==1 Dr.Hu's method
            if Newstart==1
                % First pulse of a voiced segment: divide the pulse spectrum
                % by the spectrum of the LP polynomial zero-extended to lenss
                % points (circular inverse filtering over one period).
                % NOTE(review): assumes lenss >= length(pcofa); otherwise
                % the assignment below zeroes a coefficient instead of padding.
                cofa1=pcofa;
                cofa1(lenss)=0;
                sscon=real(ifft(fft(gp)./fft(cofa1)));
                amp=sqrt(gm(nk)*lenss/(sscon*sscon'));
                signal1(startp:endp)=sscon*amp;
                pwfsm=0;
                Newstart=0;
                % NOTE(review): 'exc12' is not written for this first pulse
                % (unchanged from the original) - confirm whether intended.
            else
                Ziydc=mean(Ziy);
                fxac=filt(1,ocofa,zeros(size(gp)),Ziy-Ziydc);
                % 'fxac' = deviation signal.
                fxdc=filt(1,ocofa,zeros(size(gp)),ones(size(Ziy)));
                % 'fxdc' = mean value signal.
                fx=fxac+Ziydc*fxdc;         % response to the previous filter memory
                gx=filter(1,pcofa,gp);      % response to the current pulse
                gxtemp=gx;
                % Six passes: re-estimate the mean of the last filt_len
                % samples and rebuild the trial signal with it.
                ncomp=0;
                while ncomp<=5
                    Ziy1=gxtemp(lenss:-1:lenss-filt_len+1);
                    dcamp=mean(Ziy1);
                    Ziy1=Ziy1-dcamp;
                    %tamp=sqrt((Ziy1*Ziy1')/(Ziy*Ziy'));
                    %gxtemp=gx+tamp*fxac+dcamp*fxdc;
                    gxtemp=gx+fxac+dcamp*fxdc;
                    ncomp=ncomp+1;
                end
                % Energy ratio between pulse response and trial signal,
                % smoothed across pulses with factor 'smf'.
                pwf=sqrt((gx*gx')/(gxtemp*gxtemp'));
                if pwfsm==0
                    pwfsm=pwf;
                end
                smf=.7;
                pwfsm=(1-smf)*pwf+smf*pwfsm;
                amp=sqrt(gm(nk)*lenss/(gx*gx'))*pwfsm;
                signal1(startp:endp)=amp*gx+fx;
                exc12(startp:endp)=gp*amp;
            end
        %************************************************
        % Smethod=2. append zeros in the end excitation pulse
        elseif Smethod==2
            gp=[gp zeros(1,lenss)];
            gx=filter(1, pcofa, gp);
            sig_x=gx(lenss+1:2*lenss);      % gpulse running into next segment
            sig_r=gx(1:lenss);              % synthetic speech of current segment
            sig_o=signal1(startp:endp);     % energy from previous pulse
            Pr=gm(nk)*lenss;                % gain
            Pr1=Pr-sum(sig_o.^2);
            % Attenuate the carried-over tail until it fits the target energy.
            loop=1;
            while Pr1<0 & loop<100
                sig_o=(0.98:-0.08/(lenss-1):0.9).*sig_o;
                signal1(startp:endp)=sig_o;
                Pr1=Pr-sum(sig_o.^2);
                loop=loop+1;
            end
            if Pr1<0
                amp=0.01;
            else
                amp=sqrt( Pr1/(sig_r*sig_r') );
            end
            sig_r=sig_r*amp;
            sig_x=sig_x*amp;
            signal1(startp:endp)=sig_r+sig_o;
            signal1(endp+1:endp+lenss)=sig_x+signal1(endp+1:endp+lenss);
            exc12(startp:endp)=amp*gp(1:lenss);
        end %% if Smethod==1
    end % voicetype(kf)==1
end %% for nk=1:numgci
%disp('Voiced segments --> ok'); toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%                               %%
%%%%%%% Synthesis (unvoiced part)     %%
%%%%%%%                               %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% speech synthesis is proceeded using the Direct-I
% implementation. The filter memory is just the samples pasted away.
load nc;                        % Load the stochastic codebook (must define 'nc0')
Xlen=M_len/4;                   % each frame advance is split into four segments
signal2=zeros(1,ntotal);        % unvoiced synthesis buffer
Ziy=zeros(1,Order);

for kf=1:nframe %notice proc(1,:)=='nc'
    ocofa=(cofa(kf,1:Order+1));
    % ocofa=polystab(cofa(kf,1:Order+1)); % Stablized the filter if necessary.

    % Noise synthesis is used for the first/last frame and for any frame
    % that is not voiced together with both of its neighbours.
    if kf==1 | kf==nframe
        proc='nc';
    elseif sum(voicetype(kf-1:kf))<2 | sum(voicetype(kf:kf+1))<2
        proc='nc';
    else
        proc='gc';
    end

    if strcmp(proc,'nc')
        for kk=1:4
            startp=(kf-1)*M_len+Order+(kk-1)*Xlen+1;
            endp=startp+Xlen-1;
            noise=nc0(ncidx(kf,kk),:);
            % Resample the codebook vector unless the segment length is 50
            % (presumably the stored codebook vector length - confirm).
            if Xlen~=50
                noise=interpft(noise,Xlen);
            end
            noise=[noise zeros(1,Xlen)];
            ss2=filt(1,ocofa,noise);
            sig_x=ss2(Xlen+1:2*Xlen);       %the next segment synthesis
            sig_r=ss2(1:Xlen);              %the current segment synthesis
            sig_o=signal2(startp:endp);     %previous synthesis
            eng_target=ncgm(kf,kk)*Xlen;    % original energy
            eng2=sum(sig_r.^2);             % energy of current excitation
            eng1=eng_target-sum(sig_o.^2);  % energy left for the new excitation
            % Attenuate the carried-over tail until it fits the target
            % energy. The residual is recomputed from the fixed target on
            % every pass (the original subtracted cumulatively, so a
            % negative residual could never recover and the loop always
            % ran to its limit).
            loop=1;
            while eng1<0 & loop<100
                sig_o=sig_o.*(0.98:-0.08/(Xlen-1):0.9);
                eng1=eng_target-sum(sig_o.^2);
                loop=loop+1;
            end
            if eng1<0
                amp=0.01;
            else
                amp=sqrt(eng1/eng2);
            end
            exc12(startp:endp)=amp*noise(1:Xlen);
            signal2(startp:endp)=amp*sig_r+sig_o;
            signal2(endp+1:endp+Xlen)=sig_x;
        end %%for kk=1:4
    end %%if proc is 'nc'
end
%disp('Unvoiced/Silent segments --> ok'); toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%                                         %%
%%%%%%% Combine voiced with unvoiced speech     %%
%%%%%%%                                         %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Combine 'signal1' and 'signal2' into 'syns.'
% Note : Voicing transition (See Dr. Hu's Dissertation Section 4.3.3)
% is taken care here.
syns=zeros(1,ntotal);           % preallocated; every frame range lies within 1..ntotal
for kf=1:nframe
    startp=M_len*(kf-1)+Order;
    frng=startp+(1:M_len);      % sample range of frame kf
    if kf==1 | kf==nframe | ~voicetype(kf)
        syns(frng)=signal2(frng);
    elseif voicetype(kf) & ~voicetype(kf-1)
        % unvoiced -> voiced: linear cross-fade from signal2 to signal1,
        % starting after the first gci inside the frame.
        pt1=(kf-1)*M_len+Order;
        f_gci=min( gci(gci>pt1) ); % first gci after transition
        range1=(f_gci+1:startp+M_len);
        lnn=length(range1);
        ss1=signal1(range1);
        ss2=signal2(range1);
        syns(frng)=signal2(frng);
        syns(range1)=(ss2.*(lnn:-1:1)+ss1.*(1:lnn))/(lnn+1);
    elseif voicetype(kf) & ~voicetype(kf+1)
        % voiced -> unvoiced: linear cross-fade from signal1 to signal2,
        % ending at the last gci before the end of the frame.
        pt1=kf*M_len+Order;
        x_gci=max( gci(gci<pt1) );
        range1=(startp+1:x_gci);
        lnn=length(range1);
        ss1=signal1(range1);
        ss2=signal2(range1);
        syns(frng)=signal2(frng);
        syns(range1)=(ss1.*(lnn:-1:1)+ss2.*(1:lnn))/(lnn+1);
    else
        syns(frng)=signal1(frng);
    end
end
% 'syns' = synthetic speech.
syns=filter([1 -1],[1 -.99],syns); % Remove low-frequency drift.
syns=filter([1 1],[1 .99],syns); % Remove high-frequency noise
%disp(''); toc;
%disp('Synthesis is OK! Job is done (finally).');
%disp('');
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?