% lpsyn.m
%
% Source: part of a toolbox for speech signal processing (M code, 352 lines).
% Function: perform Formant Based Linear Prediction Speech Synthesis.
% 

function [syns,exc12]=lpsyn(voicetype,gci,cofa,gm,gpcof,ncidx,ncgm,basic)
%LPSYN  Formant-based linear-prediction (LP) speech synthesis.
%
%   [syns,exc12] = lpsyn(voicetype,gci,cofa,gm,gpcof,ncidx,ncgm,basic)
%
%   Inputs (described by how this function uses them):
%     voicetype - per-frame voicing flag; 1 is handled as voiced, 0 as
%                 unvoiced/silent.
%     gci       - sample indices delimiting the pitch periods (presumably
%                 glottal closure instants -- confirm with the analysis
%                 stage). Pulse nk is written to gci(nk)+1 ... gci(nk+1).
%     cofa      - LP coefficients, one row per frame.
%     gm        - per-pulse gain; gm(nk)*lenss is the target energy of the
%                 nk-th pitch period.
%     gpcof     - glottal source parameters, one row per frame.
%     ncidx     - nframe-by-4 row indices into the stochastic codebook
%                 'nc0' (loaded from the file 'nc').
%     ncgm      - nframe-by-4 gains for the unvoiced sub-frames.
%     basic     - specification vector: basic(3) = glottal source model,
%                 basic(4) = LP order, basic(5) = frame length,
%                 basic(6) = frame overlap.
%
%   Outputs:
%     syns      - synthetic speech (row vector, M_len*nframe+Order samples).
%     exc12     - synthetic excitation.
%
%   Assumption (not enforced here): a voiced pitch period never falls in
%   the first or the last frame, because the voiced branch reads frames
%   kf-1 and kf+1.

% Retrieve the basic specification.
Smodel=basic(3);     % 1 --> 6 order polynomial model (sounds better)
                     % 2 --> LF model
Order=basic(4);
F_len=basic(5);
O_lap=basic(6);
M_len=F_len-O_lap;   % frame advance

nframe=size(cofa,1);
ntotal=M_len*nframe+Order;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%                             %%
%%%%%%%  Synthesis (voiced part)    %%
%%%%%%%                             %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Synthesize speech by combining the analysis results under the
% structure of the proposed LP speech production model.

disp('LP Synthesis starts...');

signal1=zeros(1,ntotal);
exc12=zeros(1,ntotal);       % 'exc12' = synthetic excitation.
gcof=zeros(1,7);             % 'gcof' = smoothed glottal parameters.
pcofa=cofa(1,:);             % 'pcofa' = LP coefficients of the previous pulse.
gp=[0 -1 0 0 0 0 0 0 0 0];   % default pulse, used if the first model call returns []
Newstart=1;
numgci=length(gci);

% Initial pitch-period length. The original only initialised 'lenss' when
% numgci>2, which left 'lenss'/'olens' undefined for one or two GCIs.
if numgci>=2
   lenss=gci(2)-gci(1);
else
   lenss=M_len;   % no period can be measured; fall back to one frame advance
end
olens=lenss;

for nk=1:numgci
   startp=gci(nk)+1;                      % starting point of the pitch period
   kf=fix((startp-Order-1)/M_len)+1;      % frame number

   if nk==numgci
      % Last gci: reuse the previous period length.
      endp=startp+lenss-1;                % ending point of the pitch period
   else
      endp=gci(nk+1);                     % ending point of the pitch period
      olens=lenss;
      lenss=gci(nk+1)-gci(nk);
   end

   % Voiced-to-unvoiced transition: a gap longer than a frame is not a
   % pitch period, so fall back to the previous period length.
   if lenss>F_len
      if voicetype(kf)==1 && voicetype(kf+1)==0
         lenss=olens;
         endp=startp+lenss-1;
      end
   end

   if voicetype(kf)==1   % construct the voiced excitation wave

      % 0.0 take care of the unvoiced to voiced transition
      %---------------------------------------------------
      if (voicetype(kf-1)+signal1(startp-2))==0
         pcofa=cofa(kf-1,:);
         gcof=zeros(1,7);
         Newstart=1;
      end

      % 1.0 Interpolate LP coefficients; for details, please see
      %     Section 4.2. of Dr. Hu's dissertation.
      %---------------------------------------------------------
      cf0=cofa(kf-1,:);   % previous frame \
      cf1=cofa(kf,:);     % current  frame  >  LP coefficients
      cf2=cofa(kf+1,:);   % next     frame /

      % Pulse end points relative to the start of frame kf.
      pt1=startp+1-(kf-1)*M_len-Order;
      pt2=endp+1-(kf-1)*M_len-Order;

      % Inverse-square distance weights of the three frames.
      rw0=1/(abs(pt1+M_len-F_len/2+1)+abs(pt2+M_len-F_len/2+1))^2;
      rw1=1/(abs(pt1-F_len/2)+abs(pt2-F_len/2))^2;
      rw2=1/(abs(pt1-F_len/2-M_len)+abs(pt2-F_len/2-M_len))^2;
      rwall=rw0+rw1+rw2;
      w0=rw0/rwall;
      w1=rw1/rwall;
      w2=rw2/rwall;

      ocofa=pcofa;
      pcofa=polystab(w0*cf0+w1*cf1+w2*cf2);

      % 2.0 create the glottal pulse
      %-----------------------------

      % interpolate the glottal parameters
      if gcof(1)==0
         gcof=0.5*gpcof(kf,:)+0.5*mean(gpcof);
      elseif fix(endp/M_len)==kf   % ending point in next frame
         gcof=0.5*gcof+0.5*gpcof(kf+1,:);
      else
         gcof=0.5*gcof+0.5*gpcof(kf,:);
      end

      % generate the pulse by source model
      o_gp=gp;
      if Smodel==1
         gp=gen_dgf1(gcof,lenss,1);
         % gen_dgf1 <--> polym1
      elseif Smodel==2
         % generate LF pulse
         gp=lfpuls(gcof(1),gcof(2),gcof(3),gcof(4),gcof(5),lenss+1,1);

         % differentiate the pulse for LP synthesis
         gp=diff(gp);
         gp=gp-mean(gp);
      end
      if isempty(gp)
         % The model produced nothing: resample the previous pulse to the
         % current period length instead.
         len1=length(o_gp);
         gp=interp1(0:1/(len1-1):1,o_gp,0:1/(lenss-1):1);
         gp=gp(:)';
      end

      % simulate the source-tract interaction (disabled in the original)

      %dcof2=conv(pcofa.*((0.8).^(0:length(ocofa)-1)),[1 -.7]);
      %ncof2=conv(pcofa.*((0.7).^(0:length(pcofa)-1)),[1 -.8]);
      %ncof1=conv(pcofa.*((0.7).^(0:length(pcofa)-1)),[1 -.8]);
      %dcof1=conv(ocofa.*((0.8).^(0:length(pcofa)-1)),[1 -.7]);

      %len1=floor(lenss/2)-1;
      %gp=cshift(gp,len1);
      %gp1=gp.*hanning(lenss)';
      %gp(len1+1:lenss)=gp(len1+1:lenss)-gp1(len1+1:lenss);
      %gp=cshift(gp,-len1);
      %gp1=[zeros(1,lenss-len1) gp1 zeros(1,len1)];
      %gp2=filter(ncof2,dcof2,gp1);
      %gp3=rev(filter(ncof1,dcof1,rev(gp1)));
      %gp=gp+gp2(lenss+1:2*lenss);
      %gp=gp+gp2(lenss+1:2*lenss)+gp3(1:lenss);
      %gp1=[gp(lenss) gp];
      %gp1=filter([1 -autoc2(gp1)],1,gp1);
      %gp=gp1(2:lenss+1);
      %gp=gp-mean(gp);

      % 3.0 filter the pulse and determine the excitation gain
      %-------------------------------------------------------
      filt_len=length(pcofa)-1;
      Ziy=signal1(startp-1:-1:startp-filt_len);   % past output, newest first

      Smethod=1;  %%%% Smethod==1 sounds a little better!!

      if Smethod==1
         %****************************
         % Smethod==1 Dr.Hu's method

         if Newstart==1
            % First pulse after a restart: divide the spectra, which
            % inverse-filters the pulse circularly over one period.
            cofa1=pcofa;
            cofa1(lenss)=0;   % extend to lenss samples (assumes lenss >= length(pcofa))
            sscon=real(ifft(fft(gp)./fft(cofa1)));
            amp=sqrt(gm(nk)*lenss/(sscon*sscon'));
            signal1(startp:endp)=sscon*amp;
            % NOTE(review): exc12 is not written for this pulse, unlike
            % the branch below -- kept as in the original.
            pwfsm=0;
            Newstart=0;
         else
            Ziydc=mean(Ziy);
            fxac=filt(1,ocofa,zeros(size(gp)),Ziy-Ziydc);
            % 'fxac' = deviation signal.
            fxdc=filt(1,ocofa,zeros(size(gp)),ones(size(Ziy)));
            % 'fxdc' = mean value signal.
            fx=fxac+Ziydc*fxdc;
            gx=filter(1,pcofa,gp);
            gxtemp=gx;
            ncomp=0;
            while ncomp<=5
               Ziy1=gxtemp(lenss:-1:lenss-filt_len+1);
               dcamp=mean(Ziy1);
               %Ziy1=Ziy1-dcamp;
               %tamp=sqrt((Ziy1*Ziy1')/(Ziy*Ziy'));
               %gxtemp=gx+tamp*fxac+dcamp*fxdc;
               gxtemp=gx+fxac+dcamp*fxdc;
               ncomp=ncomp+1;
            end
            pwf=sqrt((gx*gx')/(gxtemp*gxtemp'));
            if pwfsm==0
               pwfsm=pwf;
            end
            smf=.7;   % smoothing factor of the power-correction term
            pwfsm=(1-smf)*pwf+smf*pwfsm;
            amp=sqrt(gm(nk)*lenss/(gx*gx'))*pwfsm;
            signal1(startp:endp)=amp*gx+fx;
            exc12(startp:endp)=gp*amp;
         end

      %************************************************
      % Smethod=2. append zeros in the end excitation pulse

      elseif Smethod==2

         gp=[gp zeros(1,lenss)];
         gx=filter(1,pcofa,gp);

         sig_x=gx(lenss+1:2*lenss);   % gpulse running into next segment
         sig_r=gx(1:lenss);           % synthetic speech of current segment
         sig_o=signal1(startp:endp);  % energy from previous pulse

         Pr=gm(nk)*lenss;             % gain
         Pr1=Pr-sum(sig_o.^2);
         loop=1;
         while Pr1<0 && loop<100
            sig_o=(0.98:-0.08/(lenss-1):0.9).*sig_o;
            signal1(startp:endp)=sig_o;
            Pr1=Pr-sum(sig_o.^2);
            loop=loop+1;
         end
         if Pr1<0
            amp=0.01;
         else
            amp=sqrt(Pr1/(sig_r*sig_r'));
         end
         sig_r=sig_r*amp;
         sig_x=sig_x*amp;

         signal1(startp:endp)=sig_r+sig_o;
         signal1(endp+1:endp+lenss)=sig_x+signal1(endp+1:endp+lenss);
         exc12(startp:endp)=amp*gp(1:lenss);
      end  %% if Smethod==1
   end  % voicetype(kf)==1
end  %% for nk=1:numgci

%disp('Voiced segments --> ok'); toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%                               %%
%%%%%%%  Synthesis (unvoiced part)    %%
%%%%%%%                               %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Each sub-frame is filtered together with Xlen trailing zeros. The second
% half of the response is stored in the next sub-frame's slot, where it is
% picked up as 'sig_o'; this takes the place of an explicit filter memory.

load nc;   % Load the stochastic codebook (provides 'nc0')

Xlen=M_len/4;   % four sub-frames per frame advance
signal2=zeros(1,ntotal);
for kf=1:nframe
   ocofa=cofa(kf,1:Order+1);
   %ocofa=polystab(cofa(kf,1:Order+1)); % Stablized the filter if necessary.

   % Noise excitation is used for the first and last frame and for every
   % frame that is not voiced together with both of its neighbours.
   % Short-circuit evaluation keeps kf-1 and kf+1 inside the valid range.
   use_noise = kf==1 || kf==nframe ...
            || sum(voicetype(kf-1:kf))<2 || sum(voicetype(kf:kf+1))<2;

   if use_noise
      for kk=1:4
         startp=(kf-1)*M_len+Order+(kk-1)*Xlen+1;
         endp=startp+Xlen-1;

         noise=nc0(ncidx(kf,kk),:);

         % Resample the code vector to the sub-frame length (the skip at
         % 50 presumably matches the codebook vector length -- confirm).
         if Xlen~=50
            noise=interpft(noise,Xlen);
         end

         noise=[noise zeros(1,Xlen)];
         ss2=filt(1,ocofa,noise);
         sig_x=ss2(Xlen+1:2*Xlen);     % the next segment synthesis
         sig_r=ss2(1:Xlen);            % the current segment synthesis
         sig_o=signal2(startp:endp);   % previous synthesis

         eng0=ncgm(kf,kk)*Xlen;        % original (target) energy
         eng2=sum(sig_r.^2);           % energy of current excitation response
         eng1=eng0-sum(sig_o.^2);      % energy left for the current excitation
         loop=1;
         % Attenuate the carried-over tail until it fits the target.
         % FIX: the residual is recomputed from the fixed target 'eng0'.
         % The original subtracted from the already negative 'eng1', so
         % the loop could never succeed.
         while eng1<0 && loop<100
            sig_o=sig_o.*(0.98:-0.08/(Xlen-1):0.9);
            eng1=eng0-sum(sig_o.^2);
            loop=loop+1;
         end
         if eng1<0
            amp=0.01;
         else
            amp=sqrt(eng1/eng2);
         end
         exc12(startp:endp)=amp*noise(1:Xlen);
         signal2(startp:endp)=amp*sig_r+sig_o;
         % FIX: the tail belongs to the same scaled excitation, so it gets
         % the same gain (the original stored it unscaled).
         signal2(endp+1:endp+Xlen)=amp*sig_x;
      end  %% for kk=1:4
   end  %% if use_noise
end

%disp('Unvoiced/Silent segments --> ok'); toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%                                         %%
%%%%%%%  Combine voiced with unvoiced speech    %%
%%%%%%%                                         %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Combine 'signal1' and 'signal2' into 'syns.'
% Note : Voicing transition (See Dr. Hu's Dissertation Section 4.3.3)
%        is taken care here.

syns=zeros(1,ntotal);   % same length the original reached by implicit growth
for kf=1:nframe
   startp=M_len*(kf-1)+Order;
   frng=startp+(1:M_len);   % samples of frame kf
   if kf==1 || kf==nframe || ~voicetype(kf)
      syns(frng)=signal2(frng);
   elseif voicetype(kf) && ~voicetype(kf-1)
      % Unvoiced -> voiced: cross-fade from noise to voiced speech,
      % starting at the first gci after the frame start.
      pt1=(kf-1)*M_len+Order;
      f_gci=min(gci(gci>pt1));   % first gci after transition
      range1=(f_gci+1:startp+M_len);
      lnn=length(range1);
      ss1=signal1(range1);
      ss2=signal2(range1);
      syns(frng)=signal2(frng);
      syns(range1)=(ss2.*(lnn:-1:1)+ss1.*(1:lnn))/(lnn+1);
   elseif voicetype(kf) && ~voicetype(kf+1)
      % Voiced -> unvoiced: cross-fade from voiced speech to noise,
      % ending at the last gci before the frame end.
      pt1=kf*M_len+Order;
      x_gci=max(gci(gci<pt1));
      range1=(startp+1:x_gci);
      lnn=length(range1);
      ss1=signal1(range1);
      ss2=signal2(range1);
      syns(frng)=signal2(frng);
      syns(range1)=(ss1.*(lnn:-1:1)+ss2.*(1:lnn))/(lnn+1);
   else
      syns(frng)=signal1(frng);
   end
end

% 'syns' = synthetic speech.
syns=filter([1 -1],[1 -.99],syns); % Remove low-frequency drift.
syns=filter([1 1],[1 .99],syns);   % Remove high-frequency noise

%disp(''); toc;
%disp('Synthesis is OK! Job is done (finally).');
%disp('');

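% Keyboard shortcuts of the web code viewer (page residue, not part of the program):
%   Copy code:          Ctrl + C
%   Search code:        Ctrl + F
%   Full-screen mode:   F11
%   Increase font size: Ctrl + =
%   Decrease font size: Ctrl + -
%   Show shortcuts:     ?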