📄 fxrapt.m

📁 voicebox ,很不错的matlab源程序
💻 M
📖 第 1 页 / 共 2 页
字号:
上一页 12
    vipkd(vpkd<max(vpkd)*candtr,:)=[];          % eliminate peaks that are small
    if size(vipkd,1)
        if size(vipkd,1)>ncands-1
            vipkd=sortrows(vipkd);
            vipkd(1:size(vipkd,1)-ncands+1,:)=[];   % eliminate lowest to leave only ncands-1
        end
        lagcan=round(vipkd(:,2)*kdsmp+lagoff);        % convert the lag candidate values to the full sample rate        
        nlcan=length(lagcan);
    else
        nlcan=0;
    end
    
    % If there are any candidate lag values (nlcan>0) then refine their accuracy at the full sample rate
    
    if nlcan
        laglist=reshape(repmat(lagcan(:)',nfullag,1)+repmat((-hnfullag:hnfullag)',1,nlcan),nfullag*nlcan,1);
        fho=(iframe-1)*kframe+spoff;
        sfh=s(fho+(1:kcorw+max(lagcan)+hnfullag));
        sfhdc=mean(sfh(sfhi));
        sfh=sfh-sfhdc;
        e0=sum(sfh(sfhi).^2);                     % energy of initial correlation window (only needed to store in tv(:,6)
        lagl2=repmat(lagcan(:)',nfullag+kcorw-1,1)+repmat((1-hnfullag:hnfullag+kcorw)',1,nlcan);
        nccf=normxcor(sfh(1:kcorw),sfh(lagl2),afact);
        
        [maxcc,maxcci]=max(nccf,[],1);
        vipk=[maxcc(:) lagcan(:)+maxcci(:)-hnfullag-1];
        vipk=vipk(:,[1 2 2]);
        maxccj=maxcci(:)'+nfullag*(0:nlcan-1);    % vector index into nccf array
        msk=mod(maxcci,nfullag-1)~=1 & 2*nccf(maxccj)-nccf(mod(maxccj-2,nfullag*nlcan)+1)-nccf(mod(maxccj,nfullag*nlcan)+1)>0;  % don't do quadratic interpolation for the end ones
        if any(msk)
            maxccj=maxccj(msk);
            vipk(msk,3)=vipk(msk,3)+(nccf(maxccj+1)-nccf(maxccj-1))'./(2*(2*nccf(maxccj)-nccf(maxccj-1)-nccf(maxccj+1)))';
        end
        vipk(maxcc<max(maxcc)*candtr,:)=[];          % eliminate peaks that are small
        if size(vipk,1)>ncands-1
            vipk=sortrows(vipk);
            vipk(1:size(vipk,1)-ncands+1,:)=[];   % eliminate lowest to leave only ncands-1
        end
        
        % vipk(:,1) has NCCF value, vipk(:,2) has integer peak position, vipk(:,3) has refined peak position
        
        mc=size(vipk,1);
    else
        mc=0;
    end
    
    % We now have mc lag candidates at the full sample rate
    
    mc1=mc+1;               % total number of candidates including "unvoiced" possibility
    mcands(iframe)=mc;      % save number of lag candidates (needed for pitch consistency cost calculation)
    if mc
        lagval(iframe,1:mc)=vipk(:,3)';
        cost(iframe,1)=vobias+max(vipk(:,1));   % voiceless cost
        cost(iframe,2:mc1)=1-vipk(:,1)'.*(1-beta*vipk(:,3)');   % local voiced costs
        tv(iframe,2)=min(cost(iframe,2:mc1));
    else
        cost(iframe,1)=vobias;          % if no lag candidates (mc=0), then the voiceless case is the only possibility
    end
    tv(iframe,1)=cost(iframe,1);
    if iframe>1                         % if it is not the first frame, then calculate pitch consistency and v/uv transition costs
        mcp=mcands(iframe-1);
        costm=zeros(mcp+1,mc1);         % cost matrix: rows and cols correspond to candidates in previous and current frames (incl voiceless)
        
        % if both frames have at least one lag candidate, then calculate a pitch consistency cost
        
        if mc*mcp                      
            lrat=abs(log(repmat(lagval(iframe,1:mc),mcp,1)./repmat(lagval(iframe-1,1:mcp)',1,mc)));
            costm(2:end,2:end)=freqwt*min(lrat,doublec+abs(lrat-log2));  % allow pitch doubling/halving
        end
        
        % if either frame has a lag candidate, then calculate the cost of voiced/voiceless transition and vice versa
        
        if mc+mcp
            rr=sqrt((rmswin'*s(fho+rmsix).^2)/(rmswin'*s(fho+rmsix-kdrms).^2)); % amplitude "gradient"
            ss=0.2/(distitar(lpcauto(sp(fho+rmsix),lpcord),lpcauto(sp(fho+rmsix-kdrms),lpcord),'e')-0.8);   % Spectral stationarity: note: Talkin uses Hanning instead of Hamming windows for LPC
            costm(1,2:end)= vtranc+vtrsc*ss+vtrac/rr;   % voiceless -> voiced cost
            costm(2:end,1)= vtranc+vtrsc*ss+vtrac*rr;   
            tv(iframe,4:5)=[costm(1,mc1) costm(mcp+1,1)];
        end
        costm=costm+repmat(cost(iframe-1,1:mcp+1)',1,mc1);  % add in cumulative costs
        [costi,previ]=min(costm,[],1);
        cost(iframe,1:mc1)=cost(iframe,1:mc1)+costi;
        prev(iframe,1:mc1)=previ;
    else                            % first ever frame
        costm=zeros(1,mc1); % create a cost matrix in case doing a backward recursion
    end
    if mc
        tv(iframe,3)=cost(iframe,1)-min(cost(iframe,2:mc1));
        tv(iframe,6)=5*log10(e0*e0/afact);
    end
    if doback
        costms{iframe}=costm; % need to add repmatted cost into this
    end
end

% now do traceback

best=zeros(nframe,1);
[cbest,best(nframe)]=min(cost(nframe,1:mcands(nframe)+1));
for i=nframe:-1:2
    best(i-1)=prev(i,best(i));
end
vix=find(best>1);
fx=repmat(NaN,nframe,1);                        % unvoiced frames will be NaN
fx(vix)=fs*lagval(vix+nframe*(best(vix)-2)).^(-1); % leave as NaN if unvoiced
tt=zeros(nframe,3);
tt(:,1)=(1:nframe)'*kframe+spoff;       % find frame times
tt(:,2)=tt(:,1)+kframe-1;
jratm=(jumprat+1/jumprat)/2;
tt(2:end,3)=abs(fx(2:end)./fx(1:end-1)-jratm)>jumprat-jratm;    % new spurt if frequency ratio is outside (1/jumprat,jumprat)
tt(1,3)=1;           % first frame always starts a spurt
tt(1+find(isnan(fx(1:end-1))),3)=1; % NaN always forces a new spurt

% plot results if there are no output arguments or if the 'g' mode option is specified

if ~nargout | any(mode=='g')
    tf=spoff+(0:nframe-1)'*kframe;      % one sample before start of each frame
    blag=repmat(NaN,nframe,1);                        % unvoiced frames will be NaN
    blag(vix)=lagval(vix+nframe*(best(vix)-2)); % leave as NaN if unvoiced
    ts=(1:ns)/fs;                       % time scale for speech samples
    tsa=[1:tf(1) tf(end)+kframe+1:ns];  % indexes for unprocessed speech [-1 term is an error methinks]
    sup=repmat(NaN,ns,1);               % unprocessed speech - plot in black
    sup(tsa)=s(tsa);
    sv=reshape(s(tf(1)+1:tf(end)+kframe),kframe,nframe);               % processed speech
    su=sv;
    su(:,best>1)=NaN;                   % delete all voiced samples
    sv(:,best==1)=NaN;                  % delete all unvoiced samples
    tsuv=(tf(1)+1:tf(end)+kframe)/fs;
    su=su(:);
    sv=sv(:);
    subplot(211)
    plot(ts,sup,'-k',tsuv,su,'r-',tsuv,sv,'b-');
    title('Speech');
    subplot(212)
    plot((tf+(kframe+1)/2)/fs,lagval*1000/fs,'xr',(tf+(kframe+1)/2)/fs,blag*1000/fs,'-b')
    xlabel('Time (s)');
    ylabel('Period (ms)');
    title('Lag Candidates');
end
tt(isnan(fx),:)=[];    % remove NaN spurts
fx(isnan(fx),:)=[];  



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function v=normxcor(x,y,d)
% NORMXCOR  Normalized cross-correlation of column vector x with the columns of y
%
%  Inputs:  x   column vector template of length nx
%           y   column vector, or matrix with ny rows; each column is correlated
%               with x separately (nx<=ny is required)
%           d   constant added to the normalization term inside the square root [default 0]
%
%  Output:  v   result with ny-nx+1 rows and one column per column of y, where
%               v(j)=x'*yj/sqrt(d + x'*x * yj'*yj) and yj=y(j:j+nx-1)
%
% The raw correlation is evaluated in the frequency domain (rfft/irfft) using a
% power-of-two transform length.
% No mean subtraction is done here even though this is normally a good idea.

if nargin<3
    d=0;                                % no extra normalization term unless requested
end
nx=length(x);
[ny,ncol]=size(y);
if nx>ny
    error('second argument is shorter than the first');
end
nlag=ny-nx+1;                           % number of output lags

% unnormalized cross-correlation of x with every column of y
nfft=2^nextpow2(ny);
xspec=conj(rfft(x,nfft,1));
yspec=rfft(y,nfft,1);
xcor=irfft(repmat(xspec,1,ncol).*yspec);

% running sum of squares with a leading row of zeros: differencing rows that are
% nx apart gives the energy of each length-nx window of y
esum=[zeros(1,ncol); cumsum(y.^2,1)];
ewin=esum(nx+1:end,:)-esum(1:end-nx,:);

v=xcor(1:nlag,:)./sqrt(d+(x'*x).*ewin);
上一页 12
💿 文件大小 218 K
👤 上传用户 fitnes51
📂 所属分类单片机开发
🏷️ 相关标签

#voicebox #matlab #源程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -