📄 multicuef0.m
字号:
function [f0raw,vuv,auxouts,prm]=MulticueF0v14(x,fs,f0floor,f0ceil)% Source information extraction using multiple cues% Default values are used when other arguments are missing.% You can modify specific parameters by assigning them.% The control parameters open to user in this version are% F0 search range.% Examples:% f0=MulticueF0v14(x,fs);% x: input signal (monaural signal)% fs: sampling frequency (Hz)% f0: fundamental frequency (Hz)% f0 is set to zero when unvoiced.% f0=MulticueF0v14(x,fs,f0floor,f0ceil)% f0floor: Lower limit of F0 search (Hz)% f0ceil: Upper limit of F0 search (Hz)% [f0raw,vuv,auxouts]=MulticueF0v14(x,fs,f0floor,f0ceil)% f0raw: fundamental frequency without V/UV information% vuv: V/UV indicator, 1:voiced, 0: unvoiced% auxouts: base information for f0 extraction (structure variable)%% [f0raw,vuv,auxouts,prmouts]=MulticueF0v14(x,fs,prmin)% prmin: structure variable for control parameters% f0raw: fundamental frequency without V/UV information% vuv: V/UV indicator, 1:voiced, 0: unvoiced% auxouts: base information for f0 extraction (structure variable)% prmouts: structure variable showing used control parameters%% Copyright(c) Wakayama University, 2004% This version is very experimental. No warranty.% Please contact: kawahara@sys.wakayama-u.ac.jp% Designed and coded by Hideki Kawahara% 31/August/2004 first conceiled version% 27/September/2004 revised version v10% 14/November/2004 revised version v11% 12/January/2005 revised version v12% 13/January/2005 graphics disabled version% 10/May/2005 added control inputswitch nargin case {2,4} case 3 if ~isstruct(f0floor) ok=displayusage; f0raw=[];vuv=[]; return; else prmin = f0floor; end; otherwise ok=displayusage; f0raw=[];vuv=[]; return;endswitch nargin case 3 case 4 prmin.F0searchLowerBound=f0floor; prmin.F0searchUpperBound=f0ceil; prmin.DisplayPlots=0; otherwise prmin.DisplayPlots=0;end;[f0raw,vuv,auxouts,prm]=SourceInfobyMultiCues050111(x,fs,prmin);nn=min(length(vuv),length(f0raw));f0raw=f0raw(1:nn)';vuv=vuv(1:nn)';switch nargout case 1 f0raw=f0raw.*vuv; case {3,4} otherwise ok=displayusage; return;endreturn;function ok=displayusage;fprintf(' Source information extraction using multiple cues\n');fprintf(' Default values are used when other arguments are missing.\n');fprintf(' You can modify specific parameters by assigning them.\n');fprintf(' The control parameters open to user in this version are\n');fprintf(' F0 search range.\n');fprintf(' Example:1\n');fprintf(' f0=MulticueF0v14(x,fs);\n');fprintf(' x: input signal (monaural signal)\n');fprintf(' fs: sampling frequency (Hz)\n');fprintf(' f0: fundamental frequency (Hz)\n');fprintf(' f0 is set to zero when unvoiced.\n');fprintf(' f0=MulticueF0v14(x,fs,f0floor,f0ceil)\n');fprintf(' f0floor: Lower limit of F0 search (Hz)\n');fprintf(' f0ceil: Upper limit of F0 search (Hz)\n');fprintf(' [f0raw,vuv,auxouts]=MulticueF0v14(x,fs,f0floor,f0ceil)\n');fprintf(' f0raw: fundamental frequency without V/UV information\n');fprintf(' vuv: V/UV indicator, 1:voiced, 0: unvoiced\n');fprintf(' auxouts: base information for f0 extraction (structure variable)\n');fprintf(' [f0raw,vuv,auxouts,prmouts]=MulticueF0v14(x,fs,prmin)\n');fprintf(' prmin: structure variable for control parameters\n');fprintf(' f0raw: fundamental frequency without V/UV information\n');fprintf(' vuv: V/UV indicator, 1:voiced, 0: unvoiced\n');fprintf(' auxouts: base information for f0 extraction (structure variable)\n');fprintf(' prmouts: structure variable showing used control parameters\n');fprintf('\n');fprintf(' Copyright(c) Wakayama University, 2004,2005\n');fprintf(' This version is very experimental. No warranty.\n');fprintf(' Please contact: kawahara@sys.wakayama-u.ac.jp\n');ok=1;function [f0raw,vuv,auxouts,prm]=SourceInfobyMultiCues050111(x,fs,prmin)% Source information extraction function% with combined source information% minimum requisite is to provide x and fs and receive f0.% Default values are used when other arguments are missing.% You can modify specific parameters by assigning using prmin.% Example:1% SourceInfobyMultiCues050111(x,fs);% Simplest usage% Example:2% f0raw=SourceInfobyMultiCues050111(x,fs); % F0 for voiced segment is what you get.% Example:3% [f0raw,vuv,auxouts,prm]=SourceInfobyMultiCues050111(x,fs);% You can check what defaults were and raw information.% Example:4% [f0raw,vuv,auxouts,prm]=SourceInfobyMultiCues050111(x,fs,prmin);% You have full control (and responsibility).% Designed and coded by Hideki Kawahara% 24/June/2004% 26/June/2004% 30/June/2004% 01/July/2004% 04/July/2004 New mixing method without mixed map% 05/July/2004 Revised tracking measure% 06/July/2004 Revised tracking lgorithm further and bug fix% 07/July/2004 Revised tracking, funcamental bug fix% 10/July/2004 Revised proprocessing for fs independence% 15/July/2004 Bug fix for batch job% 16/July/2004 Revised for better initial position% 17/July/2004 Updated default values to improve robustness% 18/July/2004 Bug fix in tracking (boundary value)% 19/July/2004 Bug fix in tracking revised default parameters% 21/July/2004 Bug fix in tracking revised default parameters% 23/July/2004 New deconvolution of autocorrelation% 25/July/2004 Bug fix in tracking% 25/July/2004 Revised map merge procedure% 27/July/2004 Straightforward tracking routine% 29/July/2004 Combined tracking and fine tuning parameters% 02/Aug./2004 New tracking algorithm% 07/Aug./2004 Yet other tracking (segment selection) algorithm% 14/Aug./2004 Added missing segment recovary% 15/Aug./2004 New V/UV decision logic was integrated% 31/Aug./2004 minor bug fix% 25/Aug./2004 bug fix and revised when no output is assigned% 11/Jan./2005 minor bug fix for boundary logic% 12/Jan./2005 bug fix for all zero segment% Assuming x consists of data% Assuming fs consists of sampling frequency (Hz)%------ check input argumentsprm=zsetdefaultparams;switch nargin case {2,3} otherwise help SourceInfobyMultiCues040701 f0raw=[];vuv=[]; return;end[nn,mm]=size(x);if min(nn,mm)>1 display('Using only the first channel.'); if nn<mm x=x(1,:); else x=x(:,1); end;end;x=x(:); % make sure x is a column vector.%--- safe guard for all zero segments 12/Jan./2005l1ms=round(fs/1000);if length(x==0)>l1ms zv=randn(size(x(x==0))); zv=cumsum(zv-mean(zv)); zv=zv/std(zv)*std(x)/10000; x(x==0)=zv;end;%------ set initial parametersif nargin==3 if isfield(prmin,'F0searchLowerBound')==1; prm.F0searchLowerBound=prmin.F0searchLowerBound;end; if isfield(prmin,'F0searchUpperBound')==1; prm.F0searchUpperBound=prmin.F0searchUpperBound;end; if isfield(prmin,'F0frameUpdateInterval')==1; prm.F0frameUpdateInterval=prmin.F0frameUpdateInterval;end; if isfield(prmin,'NofChannelsInOctave')==1; prm.NofChannelsInOctave=prmin.NofChannelsInOctave;end; if isfield(prmin,'IFWindowStretch')==1; prm.IFWindowStretch=prmin.IFWindowStretch;end; if isfield(prmin,'DisplayPlots')==1; prm.DisplayPlots=prmin.DisplayPlots;end; if isfield(prmin,'IFsmoothingLengthRelToFc')==1; prm.IFsmoothingLengthRelToFc=prmin.IFsmoothingLengthRelToFc;end; if isfield(prmin,'IFminimumSmoothingLength')==1; prm.IFminimumSmoothingLength=prmin.IFminimumSmoothingLength;end; if isfield(prmin,'IFexponentForNonlinearSum')==1; prm.IFexponentForNonlinearSum=prmin.IFexponentForNonlinearSum;end; if isfield(prmin,'IFnumberOfHarmonicForInitialEstimate')==1; prm.IFnumberOfHarmonicForInitialEstimate=prmin.IFnumberOfHarmonicForInitialEstimate;end; if isfield(prmin,'TimeConstantForPowerCalculation')==1; prm.TimeConstantForPowerCalculation=prmin.TimeConstantForPowerCalculation;end; if isfield(prmin,'ACtimeWindowLength')==1; prm.ACtimeWindowLength=prmin.ACtimeWindowLength;end; if isfield(prmin,'ACnumberOfFrequencySegments')==1; prm.ACnumberOfFrequencySegments=prmin.ACnumberOfFrequencySegments;end; if isfield(prmin,'ACfrequencyDomainWindowWidth')==1; prm.ACfrequencyDomainWindowWidth=prmin.ACfrequencyDomainWindowWidth;end; if isfield(prmin,'ACpowerExponentForNonlinearity')==1; prm.ACpowerExponentForNonlinearity=prmin.ACpowerExponentForNonlinearity;end; if isfield(prmin,'ACamplitudeCompensationInShortLag')==1; prm.ACamplitudeCompensationInShortLag=prmin.ACamplitudeCompensationInShortLag;end; if isfield(prmin,'ACexponentForACdistance')==1; prm.ACexponentForACdistance=prmin.ACexponentForACdistance;end; if isfield(prmin,'AClagSmoothingLength')==1; prm.AClagSmoothingLength=prmin.AClagSmoothingLength;end; if isfield(prmin,'ACtemporalSmoothingLength')==1; prm.ACtemporalSmoothingLength=prmin.ACtemporalSmoothingLength;end; if isfield(prmin,'ThresholdForSilence')==1; prm.ThresholdForSilence=prmin.ThresholdForSilence;end; if isfield(prmin,'ThresholdForVUV')==1; prm.ThresholdForVUV=prmin.ThresholdForVUV;end; if isfield(prmin,'WeightForAutocorrelationMap')==1; prm.WeightForAutocorrelationMap=prmin.WeightForAutocorrelationMap;end; if isfield(prmin,'WeightForInstantaneousFqMap')==1; prm.WeightForInstantaneousFqMap=prmin.WeightForInstantaneousFqMap;end; if isfield(prmin,'VUVthresholdOfAC1')==1; prm.VUVthresholdOfAC1=prmin.VUVthresholdOfAC1;end; if isfield(prmin,'SDforNormalizeMixingDistance')==1; prm.SDforNormalizeMixingDistance=prmin.SDforNormalizeMixingDistance;end; if isfield(prmin,'SDforTrackingNormalization')==1; prm.SDforTrackingNormalization=prmin.SDforTrackingNormalization;end; if isfield(prmin,'MaxumumPermissibleOctaveJump')==1; prm.MaxumumPermissibleOctaveJump=prmin.MaxumumPermissibleOctaveJump;end; if isfield(prmin,'ThresholdToStartSearch')==1; prm.ThresholdToStartSearch=prmin.ThresholdToStartSearch;end; if isfield(prmin,'ThresholdToQuitSearch')==1; prm.ThresholdToQuitSearch=prmin.ThresholdToQuitSearch;end; if isfield(prmin,'ThresholdForReliableRegion')==1; prm.ThresholdForReliableRegion=prmin.ThresholdForReliableRegion;end;end; %----- copy modified analysis conditions to internal variablesf0floor=prm.F0searchLowerBound; % f0floorf0ceil=prm.F0searchUpperBound; % f0ceilshiftm=prm.F0frameUpdateInterval; % % F0 calculation interval (ms)nvo=prm.NofChannelsInOctave; % nvo=24; % Number of channels in one octavemu=prm.IFWindowStretch; % mu=1.2; % window stretch from isometric windowimgi=prm.DisplayPlots; % imgi=1; % image display indicator (1: display image)smp=prm.IFsmoothingLengthRelToFc; % smp=1; % smoothing length relative to fc (ratio)minm=prm.IFminimumSmoothingLength; % minm=5; % minimum smoothing length (ms)pcIF=prm.IFexponentForNonlinearSum; % pc=0.5; % exponent to represent nonlinear summationncIF=prm.IFnumberOfHarmonicForInitialEstimate; % nc=1; % number of harmonic component to use (1,2,3)tcpower=prm.TimeConstantForPowerCalculation; % tcpower=10; % time constant for power calculation (ms)wtlm=prm.ACtimeWindowLength; % Time window length for Autocorrelation based method (ms)ndiv=prm.ACnumberOfFrequencySegments; % for Autocorrelation methodwflf=prm.ACfrequencyDomainWindowWidth; % for Autocorrelation method (Hz)pcAC=prm.ACpowerExponentForNonlinearity; % for Autocorrelation methodampAC=prm.ACamplitudeCompensationInShortLag; % for Autocorrelation method (ratio)betaAC=prm.ACexponentForACdistance; % Nonlinear distance measure for post processinglagslAC=prm.AClagSmoothingLength; % Lag smoothing length for post processing (s) !!timeslAC=prm.ACtemporalSmoothingLength; % Temporal smoothing length for post processing (ms)thsilence=prm.ThresholdForSilence; % for silence decision above average noise level (dB)thvuv=prm.ThresholdForVUV; % for V/UV decision based on first autocorrelation and C/NwAC=prm.WeightForAutocorrelationMap; % weight for combining maps (Autocorrelation)wIF=prm.WeightForInstantaneousFqMap; % weight for combining maps (Instantaneous Frequency)ac1th=prm.VUVthresholdOfAC1;mixsd=prm.SDforNormalizeMixingDistance; % Normalization factor for mixing F0 distance (octave)nsd=prm.SDforTrackingNormalization;f0jump=prm.MaxumumPermissibleOctaveJump;strel=prm.ThresholdToStartSearch;endrel=prm.ThresholdToQuitSearch;endrel2=prm.ThresholdForReliableRegion;nvc=ceil(log(f0ceil/f0floor)/log(2)*nvo); ... % Number of channels in whole search range%------- extract fixed points of frequency to instantaneous frequency map[f0v,vrv,dfv,nf,aav]= ... zfixpF0VexMltpBG4(x,fs,f0floor,nvc,nvo,mu,imgi,shiftm,smp,minm,pcIF,ncIF);[val,pos]=zmultiCandIF(f0v,vrv);[y,ind,fq]=zremoveACinduction(x,fs,pos);%------- Pre processing of AC induction if necessaryif ind==1 xbak=x;x=y; [f0v,vrv,dfv,nf,aav]= ... zfixpF0VexMltpBG4(x,fs,f0floor,nvc,nvo,mu,imgi,shiftm,smp,minm,pcIF,ncIF);end;%---- selecting multiple F0 candidates based on IF[val,pos]=zmultiCandIF(f0v,vrv);if imgi==1 hh=figure;semilogy(pos,'+');grid on;hold on; set(gca,'fontsize',16); axis([0 length(x)/fs*1000 f0floor f0ceil]);end;%---- selecting multiple F0 candidates based on modified Autocorrelationdn=max(1,floor(fs/max(8000,3*2*f0ceil)));if imgi==1; h1=figure;else h1=-1;end;[lagspec,lx]= ... zlagspectestnormal(decimate(x,dn),fs/dn,shiftm,length(x)/fs*1000,shiftm,wtlm,ndiv,wflf,pcAC,ampAC,h1);[f02,pl2]=zmultiCandAC(lx,lagspec,betaAC,lagslAC,timeslAC);if imgi==1 figure(hh);semilogy(f02,'o');hold off xlabel('time (ms)');ylabel('frequency (Hz)'); title('F0 candidates: o:autocorrelation +:instantaneous frequency')end;%----- Combine multiple source information with dynamic range normalization%[f0mapIF,f0mapAC,fx]=zCombineSourceInfoNorm(val,pos,f02,pl2,f0floor,f0ceil,nvo);%----- selecting multiple F0 candidates based on multiple source%[f0cand,relv]=zmultiCandf0map(fx,wIF*f0mapIF+wAC*f0mapAC);auxouts.F0candidatesByIF=pos;auxouts.CNofcandidatesByIF=val;auxouts.F0candidatesByAC=f02;auxouts.ACofcandidatesByAC=pl2;[f0cand,relv]=zcombineRanking4(auxouts,mixsd,wAC,wIF,prm); % New mixing routine%f0cand=f0cand(:,1:3);%relv=relv(:,1:3);if imgi==1 figure semilogy(f0cand,'+');grid on; set(gca,'fontsize',16); axis([0 length(f0cand) f0floor f0ceil]); title('F0 candidates by mixed source information'); xlabel('time (ms)') ylabel('frequency (Hz)')end;%keyboard%----- Calculate power envelopepws=zVpowercalc(x,fs,tcpower,shiftm,2000);pwsdb=10*log10(abs(pws)+0.00000000001);mxpwsdb=max(pwsdb);[hstgrm,binlvl]=hist(pwsdb,mxpwsdb+(-60:2));q10=interp1(cumsum(hstgrm+0.000000001)/sum(hstgrm)*100,binlvl,10); % 10% quantile level[dmy1,minid]=min(abs(q10-binlvl));bb=max(1,min(length(binlvl),minid+(-5:5))); % search range 10 dB % safeguardnoiselevel=sum(hstgrm(bb).*binlvl(bb))/sum(hstgrm(bb)); if imgi==1 figure plot(pwsdb);grid on; set(gca,'fontsize',16); axis([0 length(pwsdb) noiselevel-10 max(pwsdb)]); hold on; plot([0 length(pwsdb)],noiselevel*[1 1],'r'); plot([0 length(pwsdb)],noiselevel*[1 1]+3,'r-.'); title('Instantaneous power solid line:noise level, dash-dot line:threshold') xlabel('time (ms)');ylabel('power (dB)')end;%----- F0 trackingac1=zeros(1,length(f0cand));for ii=1:length(f0cand)ac1(ii)=zfirstac(x,fs,round(ii/1000*fs),30);end;auxouts.F0candidatesByMix=f0cand;auxouts.RELofcandidatesByMix=relv;auxouts.FirstAutoCorrelation=ac1;auxouts.InstantaneousPower=pwsdb;%f0raw0=zf0trackmulti(auxouts,ac1th,f0jump,nsd);%[f0raw0,segb]=zf0trackmulti25(auxouts,ac1th,f0jump,nsd,prm); %%27/July/2004%[f0s,rels,csegs]=contiguousSegment6(auxouts,strel,endrel,endrel2,prm);[f0s,rels,csegs]=zcontiguousSegment10(auxouts,prm);[f0raw0,relc]=zfillf0gaps6(auxouts,f0s,rels,csegs,prm);%[f0s,rels,csegs]=zcontiguousSegment2(auxouts,0.55,0.16,0.4);% first param. was 0.5 (by 3AM 28)%f0raw0(f0s>0)=f0s(f0s>0);if imgi==1; figure semilogy(f0raw0,'c');grid on; set(gca,'fontsize',16); axis([0 length(f0raw0) f0floor f0ceil]); drawnow;end;%------ F0 refinement using first three harmonic componentsf0raw0(isnan(f0raw0))=zeros(size(f0raw0(isnan(f0raw0))));f0raw0(f0raw0>f0ceil)=f0raw0(f0raw0>f0ceil)*0+f0ceil;f0raw0((f0raw0<f0floor)&(f0raw0>0))=f0raw0((f0raw0<f0floor)&(f0raw0>0))*0+f0floor;%dn2=floor(fs/(f0ceil*3*2));[f0raw2,ecr,ac1]=zrefineF06m(decimate(x,dn),fs/dn,f0raw0,1024,1.1,3,1,1,length(f0raw0));if imgi==1; hold on; semilogy(f0raw2,'g');grid on;end;%------ Threshoulding for silence and V/UV
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -