📄 dpfund.c
字号:
/* dpfund.c */
/*
 * This material contains unpublished, proprietary software of
 * Entropic Research Laboratory, Inc. Any reproduction, distribution,
 * or publication of this work must be authorized in writing by Entropic
 * Research Laboratory, Inc., and must bear the notice:
 *
 *    "Copyright (c) 1987-1990 AT&T, Inc.
 *    "Copyright (c) 1986-1990 Entropic Speech, Inc.
 *    "Copyright (c) 1990-1991 Entropic Research Laboratory, Inc.
 *                   All rights reserved"
 *
 * The copyright notice above does not evidence any actual or intended
 * publication of this source code.
 *
 * Written by:
 * Checked by:
 * Revised by:
 *
 * Brief description:
 *   Fundamental frequency (F0) estimation using the normalized cross
 *   correlation function and dynamic programming.
 */

/* SCCS identification string: file name, revision and date of this source. */
static char *sccs_id = "@(#)dpfund.c 1.6 12/13/93 ATT/ESI/ERL";

/*
 * A fundamental frequency estimation algorithm using the normalized
 * cross correlation function and dynamic programming.
 */

/*
 * For each frame, up to MAXCANDS crosscorrelation peaks will be considered
 * as F0 intervals. Each will be scored according to its within-frame
 * properties (relative amplitude, relative location), and according to its
 * connectivity with each of the candidates in the previous frame. Also, a
 * probability of voicing measure will be computed based on max. ampl. of
 * the correlation function and frame-to-frame continuity properties.
 */

/*
 * At each frame, each candidate has associated with it the following items:
 *   - its peak value
 *   - its peak value modified by its within-frame properties
 *   - its location
 *   - the candidate # in the previous frame yielding the min. err.
 *     (this is the optimum path pointer!)
 *   - its cumulative error: (local error + connectivity error +
 *     cumulative error of its best-previous-frame-match).
 *
 * Candidates will be ordered according to increasing F0 interval,
 * for convenience.
 */

#include <Objects.h>
#include "tracks.h"

#define MAXPEAKS 40 /* max. # of peaks to examine for cands. */
#define MAXCANDS 10 /* max. # peaks to allow as cands. at each fr. 
*/int xpr=0, ypr=0;extern int debug;double is_voiced();double find_rmsmax();char *localloc();/*********************************************************************/ /* Here are the tweak factors for the F0 estimator. *//*********************************************************************/double CO_CAND = .3; /* all peaks within CO_CAND of the max are considered possible candidates. */double PEAK_WT = 1.0; /* weight given to peak "quality". */double LAG_WT = 0.3; /* linear attenuation coef. for lag weighting of crosscorrelation coefficients. *//* This default reduces the amplitude of the longest-lag correlation coefficient by .3. */double FREQ_WT = 2.0; /* The cost per octave per second of frequency change */double F_THRESH = 20.0; /* Allowable rate of frequency change before applying ANY frequency change penalty (oct/sec) *//*********************************************************************//*********************************************************************/Signal *dpfund(sp, wdur, fmin, fmax, frame_int, cros, dplat, maxrms_dur, maxrms_val)/* All frequencies are in Hz.; times are in seconds; delays in samples. */Signal *sp; /* a speech waveform signal */CROSS ***cros; /* structure array containing (or to receive) parameters derived from the crosscorrelation comp. */DPREC ***dplat; /* the DP lattice used during f0 est. returned for debug */double wdur, /* ref. wind. dur. for crosscorr. comps. 
*/ maxrms_dur, /*window over past in which to pick max rms; 0 means all of past (as in older versions)*/ maxrms_val, /*fixed value for maxrms; i.e., no window used*/ fmin, /* minimum F0 to consider */ fmax, /* maximum F0 to consider */ frame_int; /* interval between frames for F0 estimates */{ double freqwt, lagwt, ftemp, duration, *f, err, ferr, errmin, mincand, locerr, fmaxval, engref, engpre, k1, kfac, efac, prewt, fthresh, ttemp, engthr, sum, *f0p, *rmsp, *acpkp, *kp, *vuvp, rms, **datap; double rmsmax = 0.0; double *rmsmaxa = NULL; int maxrms_frames = 0; /*number of previous frames for max rms*/ register short *p, *q, *r, *s; register int i, j, k, m; int l, nframes, smaxval; int step, size, nlags, start, stop, ncomp, maxloc, maxpre, loc1, loc2; int ncand, ncandp, minloc; short peaks[MAXPEAKS], locs[MAXPEAKS], *data; CROSS **cp; DPREC **dp; Signal *f0s, *new_signal(); char *cpp, temp[600], *new_ext(); if ((maxrms_val != 0.0) && (maxrms_dur != 0.0)) { printf("dpfund: can't specify both rms window and rms value\n"); return(NULL); } /*set up maxrms value if constant value supplied */ if (maxrms_val != 0.0) rmsmax = maxrms_val; if( (! sp) || (! sp->buff_size)){ printf("No sp!\n"); return(NULL); /* check data pointer */ } /*compute number of frames over which maxrms energy is kept*/ maxrms_frames = ((maxrms_dur == 0) ? 0 : round(1 + ((maxrms_dur - wdur)/frame_int))); data = ((short**)sp->data)[0]; step = round(frame_int * sp->freq); size = round(wdur * sp->freq); frame_int = ((double)step)/sp->freq; wdur = ((double)size)/sp->freq; start = round(sp->freq / fmax); stop = round(sp->freq / fmin); nlags = stop - start + 1; ncomp = size + stop + 1; /* # of samples required by xcorr comp. per fr. */ /*************************************************************/ /* SET UP THE FUDGE FACTORS */ /*************************************************************/ /* Lag-dependent weighting factor to emphasize early peaks. 
*/ /* lagwt = ((10000.0/sp->freq) * LAG_WT)/(nlags+start); */ lagwt = (10000.0 * LAG_WT * fmin)/(sp->freq * (sp->freq + fmin)); /* Penalty for a time skip in F0 per frame */ freqwt = (100.0 * FREQ_WT) /(sp->freq * (frame_int)); /* Threshold to exceed before applying ANY frequency-shift penalty */ fthresh = F_THRESH * (frame_int); nframes = 1 + ((sp->buff_size - ncomp) / step); /* # of whole analysis frames */ cpp = new_ext(sp->name,"f0"); datap = (double**)localloc(5 * sizeof(double*)); if(datap && (f0s = new_signal(cpp,SIG_UNKNOWN,dup_header(sp->header),datap,nframes, 1.0/frame_int, 5))) { f0s->type = P_DOUBLES | SIG_F0; /* Allocate space for structure pointer, structures, and xcorr arrays. */ if(debug&DEB_ENTRY) notify("Allocating CROSS",0,0); *cros = cp = (CROSS**)localloc(sizeof(CROSS*) * nframes); for(i=0;i<nframes;i++){ cp[i] = (CROSS*)localloc(sizeof(CROSS)); cp[i]->correl = (short*)localloc(sizeof(short)*(10 +nlags)); /** extra room allocated to check segmentation fault problem !?!?!?!? **/ } /* Allocate arrays to return F0 and probability of voicing. */ /** extra room allocated to check segmentation fault problem !?!?!?!? **/ if(debug&DEB_ENTRY) notify("Allocating f0, vuv, acpk, rms and k",0,0); f0p = (double*)localloc(sizeof(double) * (10 + nframes)); vuvp = (double*)localloc(sizeof(double)*(10+nframes)); rmsp = (double*)localloc(sizeof(double) * (10 + nframes)); acpkp = (double*)localloc(sizeof(double) * (10 + nframes)); kp = (double*)localloc(sizeof(double) * (10 + nframes)); /* Allocate space for the DP storage lattice. */ /** extra room allocated to check segmentation fault problem !?!?!?!? **/ if(debug&DEB_ENTRY){ notify("Allocating dplat",0,0); printf(" nframes%d\n",nframes); } *dplat = dp = (DPREC**)localloc(sizeof(DPREC*) * (10 + nframes)); for(i=0;i<nframes;i++){ dp[i] = (DPREC*)localloc(sizeof(DPREC)); dp[i]->ncands = 0; /** extra room allocated to check segmentation fault problem !?!?!?!? 
**/ dp[i]->locs = (short*)localloc(sizeof(short) * (2 + MAXCANDS)); dp[i]->pvals = (short*)localloc(sizeof(short) * (2 + MAXCANDS)); dp[i]->mpvals = (double*)localloc(sizeof(double) * (2 + MAXCANDS)); dp[i]->prept = (short*)localloc(sizeof(short) * (2 + MAXCANDS)); dp[i]->dpvals = (double*)localloc(sizeof(double) * (2 + MAXCANDS)); /** extra room allocated to check segmentation fault problem !?!?!?!? **/ } /* Initialize rms peak finder. */ if (maxrms_frames != 0) /* must keep track of maxrms throughout*/ rmsmaxa = (double*)localloc(nframes * sizeof(double)); /* initialize the errors to zero */ for(f= dp[0]->dpvals, i=0; i<MAXCANDS; i++) *f++ = 0; /***********************************************************************/ /* MAIN FUNDAMENTAL FREQUENCY ESTIMATION LOOP */ /***********************************************************************/ if(debug&DEB_ENTRY){ printf("Entering main loop; "); printf(" nframes:%d size:%d start:%d nlags:%d\n", nframes,size,start,nlags); } for(i= 0; i < nframes; i++){ cross(data + (i * step), size, start, nlags, &engref, &k1, &maxloc, &fmaxval, cp[i]->correl); cp[i]->maxloc = maxloc; cp[i]->maxval = fmaxval; cp[i]->k1 = k1; cp[i]->rms = sqrt(engref/size); cp[i]->nlags = nlags; cp[i]->firstlag = start; k1 = cp[i]->k1; fmaxval = cp[i]->maxval; rms = cp[i]->rms; if (maxrms_val == 0) { /*set maxrms value(s) if window was specified*/ if (maxrms_frames == 0) { if (rms > rmsmax) rmsmax = rms; } else { rmsmaxa[i] = find_rmsmax(cp, i, maxrms_frames); } } get_cand(cp[i],peaks,locs,&ncand); /* return high peaks in xcorr */ if(ncand) { /* were there any f0 peak candidates? */ if(ncand > MAXCANDS){ /* need to prune candidates? 
*/ for(j=0; j < ncand-1; j++){ for(k=0, m=ncand-1-j; k < m; k++) if(peaks[k] < peaks[k+1]){ /* sort by decreasing peak value */ smaxval = peaks[k]; maxloc = locs[k]; locs[k] = locs[k+1]; peaks[k] = peaks[k+1]; peaks[k+1] = smaxval; locs[k+1] = maxloc; } } ncand = MAXCANDS; } /* Move the peak value and location arrays into the dp structure */ for(j=0; j < ncand; j++){ dp[i]->locs[j] = locs[j]; dp[i]->pvals[j] = peaks[j]; } dp[i]->ncands = ncand; /* Apply a lag-dependent weighting to the peaks to encourage the selection of the first major peak. Translate the modified peak values into costs (high peak ==> low cost). */ for(j=0; j < ncand; j++){ ftemp = 1.0 - ((double)locs[j] * lagwt); dp[i]->mpvals[j] = 1.0 - ((double)peaks[j] * ftemp)/32767.0; } /* PERFORM THE DISTANCE MEASURES AND ACCUMULATE THE ERRORS. */ ncandp = ( i )? dp[i-1]->ncands : 0 ; for(k=0; k<ncand; k++){ /* for each of the current candidates... */ minloc = 0; errmin = 2.0e30; loc2 = dp[i]->locs[k]; for(j=0; j<ncandp; j++){ /* for each PREVIOUS candidate... */ /* Get error due to absolute time difference of peaks. */ loc1 = dp[i-1]->locs[j]; if(loc1) { /* did previous frame have valid candidates? */ if(loc1 > loc2){ /* compute fractional frequency change */ ttemp = loc1 - loc2; ftemp = ttemp/loc1; } else { ttemp = loc2 - loc1; ftemp = ttemp/loc2; } } else { /* don't penalize for F0 hops */ ftemp = 0; } /* Is this frequency change unusually large? */ ferr = (ftemp > fthresh)? ftemp * freqwt : 0 ; /* Add in cumulative error associated with previous peak. */ err = ferr + dp[i-1]->dpvals[j]; if(err < errmin){ /* find min. error */ errmin = err; minloc = j; } } /* Now have found the best path from this cand. to prev. frame */ if( i ){ dp[i]->dpvals[k] = errmin + (PEAK_WT * dp[i]->mpvals[k]); dp[i]->prept[k] = minloc;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -