📄 ptrack.c
字号:
/* This material contains proprietary software of Entropic Processing, Inc. Any reproduction, distribution, or publication without the the prior written permission of Entropic Processing, Inc. is strictly prohibited. Any public distribution of copies of this work authorized in writing by Entropic Processing, Inc. must bear the notice Copyright 1986, Entropic Proccessing, Inc (C) 1985, Entropic Processing, Inc. *//*---------------------------------------------------------------------------+| || PTRACK - perform pitch tracking using coherence method || written by: Shankar Narayan, Entropic Processing, Inc. || Modified by David Burton to conform to SPS standards - 4/23/86 || || Usage: ptrack [-h hisfile] [-x level] [-b bne] [-p parfile] in out || If specified, hisfile gets a log of how each pulse is processed. || -x specifies a level for debug messages. || -p specifies an alternate parameter file. || -b specify background noise power. || in specifies the input sampled data file. || out specifies the output pitch file. |----------------------------------------------------------------------------*/#ifdef SCCSstatic char *sccsid = "@(#)ptrack.c 1.7 8/20/86 EPI";#endif#include <sps/sps.h>#include <sps/pitch.h>#include <stdio.h>#define uuu 0#define uuu1 1#define uuu2 2#define uuuv 3#define uuvv 4#define uvvv 5#define uvvu 6#define vvv 7#define vvvu 8#define vvuu 9#define vvuu1 10#define vvuv 11#define vuuv 12#define vvv1 13#define Fprintf (void)fprintf#define nolookback 0#include <sys/types.h>#include <sys/times.h>extern char *v_chr[];extern int disp;int freq_knt[17], prev_ptrack_state, freq[14][14];float time_spent[14][14];struct tms timebuf;time_t t0, tf;/* SPS Parameters */int frmlen, stepsz, wflt_order, u_pflt_order, v_pflt_order;int pt_wsize, pitch_range_min, pitch_range_max;int debug_level = 0, coh_data = BOTH;short p_method, win_type, coh_method, dcrem, pb_offset, pulse_search, min_coh_len, max_coh_len;double coh_window_frac, harmonic_mult, tm_uv_thresh, am_uv_thresh, gain_halt ,hp_thresh;#define NPR 3float lpf_num[NPR], lpf_den[NPR], uv_thresh;int min_pitch, max_pitch;int ptrack_state = uuu;/*Assuming a frame length of 180, pitch buffer stores a total of 540 samples.A location called "smpnum" stores the absolute sample number (in the data file)being processed. Pitch buffer stores speech data starting from location(smpnum-180). The absolute value of location referred by variable "pulse_locn"is (smpnum+pulse_locn).*/#define BUF_SIZE 540int pulse_locn, smpnum;float rawdata[BUF_SIZE], resdata[BUF_SIZE];/* buffers used for storing pitch info for 4 frames */int prev_frame_size[4], prev_frame_locn[4], prev_frame_type[4];/* Important measurements provided by Basic pulse extractor */float RELENT_min, opt_ikdst, peakcor;int best_pitch, prev_best_pitch;int n_pit_rec = 0;FILE * outptr = stdout, *anaptr, *hptr = NULL, *ifd = stdin;void parm_err ();double bne = 0, bne_thr = 0;main (argc, argv)int argc;char **argv;{ float rdata[BUF_SIZE], pdata[BUF_SIZE], coh_dst[2]; int i, j, c, matsiz, istan, nskip, nan, ns, ds; int frame_locn[2], frame_size[2], frame_type[2], pitch_buf_size; char datatype; struct pitch *p;/* Process headers and generate new header */#include "hdrproc.c" p = allo_pitch_rec ();/* initialize pitch trajectory buffers */ for (i = 0; i < 4; i++) prev_frame_size[i] = 0; min_pitch = pitch_range_min; max_pitch = pitch_range_max;/* skip records if needed*/ /* first determine type of data */ if (ih -> common.ndouble) datatype = 'd'; if (ih -> common.nfloat) datatype = 'f'; if (ih -> common.nlong) datatype = 'l'; if (ih -> common.nshort) datatype = 'w'; if (ih -> common.nchar) datatype = 'b'; if (debug_level) Fprintf (stderr, "datatype is %c\n", datatype);/* skip initial data, if necessary */ nskip = istan - frmlen - 45; if (nskip > 0) skiprec (ifd, (long) nskip, size_rec(ih)); /* Initially read three frames of data */ pitch_buf_size = 3 * frmlen; smpnum = istan - pitch_buf_size; prev_frame_locn[0] = prev_frame_locn[1] = istan - frmlen; prev_frame_locn[2] = istan - 0.5 * frmlen; pulse_locn = pitch_buf_size; times (&timebuf); t0 = timebuf.tms_utime; while (smpnum < istan + nan) { /* if pitch buffer is not full, read new data */ if (pulse_locn > 0) { getpdata (rdata, pdata, datatype, pulse_locn); smpnum = smpnum + pulse_locn; for (i = 0, j = pulse_locn; i < pitch_buf_size - pulse_locn; i++) { rawdata[i] = rawdata[j]; resdata[i] = resdata[j++]; } for (i = 0, j = pitch_buf_size - pulse_locn; i < pulse_locn; i++) { rawdata[j] = rdata[i]; resdata[j++] = pdata[i]; } pulse_locn = 0; } if (ptrack_state == uuu || ptrack_state == uuu1) get_raw_pitch (u_pflt_order, nolookback); else get_raw_pitch (v_pflt_order, nolookback); prev_ptrack_state = ptrack_state; track_pitch (resdata, frame_locn, frame_size, frame_type, coh_dst); if (debug_level) printf ("frame_size[0] is %d\n", frame_size[0]); if (frame_size[0] > -1) cleanup (frame_locn[0], frame_size[0], frame_type[0], p, coh_dst); times (&timebuf); tf = timebuf.tms_utime; time_spent[prev_ptrack_state][ptrack_state] += tf - t0; freq[prev_ptrack_state][ptrack_state]++; t0 = tf; } if (hptr) { fprintf (hptr, "\n\nDisp = PULSE_DUR_EST - PEAK_LOCN in RES around (PULSE_LOCN + PULSE_DUR_EST)"); fprintf (hptr, "\n\n\t\tDisp\tfreq(Disp)\n"); for (i = 0; i < 17; i++) fprintf (hptr, "\t\t%3d\t%5d\n", i - 8, freq_knt[i]); fprintf (hptr, "\n\n\tPRE_STATE\tNEXT_STATE\tFREQ\tTotal_CPU_time\tAvg_CPU_time (msec)\n"); for (i = 0; i < 14; i++) for (j = 0; j < 14; j++) { int k, l; if (freq[i][j] > 0) { k = 1000.0 * time_spent[i][j] / 60.0 + 0.5; l = 1000.0 * time_spent[i][j] / (60 * freq[i][j]) + 0.5; fprintf (hptr, "\n\t%s\t\t%s\t\t%4d\t%8d\t%8d", v_chr[i], v_chr[j], freq[i][j], k, l); } } } /* Finish header, write it out, copy pitch data to true output file */ oh -> common.ndrec = n_pit_rec; write_header (oh, outptr); rewind (anaptr); tmhd = read_header (anaptr); for (i = 0; i < n_pit_rec; i++) { get_pitch_rec (p, anaptr); put_pitch_rec (p, outptr);/* printf("wrote PIT record number %d\n", i);*/ } (void) unlink (tmp_name); exit (0); /* NOTREACHED */}short getmthd (pstring)char *pstring;{ char *string; string = getsym_s (pstring); if (strcmp (string, "C_PDM") == 0) return C_PDM; else parm_err (string, pstring);}void parm_err (string, pstring)char *string, *pstring;{ Fprintf (stderr, "ptrack: unknown string %s for %s in SPS parameter file\n", string, pstring);}put_pitch (fd, p, tag, pval, coh_dst)FILE * fd;struct pitch *p;int tag;float pval, coh_dst[];{ p -> tag = tag; p -> pulse_dist = pval; p -> raw_pulse_dist = coh_dst[0]; put_pitch_rec (p, fd); if (debug_level) Fprintf (stderr, "put_pitch: pitch pos=%d, pval=%f, coh_dst = %f\n", tag, pval, coh_dst); n_pit_rec++; return;}#define locn_offset 5get_raw_pitch (fltorder, lookback_flag)int fltorder, lookback_flag;{ extern float peakcor, RELENT_min; extern int initflag, best_pitch, prev_best_pitch; extern char *v_chr[]; float raw_RELENT[162], res_RELENT[162], ikdstn[162], raw_Rxy[162], res_Rxy[162], Newton_Raphson (); int pitch, best_pitch1; switch (coh_data) { case RAW: /* Compute coherence of raw speech */ get_coherence (rawdata, fltorder, lookback_flag, raw_RELENT, raw_Rxy, ikdstn);/* Get pitch estimate from raw speech */ BPE_logic (fltorder, raw_RELENT, ikdstn, raw_Rxy, raw_Rxy, &best_pitch, 'r'); peakcor = raw_Rxy[best_pitch]; RELENT_min = raw_RELENT[best_pitch]; break; case RESIDUAL: /* compute coherence of residual speech */ get_coherence (resdata, fltorder, lookback_flag, res_RELENT, res_Rxy, ikdstn);/* Get pitch estimate from residual speech */ BPE_logic (fltorder, res_RELENT, ikdstn, res_Rxy, res_Rxy, &best_pitch, 'w'); RELENT_min = res_RELENT[best_pitch]; peakcor = res_Rxy[best_pitch]; break; case BOTH: /* Compute coherence of raw speech */ get_coherence (rawdata, fltorder, lookback_flag, raw_RELENT, raw_Rxy, ikdstn);/* compute coherence of residual speech */ get_coherence (resdata, fltorder, lookback_flag, res_RELENT, res_Rxy, ikdstn);/* Get pitch estimate from residual speech */ BPE_logic (fltorder, res_RELENT, ikdstn, raw_Rxy, res_Rxy, &best_pitch, 'w'); RELENT_min = res_RELENT[best_pitch]; peakcor = res_Rxy[best_pitch];/* If residual speech is not highly periodic, get a pitch estimate from raw speech */ if (RELENT_min > uv_thresh / 3) { BPE_logic (fltorder, raw_RELENT, ikdstn, raw_Rxy, raw_Rxy, &best_pitch1, 'r'); if (raw_RELENT[best_pitch1] < uv_thresh / 3) { best_pitch = best_pitch1; peakcor = raw_Rxy[best_pitch]; RELENT_min = raw_RELENT[best_pitch]; } }/* If residual speech is not coherent, check if raw speech is coherent for some lag around the expected value */ if (RELENT_min > uv_thresh) { int exp_pitch = best_pitch, pmin, pmax; if (ptrack_state == vvv) exp_pitch = prev_best_pitch; pmin = (exp_pitch - 4 > min_pitch) ? exp_pitch - 4 : min_pitch; pmax = (exp_pitch + 4 < max_pitch) ? exp_pitch + 4 : max_pitch; for (pitch = pmin; pitch <= pmax; pitch++) { float temp = raw_RELENT[pitch]; if (raw_Rxy[pitch] > 0.0 && temp < uv_thresh && temp < RELENT_min) { best_pitch = pitch; RELENT_min = temp; peakcor = raw_Rxy[best_pitch]; } } if (hptr && RELENT_min < uv_thresh) Fprintf (hptr, "BANG: RAW SPEECH USEFUL\n"); } } opt_ikdst = ikdstn[best_pitch]; if (hptr) Fprintf (hptr, "[%d,%s]%s:\tPitch =%3d RELENTmin =%6.3f Cxy(0) =%6.3f IKdist =%6.3f\n", pulse_locn + smpnum, v_chr[ptrack_state], (lookback_flag == 1) ? "(b)" : "", best_pitch, RELENT_min, peakcor, opt_ikdst); if (debug_level) Fprintf (stderr, "[%d,%s]%s:\tPitch =%3d RELENTmin =%6.3f Cxy(0) =%6.3f IKdist =%6.3f\n", pulse_locn + smpnum, v_chr[ptrack_state], (lookback_flag == 1) ? "(b)" : "", best_pitch, RELENT_min, peakcor, opt_ikdst);}get_coherence (data, fltorder, lookback_flag, RELENT, Rxy, ikdstn)int fltorder, lookback_flag;float data[], RELENT[], Rxy[], ikdstn[];{ extern int initflag; int pitch, vct_size, fbeg, fast_update_flag; fast_update_flag = 0; for (pitch = min_pitch - 1; pitch <= max_pitch + 1; pitch++) { vct_size = 6 * pitch / 10; if (vct_size < pitch_range_min) vct_size = pitch_range_min; fbeg = frmlen + pulse_locn - fltorder - locn_offset; if (lookback_flag) fbeg -= pitch; /* The following is done to ensure that only speech in the data buffer is accessed during lookback mode */ if (fbeg > 0) { initflag = 2; fast_coher (&data[fbeg], vct_size, fltorder, pitch, fast_update_flag, lookback_flag, &RELENT[pitch], &ikdstn[pitch], &Rxy[pitch]); fast_update_flag = 1; } else { RELENT[pitch] = 1.0; ikdstn[pitch] = 1.0; Rxy[pitch] = 0.0; } }}BPE_logic (fltorder, RELENT, ikdstn, Rxy1, Rxy2, ptr_BPE_pit_est, data_type)float RELENT[], ikdstn[], Rxy1[], Rxy2[];int *ptr_BPE_pit_est, fltorder, data_type;{ float t1, t2, t3, int_RELENT, int_pitch, fer, ber, REL_min; int pit_tab[100], best_PIT = min_pitch, ptab_size = 0, pitch; /* interpolate RELENT at local minima */ REL_min = 1.0; fer = RELENT[min_pitch] - RELENT[min_pitch - 1]; for (pitch = min_pitch; pitch <= max_pitch; pitch++) { ber = RELENT[pitch] - RELENT[pitch + 1]; if (Rxy1[pitch] > 0 && Rxy2[pitch] > 0 && fer < 0.0 && ber <= 0.0) { int_RELENT = RELENT[pitch]; if (fltorder == 0) { /* interpolate Rxy[pitch] function */ t1 = Rxy2[pitch + 1] - Rxy2[pitch - 1]; t2 = Rxy2[pitch + 1] + Rxy2[pitch - 1] - 2 * Rxy2[pitch]; int_pitch = pitch - 0.5 * t1 / t2; t3 = Rxy2[pitch] - 0.125 * t1 * t1 / t2; int_RELENT = 1.0 - t3 * t3; } if (int_RELENT < uv_thresh) { pit_tab[ptab_size++] = pitch; if (hptr) Fprintf (hptr, "\tPitch(%c)=%3d RELENT =%8.3f ikdst =%8.3f Cxy(0) =%8.3f\n", data_type, pitch, int_RELENT, ikdstn[pitch], Rxy2[pitch]); } if (int_RELENT < REL_min) { best_PIT = pitch; REL_min = int_RELENT; } RELENT[pitch] = int_RELENT; } fer = -ber; } if (ptab_size > 1) best_PIT = scan_pit_tab (pit_tab, ptab_size, RELENT, ikdstn); *ptr_BPE_pit_est = best_PIT;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -