⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ptrack.c

📁 speech signal process tools
💻 C
字号:
/*	 This material contains proprietary software of Entropic Processing, Inc. Any reproduction, distribution, or publication without the the prior	    written permission of Entropic Processing, Inc. is strictly prohibited. Any public distribution of copies of this work authorized in writing by Entropic Processing, Inc. must bear the notice			       	Copyright 1986, Entropic Proccessing, Inc	(C) 1985, Entropic Processing, Inc.         *//*---------------------------------------------------------------------------+|                                                                            ||  PTRACK - perform pitch tracking using coherence method		     || written by: Shankar Narayan, Entropic Processing, Inc.		     || Modified by David Burton to conform to SPS standards -     4/23/86         ||									     || Usage: ptrack [-h hisfile] [-x level] [-b bne] [-p parfile] in out	     ||   If specified, hisfile gets a log of how each pulse is processed.	     ||   -x specifies a level for debug messages.				     ||   -p specifies an alternate parameter file.				     ||   -b specify background noise power.					     ||   in specifies the input sampled data file.				     ||   out specifies the output pitch file.				     |----------------------------------------------------------------------------*/#ifdef SCCSstatic char *sccsid = "@(#)ptrack.c	1.7 8/20/86 EPI";#endif#include <sps/sps.h>#include <sps/pitch.h>#include <stdio.h>#define uuu   0#define uuu1  1#define uuu2  2#define uuuv  3#define uuvv  4#define uvvv  5#define uvvu  6#define vvv   7#define vvvu  8#define vvuu  9#define vvuu1 10#define vvuv  11#define vuuv  12#define vvv1  13#define Fprintf (void)fprintf#define nolookback 0#include <sys/types.h>#include <sys/times.h>extern char *v_chr[];extern int  disp;int     freq_knt[17], prev_ptrack_state, freq[14][14];float   time_spent[14][14];struct tms  timebuf;time_t t0, tf;/* SPS Parameters */int     frmlen, stepsz, wflt_order, u_pflt_order, v_pflt_order;int     pt_wsize, pitch_range_min, pitch_range_max;int     debug_level = 0, coh_data = BOTH;short   p_method, win_type, coh_method, dcrem, pb_offset, pulse_search,        min_coh_len, max_coh_len;double  coh_window_frac, harmonic_mult, tm_uv_thresh, am_uv_thresh, gain_halt       ,hp_thresh;#define NPR 3float   lpf_num[NPR], lpf_den[NPR], uv_thresh;int     min_pitch, max_pitch;int     ptrack_state = uuu;/*Assuming a frame length of 180, pitch buffer stores a total of 540 samples.A location called "smpnum" stores the absolute sample number (in the data file)being processed. Pitch buffer stores speech data starting from location(smpnum-180). The absolute value of location referred by variable "pulse_locn"is (smpnum+pulse_locn).*/#define BUF_SIZE 540int     pulse_locn, smpnum;float   rawdata[BUF_SIZE], resdata[BUF_SIZE];/* buffers used for storing pitch info for 4 frames */int     prev_frame_size[4], prev_frame_locn[4], prev_frame_type[4];/* Important measurements provided by Basic pulse extractor */float   RELENT_min, opt_ikdst, peakcor;int     best_pitch, prev_best_pitch;int     n_pit_rec = 0;FILE * outptr = stdout, *anaptr, *hptr = NULL, *ifd = stdin;void parm_err ();double  bne = 0, bne_thr = 0;main (argc, argv)int     argc;char  **argv;{    float   rdata[BUF_SIZE], pdata[BUF_SIZE], coh_dst[2];    int     i, j, c, matsiz, istan, nskip, nan, ns, ds;    int     frame_locn[2], frame_size[2], frame_type[2], pitch_buf_size;    char    datatype;    struct pitch   *p;/*  Process headers and generate new header */#include "hdrproc.c"    p = allo_pitch_rec ();/*  initialize pitch trajectory buffers */    for (i = 0; i < 4; i++)	prev_frame_size[i] = 0;    min_pitch = pitch_range_min;    max_pitch = pitch_range_max;/* skip records if needed*/    /* first determine type of data */    if (ih -> common.ndouble)	datatype = 'd';    if (ih -> common.nfloat)	datatype = 'f';    if (ih -> common.nlong)	datatype = 'l';    if (ih -> common.nshort)	datatype = 'w';    if (ih -> common.nchar)	datatype = 'b';    if (debug_level)	Fprintf (stderr, "datatype is %c\n", datatype);/* skip initial data, if necessary */    nskip = istan - frmlen - 45;    if (nskip > 0)	skiprec (ifd, (long) nskip, size_rec(ih));    /* Initially read three frames of data */    pitch_buf_size = 3 * frmlen;    smpnum = istan - pitch_buf_size;    prev_frame_locn[0] = prev_frame_locn[1] = istan - frmlen;    prev_frame_locn[2] = istan - 0.5 * frmlen;    pulse_locn = pitch_buf_size;    times (&timebuf);    t0 = timebuf.tms_utime;    while (smpnum < istan + nan)    {	/* if pitch buffer is not full, read new data */	if (pulse_locn > 0)	{	    getpdata (rdata, pdata, datatype, pulse_locn);	    smpnum = smpnum + pulse_locn;	    for (i = 0, j = pulse_locn; i < pitch_buf_size - pulse_locn; i++)	    {		rawdata[i] = rawdata[j];		resdata[i] = resdata[j++];	    }	    for (i = 0, j = pitch_buf_size - pulse_locn; i < pulse_locn; i++)	    {		rawdata[j] = rdata[i];		resdata[j++] = pdata[i];	    }	    pulse_locn = 0;	}	if (ptrack_state == uuu || ptrack_state == uuu1)	    get_raw_pitch (u_pflt_order, nolookback);	else	    get_raw_pitch (v_pflt_order, nolookback);	prev_ptrack_state = ptrack_state;	track_pitch (resdata, frame_locn, frame_size, frame_type, coh_dst);	if (debug_level)	    printf ("frame_size[0] is %d\n", frame_size[0]);	if (frame_size[0] > -1)	    cleanup (frame_locn[0], frame_size[0], frame_type[0], p, coh_dst);	times (&timebuf);	tf = timebuf.tms_utime;	time_spent[prev_ptrack_state][ptrack_state] += tf - t0;	freq[prev_ptrack_state][ptrack_state]++;	t0 = tf;    }    if (hptr)    {	fprintf (hptr, "\n\nDisp = PULSE_DUR_EST - PEAK_LOCN in RES around (PULSE_LOCN + PULSE_DUR_EST)");	fprintf (hptr, "\n\n\t\tDisp\tfreq(Disp)\n");	for (i = 0; i < 17; i++)	    fprintf (hptr, "\t\t%3d\t%5d\n", i - 8, freq_knt[i]);	fprintf (hptr, "\n\n\tPRE_STATE\tNEXT_STATE\tFREQ\tTotal_CPU_time\tAvg_CPU_time (msec)\n");	for (i = 0; i < 14; i++)	    for (j = 0; j < 14; j++)	    {		int     k, l;		if (freq[i][j] > 0)		{		    k = 1000.0 * time_spent[i][j] / 60.0 + 0.5;		    l = 1000.0 * time_spent[i][j] / (60 * freq[i][j]) + 0.5;		    fprintf (hptr, "\n\t%s\t\t%s\t\t%4d\t%8d\t%8d",			    v_chr[i], v_chr[j], freq[i][j], k, l);		}	    }    }    /* Finish header, write it out, copy pitch data to true output file */    oh -> common.ndrec = n_pit_rec;    write_header (oh, outptr);    rewind (anaptr);    tmhd = read_header (anaptr);    for (i = 0; i < n_pit_rec; i++)    {	get_pitch_rec (p, anaptr);	put_pitch_rec (p, outptr);/*    printf("wrote PIT record number %d\n", i);*/    }    (void) unlink (tmp_name);    exit (0);    /* NOTREACHED */}short   getmthd (pstring)char   *pstring;{    char   *string;    string = getsym_s (pstring);    if (strcmp (string, "C_PDM") == 0)	return C_PDM;    else	parm_err (string, pstring);}void parm_err (string, pstring)char   *string, *pstring;{    Fprintf (stderr, "ptrack: unknown string %s for %s in SPS parameter file\n",	    string, pstring);}put_pitch (fd, p, tag, pval, coh_dst)FILE * fd;struct pitch   *p;int     tag;float   pval, coh_dst[];{    p -> tag = tag;    p -> pulse_dist = pval;    p -> raw_pulse_dist = coh_dst[0];    put_pitch_rec (p, fd);    if (debug_level)	Fprintf (stderr, "put_pitch: pitch pos=%d, pval=%f, coh_dst = %f\n",		tag, pval, coh_dst);    n_pit_rec++;    return;}#define	locn_offset	5get_raw_pitch (fltorder, lookback_flag)int     fltorder, lookback_flag;{    extern float    peakcor, RELENT_min;    extern int  initflag, best_pitch, prev_best_pitch;    extern char *v_chr[];    float   raw_RELENT[162], res_RELENT[162], ikdstn[162],            raw_Rxy[162], res_Rxy[162], Newton_Raphson ();    int     pitch, best_pitch1;    switch (coh_data)    {	case RAW: /* Compute coherence of raw speech */	    get_coherence (rawdata, fltorder, lookback_flag, raw_RELENT, raw_Rxy, ikdstn);/*  Get pitch estimate from raw speech */	    BPE_logic (fltorder, raw_RELENT, ikdstn, raw_Rxy, raw_Rxy, &best_pitch, 'r');	    peakcor = raw_Rxy[best_pitch];	    RELENT_min = raw_RELENT[best_pitch];	    break;	case RESIDUAL: /* compute coherence of residual speech */	    get_coherence (resdata, fltorder, lookback_flag, res_RELENT, res_Rxy, ikdstn);/*  Get pitch estimate from residual speech */	    BPE_logic (fltorder, res_RELENT, ikdstn, res_Rxy, res_Rxy, &best_pitch, 'w');	    RELENT_min = res_RELENT[best_pitch];	    peakcor = res_Rxy[best_pitch];	    break;	case BOTH: /* Compute coherence of raw speech */	    get_coherence (rawdata, fltorder, lookback_flag, raw_RELENT, raw_Rxy, ikdstn);/* compute coherence of residual speech */	    get_coherence (resdata, fltorder, lookback_flag, res_RELENT, res_Rxy, ikdstn);/*  Get pitch estimate from residual speech */	    BPE_logic (fltorder, res_RELENT, ikdstn, raw_Rxy, res_Rxy, &best_pitch, 'w');	    RELENT_min = res_RELENT[best_pitch];	    peakcor = res_Rxy[best_pitch];/*  If residual speech is not highly periodic, get a pitch estimate from raw speech */	    if (RELENT_min > uv_thresh / 3)	    {		BPE_logic (fltorder, raw_RELENT, ikdstn, raw_Rxy, raw_Rxy, &best_pitch1, 'r');		if (raw_RELENT[best_pitch1] < uv_thresh / 3)		{		    best_pitch = best_pitch1;		    peakcor = raw_Rxy[best_pitch];		    RELENT_min = raw_RELENT[best_pitch];		}	    }/* If residual speech is not coherent, check if raw speech is     coherent for some lag around the expected value */	    if (RELENT_min > uv_thresh)	    {		int     exp_pitch = best_pitch, pmin, pmax;		if (ptrack_state == vvv)		    exp_pitch = prev_best_pitch;		pmin = (exp_pitch - 4 > min_pitch) ? exp_pitch - 4 : min_pitch;		pmax = (exp_pitch + 4 < max_pitch) ? exp_pitch + 4 : max_pitch;		for (pitch = pmin; pitch <= pmax; pitch++)		{		    float   temp = raw_RELENT[pitch];		    if (raw_Rxy[pitch] > 0.0 && temp < uv_thresh && temp < RELENT_min)		    {			best_pitch = pitch;			RELENT_min = temp;			peakcor = raw_Rxy[best_pitch];		    }		}		if (hptr && RELENT_min < uv_thresh)		    Fprintf (hptr, "BANG: RAW SPEECH USEFUL\n");	    }    }    opt_ikdst = ikdstn[best_pitch];    if (hptr)	Fprintf (hptr,		"[%d,%s]%s:\tPitch =%3d   RELENTmin =%6.3f   Cxy(0) =%6.3f   IKdist =%6.3f\n",		pulse_locn + smpnum, v_chr[ptrack_state], (lookback_flag == 1) ?		"(b)" : "", best_pitch, RELENT_min, peakcor, opt_ikdst);    if (debug_level)	Fprintf (stderr,		"[%d,%s]%s:\tPitch =%3d   RELENTmin =%6.3f   Cxy(0) =%6.3f   IKdist =%6.3f\n",		pulse_locn + smpnum, v_chr[ptrack_state], (lookback_flag == 1) ?		"(b)" : "", best_pitch, RELENT_min, peakcor, opt_ikdst);}get_coherence (data, fltorder, lookback_flag, RELENT, Rxy, ikdstn)int     fltorder, lookback_flag;float   data[], RELENT[], Rxy[], ikdstn[];{    extern int  initflag;    int     pitch, vct_size, fbeg, fast_update_flag;    fast_update_flag = 0;    for (pitch = min_pitch - 1; pitch <= max_pitch + 1; pitch++)    {	vct_size = 6 * pitch / 10;	if (vct_size < pitch_range_min)	    vct_size = pitch_range_min;	fbeg = frmlen + pulse_locn - fltorder - locn_offset;	if (lookback_flag)	    fbeg -= pitch;	/* 	 The following is done to ensure that only speech in the data	 buffer is accessed during lookback mode	 */	if (fbeg > 0)	{	    initflag = 2;	    fast_coher (&data[fbeg], vct_size, fltorder, pitch,		    fast_update_flag, lookback_flag,		    &RELENT[pitch], &ikdstn[pitch], &Rxy[pitch]);	    fast_update_flag = 1;	}	else	{	    RELENT[pitch] = 1.0;	    ikdstn[pitch] = 1.0;	    Rxy[pitch] = 0.0;	}    }}BPE_logic (fltorder, RELENT, ikdstn, Rxy1, Rxy2, ptr_BPE_pit_est, data_type)float   RELENT[], ikdstn[], Rxy1[], Rxy2[];int    *ptr_BPE_pit_est, fltorder, data_type;{    float   t1, t2, t3, int_RELENT, int_pitch, fer, ber, REL_min;    int     pit_tab[100], best_PIT = min_pitch, ptab_size = 0, pitch;    /*  interpolate RELENT at local minima */    REL_min = 1.0;    fer = RELENT[min_pitch] - RELENT[min_pitch - 1];    for (pitch = min_pitch; pitch <= max_pitch; pitch++)    {	ber = RELENT[pitch] - RELENT[pitch + 1];	if (Rxy1[pitch] > 0 && Rxy2[pitch] > 0 && fer < 0.0 && ber <= 0.0)	{	    int_RELENT = RELENT[pitch];	    if (fltorder == 0)	    {		/* interpolate Rxy[pitch] function */		t1 = Rxy2[pitch + 1] - Rxy2[pitch - 1];		t2 = Rxy2[pitch + 1] + Rxy2[pitch - 1] - 2 * Rxy2[pitch];		int_pitch = pitch - 0.5 * t1 / t2;		t3 = Rxy2[pitch] - 0.125 * t1 * t1 / t2;		int_RELENT = 1.0 - t3 * t3;	    }	    if (int_RELENT < uv_thresh)	    {		pit_tab[ptab_size++] = pitch;		if (hptr)		    Fprintf (hptr,			    "\tPitch(%c)=%3d  RELENT =%8.3f  ikdst =%8.3f  Cxy(0) =%8.3f\n",			    data_type, pitch, int_RELENT, ikdstn[pitch], Rxy2[pitch]);	    }	    if (int_RELENT < REL_min)	    {		best_PIT = pitch;		REL_min = int_RELENT;	    }	    RELENT[pitch] = int_RELENT;	}	fer = -ber;    }    if (ptab_size > 1)	best_PIT = scan_pit_tab (pit_tab, ptab_size, RELENT, ikdstn);    *ptr_BPE_pit_est = best_PIT;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -