cont_ad_base.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,062 行 · 第 1/3 页

C
1,062
字号
     * PWP: in SReed's original, he cleared the histogram here.     * I can't fathom why.     */    return 0;}/* * Main silence/speech region detection routine.  If currently in * SILENCE state, switch to SPEECH state if a window (r->winsize) * of frames is mostly non-silence.  If in SPEECH state, switch to * SILENCE state if the window is mostly silence. */static void boundary_detect (cont_ad_t *r, int32 frm){    spseg_t *seg;    int32 f;        assert (r->n_other >= 0);        r->win_validfrm++;    if (r->state == CONT_AD_STATE_SIL) {	if (r->frm_pow[frm] >= r->thresh_speech) {	    r->n_other++;	    r->n_in_a_row++;	} else {	    r->n_in_a_row = 0;	}#ifdef CONT_AD_DEBUG	printf (" . %2d.%2d", r->frm_pow[frm], r->n_other);#endif    } else {	if (r->frm_pow[frm] <= r->thresh_sil) {	    r->n_other++;	    r->n_in_a_row++;	} else {	    r->n_in_a_row = 0;	}#ifdef CONT_AD_DEBUG	printf (" # %2d.%2d", r->frm_pow[frm], r->n_other);#endif    }    fflush (stdout);    if (r->win_validfrm < r->winsize)	/* Not reached full analysis window size */	return;    assert (r->win_validfrm == r->winsize);//    fprintf(stderr, "State is %s n_other is %d\n", r->state == CONT_AD_STATE_SIL ?//	    "silence" : "speech", r->n_other);        if (r->state == CONT_AD_STATE_SIL) {	/* Currently in SILENCE state */	if (r->n_frm >= r->winsize + r->leader) {	    if (r->n_other >= r->speech_onset) {		/* Speech detected; create speech segment description */		seg = malloc (sizeof(*seg));				seg->startfrm = r->win_startfrm - r->leader;		if (seg->startfrm < 0)		    seg->startfrm += CONT_AD_ADFRMSIZE;		seg->nfrm = r->leader + r->winsize;		seg->next = NULL;		if (! r->spseg_head)		    r->spseg_head = seg;		else		    r->spseg_tail->next = seg;		r->spseg_tail = seg;				r->state = CONT_AD_STATE_SPEECH;				/* Now in SPEECH state; want to look for silence from end of this window */		r->win_validfrm = 1;		r->win_startfrm = frm;		/* Count #sil frames remaining in reduced window (of 1 frame) */		r->n_other = (r->frm_pow[frm] <= r->thresh_sil) ? 1 : 0;		r->n_in_a_row = r->n_other;	    }	}    } else {	if (r->n_other >= r->sil_onset) {	    /* End of speech detected; speech->sil transition */	    r->spseg_tail->nfrm += r->trailer;	    	    r->state = CONT_AD_STATE_SIL;	    	    /* Now in SILENCE state; start looking for speech trailer+leader frames later */	    r->win_validfrm -= (r->trailer + r->leader - 1);	    r->win_startfrm += (r->trailer + r->leader - 1);	    if (r->win_startfrm >= CONT_AD_ADFRMSIZE)		r->win_startfrm -= CONT_AD_ADFRMSIZE;	    	    /* Count #speech frames remaining in reduced window */	    r->n_other = 0;	    r->n_in_a_row = 0;	    for (f = r->win_startfrm;; ) {		if (r->frm_pow[f] >= r->thresh_speech) {		    r->n_other++;		    r->n_in_a_row++;		} else {		    r->n_in_a_row = 0;		}		if (f == frm)		    break;				f++;		if (f >= CONT_AD_ADFRMSIZE)		    f = 0;	    }	} else {	    r->spseg_tail->nfrm++;	}    }    /* Get rid of oldest frame in analysis window */    if (r->state == CONT_AD_STATE_SIL) {	if (r->frm_pow[r->win_startfrm] >= r->thresh_speech) {	    r->n_other--;	    if (r->n_in_a_row > 0)		r->n_in_a_row--;	}    } else {	if (r->frm_pow[r->win_startfrm] <= r->thresh_sil) {	    r->n_other--;	    if (r->n_in_a_row > 0)		r->n_in_a_row--;	}    }    r->win_validfrm--;    r->win_startfrm++;    if (r->win_startfrm >= CONT_AD_ADFRMSIZE)	r->win_startfrm = 0;}static int32 max_siglvl (cont_ad_t *r, int32 startfrm, int32 nfrm){    int32 siglvl, i, f;    siglvl = 0;    if (nfrm > 0) {	for (i = 0, f = startfrm; i < nfrm; i++, f++) {	    if (f >= CONT_AD_ADFRMSIZE)		f -= CONT_AD_ADFRMSIZE;	    if (r->frm_pow[f] > siglvl)		siglvl = r->frm_pow[f];	}    }    return siglvl;}void get_audio_data(cont_ad_t *r, int16 *buf, int32 max) {}/* * Main function called by the application to filter out silence regions.  Maintains a * linked list of speech segments pointing into r->adbuf and feeds data to application * from them. */int32 cont_ad_read (cont_ad_t *r, int16 *buf, int32 max){    int32 head, tail, tailfrm, len, flen, eof;    int32 i, f, l;    spseg_t *seg;    int num_to_copy = 0, num_left = max;        if (max < r->spf) {	fflush(stdout);	fprintf(stderr, "cont_ad_read requires buffer of at least %d samples\n", r->spf);	abort();    }        /*     * First read as much of raw A/D as possible and available.  adbuf is not really a     * circular buffer, so may have to read in two steps for wrapping around.     */    head = r->headfrm * r->spf;    tail = head + r->n_sample;    len = r->n_sample - (r->n_frm * r->spf);	/* #partial frame samples at the tail */    assert ((len >= 0) && (len < r->spf));        eof = 0;	/* Clear end-of-file indication */    if (tail < r->adbufsize) {      if (r->adfunc != NULL) {	if ((l = (*(r->adfunc))(r->ad, r->adbuf+tail, r->adbufsize - tail)) < 0) {	  eof = 1;	  l = 0;	}      } else {	num_to_copy = r->adbufsize - tail;	num_left -= num_to_copy;	if (num_to_copy > max) {	  num_to_copy = max;	  num_left = 0;	}	memcpy(r->adbuf+tail, buf, num_to_copy*sizeof(int16));	memcpy(buf, buf+num_to_copy, num_left*sizeof(int16));	l = num_to_copy;      }#ifdef CONT_AD_RAWDUMP      if ((l > 0) && rawfp)	fwrite (r->adbuf+tail, sizeof(int16), l, rawfp);#endif      tail += l;      len += l;      r->n_sample += l;    }    if ((tail >= r->adbufsize) && (! eof)) {      tail -= r->adbufsize;      if (tail < head) {	if (r->adfunc != NULL) {	  if ((l = (*(r->adfunc))(r->ad, r->adbuf+tail, head - tail)) < 0) {	    eof = 1;	    l = 0;	  }	} else {	  num_to_copy = head-tail;	  if (num_to_copy > num_left)	    num_to_copy = num_left;	  memcpy(r->adbuf+tail, buf, num_to_copy*sizeof(int16));	  l = num_to_copy;	}#ifdef CONT_AD_RAWDUMP	if ((l > 0) && rawfp)	  fwrite (r->adbuf+tail, sizeof(int16), l, rawfp);#endif	tail += l;	len += l;	r->n_sample += l;      }    }      /* Compute frame power for unprocessed+new data and find speech/silence boundaries */    tailfrm = (r->headfrm + r->n_frm);	/* Next free frame slot to be filled */    if (tailfrm >= CONT_AD_ADFRMSIZE)	tailfrm -= CONT_AD_ADFRMSIZE;    for (; len >= r->spf; len -= r->spf) {	compute_frame_pow (r, tailfrm);	r->n_frm++;	r->tot_frm++;		boundary_detect (r, tailfrm);	/* find speech/sil change, if any */	if (++tailfrm >= CONT_AD_ADFRMSIZE)	    tailfrm = 0;	/* RKM: 2004-06-23: Moved this block from outside the enclosing loop */	/* Update thresholds if time to do so */	if (r->thresh_update <= 0) {	  find_thresh (r);	  decay_hist (r);	  r->thresh_update = CONT_AD_THRESH_UPDATE;	  	  /* Since threshold has been updated, recompute r->n_other */	  r->n_other = 0;	  r->n_in_a_row = 0;	  if (r->state == CONT_AD_STATE_SIL) {	    for (i = r->win_validfrm, f = r->win_startfrm; i > 0; --i) {		if (r->frm_pow[f] >= r->thresh_speech) {		    r->n_other++;		    r->n_in_a_row++;		} else {		    r->n_in_a_row = 0;		}		f++;		if (f >= CONT_AD_ADFRMSIZE)		    f = 0;	    }	} else {	    for (i = r->win_validfrm, f = r->win_startfrm; i > 0; --i) {		if (r->frm_pow[f] <= r->thresh_sil) {		    r->n_other++;		    r->n_in_a_row++;		} else {		    r->n_in_a_row = 0;		}		f++;		if (f >= CONT_AD_ADFRMSIZE)		    f = 0;	    }	  }	}    }    /*     * At last ready to copy speech data, if any.  Skip past any silence before the     * first available speech segment.  If no speech segment, simply consume as much of     * silence as possible.     */    if ((seg = r->spseg_head) == NULL) {	assert (r->state == CONT_AD_STATE_SIL);		/* No speech segment available; consume accumulated silence if any */	flen = r->n_frm - (r->winsize + r->leader - 1);	if (flen > 0) {	/* Can consume flen silence frames from current head of data */	    r->siglvl = max_siglvl (r, r->headfrm, flen);	    r->n_frm -= flen;	    r->n_sample -= (flen * r->spf);	    r->headfrm += flen;	    if (r->headfrm >= CONT_AD_ADFRMSIZE)		r->headfrm -= CONT_AD_ADFRMSIZE;	}	len = 0;	/* #samples being copied */    } else {	/* Copy integral #frames of speech data pointed to by seg (may be 0-length!) */	flen = max / r->spf;	if (flen > seg->nfrm)	    flen = seg->nfrm;	len = (flen * r->spf);	/* #samples being copied */	r->siglvl = max_siglvl (r, seg->startfrm, flen);	/* Copy data to buf.  If seg wrapped around adbuf break into two operations */	if (seg->startfrm + flen > CONT_AD_ADFRMSIZE) {	    f = CONT_AD_ADFRMSIZE - seg->startfrm;	    l = (f * r->spf);	    memcpy (buf, r->adbuf + (seg->startfrm * r->spf), l * sizeof(int16));	    	    buf += l;	    seg->startfrm = 0;	/* Wrapped around */	    seg->nfrm -= f;	    flen -= f;	}	if (flen > 0) {	    l = (flen * r->spf);	    memcpy (buf, r->adbuf + (seg->startfrm * r->spf), l * sizeof(int16));	    seg->startfrm += flen;	    if (seg->startfrm >= CONT_AD_ADFRMSIZE)		seg->startfrm -= CONT_AD_ADFRMSIZE;	    seg->nfrm -= flen;	}		/* Update r->headfrm to seg->startfrm; fix r->n_frm, r->n_sample accordingly */	if ((f = (seg->startfrm - r->headfrm)) < 0)	    f += CONT_AD_ADFRMSIZE;	r->n_frm -= f;	r->n_sample -= (f * r->spf);	r->headfrm = seg->startfrm;	assert ((r->n_frm >= 0) && (r->n_sample >= 0));		/* Free seg if empty and not recording into it */	if ((seg->nfrm == 0) && (seg->next || (r->state == CONT_AD_STATE_SIL))) {	    r->spseg_head = seg->next;	    if (! seg->next)		r->spseg_tail = NULL;	    free (seg);	}    }    assert (r->win_validfrm <= r->n_frm);    /* Update timestamp.  Total raw A/D read - those remaining to be consumed */    r->read_ts = (r->tot_frm - r->n_frm) * r->spf;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?