cont_ad_base.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,062 行 · 第 1/3 页
C
1,062 行
* PWP: in SReed's original, he cleared the histogram here. * I can't fathom why. */ return 0;}/* * Main silence/speech region detection routine. If currently in * SILENCE state, switch to SPEECH state if a window (r->winsize) * of frames is mostly non-silence. If in SPEECH state, switch to * SILENCE state if the window is mostly silence. */static void boundary_detect (cont_ad_t *r, int32 frm){ spseg_t *seg; int32 f; assert (r->n_other >= 0); r->win_validfrm++; if (r->state == CONT_AD_STATE_SIL) { if (r->frm_pow[frm] >= r->thresh_speech) { r->n_other++; r->n_in_a_row++; } else { r->n_in_a_row = 0; }#ifdef CONT_AD_DEBUG printf (" . %2d.%2d", r->frm_pow[frm], r->n_other);#endif } else { if (r->frm_pow[frm] <= r->thresh_sil) { r->n_other++; r->n_in_a_row++; } else { r->n_in_a_row = 0; }#ifdef CONT_AD_DEBUG printf (" # %2d.%2d", r->frm_pow[frm], r->n_other);#endif } fflush (stdout); if (r->win_validfrm < r->winsize) /* Not reached full analysis window size */ return; assert (r->win_validfrm == r->winsize);// fprintf(stderr, "State is %s n_other is %d\n", r->state == CONT_AD_STATE_SIL ?// "silence" : "speech", r->n_other); if (r->state == CONT_AD_STATE_SIL) { /* Currently in SILENCE state */ if (r->n_frm >= r->winsize + r->leader) { if (r->n_other >= r->speech_onset) { /* Speech detected; create speech segment description */ seg = malloc (sizeof(*seg)); seg->startfrm = r->win_startfrm - r->leader; if (seg->startfrm < 0) seg->startfrm += CONT_AD_ADFRMSIZE; seg->nfrm = r->leader + r->winsize; seg->next = NULL; if (! r->spseg_head) r->spseg_head = seg; else r->spseg_tail->next = seg; r->spseg_tail = seg; r->state = CONT_AD_STATE_SPEECH; /* Now in SPEECH state; want to look for silence from end of this window */ r->win_validfrm = 1; r->win_startfrm = frm; /* Count #sil frames remaining in reduced window (of 1 frame) */ r->n_other = (r->frm_pow[frm] <= r->thresh_sil) ? 1 : 0; r->n_in_a_row = r->n_other; } } } else { if (r->n_other >= r->sil_onset) { /* End of speech detected; speech->sil transition */ r->spseg_tail->nfrm += r->trailer; r->state = CONT_AD_STATE_SIL; /* Now in SILENCE state; start looking for speech trailer+leader frames later */ r->win_validfrm -= (r->trailer + r->leader - 1); r->win_startfrm += (r->trailer + r->leader - 1); if (r->win_startfrm >= CONT_AD_ADFRMSIZE) r->win_startfrm -= CONT_AD_ADFRMSIZE; /* Count #speech frames remaining in reduced window */ r->n_other = 0; r->n_in_a_row = 0; for (f = r->win_startfrm;; ) { if (r->frm_pow[f] >= r->thresh_speech) { r->n_other++; r->n_in_a_row++; } else { r->n_in_a_row = 0; } if (f == frm) break; f++; if (f >= CONT_AD_ADFRMSIZE) f = 0; } } else { r->spseg_tail->nfrm++; } } /* Get rid of oldest frame in analysis window */ if (r->state == CONT_AD_STATE_SIL) { if (r->frm_pow[r->win_startfrm] >= r->thresh_speech) { r->n_other--; if (r->n_in_a_row > 0) r->n_in_a_row--; } } else { if (r->frm_pow[r->win_startfrm] <= r->thresh_sil) { r->n_other--; if (r->n_in_a_row > 0) r->n_in_a_row--; } } r->win_validfrm--; r->win_startfrm++; if (r->win_startfrm >= CONT_AD_ADFRMSIZE) r->win_startfrm = 0;}static int32 max_siglvl (cont_ad_t *r, int32 startfrm, int32 nfrm){ int32 siglvl, i, f; siglvl = 0; if (nfrm > 0) { for (i = 0, f = startfrm; i < nfrm; i++, f++) { if (f >= CONT_AD_ADFRMSIZE) f -= CONT_AD_ADFRMSIZE; if (r->frm_pow[f] > siglvl) siglvl = r->frm_pow[f]; } } return siglvl;}void get_audio_data(cont_ad_t *r, int16 *buf, int32 max) {}/* * Main function called by the application to filter out silence regions. Maintains a * linked list of speech segments pointing into r->adbuf and feeds data to application * from them. */int32 cont_ad_read (cont_ad_t *r, int16 *buf, int32 max){ int32 head, tail, tailfrm, len, flen, eof; int32 i, f, l; spseg_t *seg; int num_to_copy = 0, num_left = max; if (max < r->spf) { fflush(stdout); fprintf(stderr, "cont_ad_read requires buffer of at least %d samples\n", r->spf); abort(); } /* * First read as much of raw A/D as possible and available. adbuf is not really a * circular buffer, so may have to read in two steps for wrapping around. */ head = r->headfrm * r->spf; tail = head + r->n_sample; len = r->n_sample - (r->n_frm * r->spf); /* #partial frame samples at the tail */ assert ((len >= 0) && (len < r->spf)); eof = 0; /* Clear end-of-file indication */ if (tail < r->adbufsize) { if (r->adfunc != NULL) { if ((l = (*(r->adfunc))(r->ad, r->adbuf+tail, r->adbufsize - tail)) < 0) { eof = 1; l = 0; } } else { num_to_copy = r->adbufsize - tail; num_left -= num_to_copy; if (num_to_copy > max) { num_to_copy = max; num_left = 0; } memcpy(r->adbuf+tail, buf, num_to_copy*sizeof(int16)); memcpy(buf, buf+num_to_copy, num_left*sizeof(int16)); l = num_to_copy; }#ifdef CONT_AD_RAWDUMP if ((l > 0) && rawfp) fwrite (r->adbuf+tail, sizeof(int16), l, rawfp);#endif tail += l; len += l; r->n_sample += l; } if ((tail >= r->adbufsize) && (! eof)) { tail -= r->adbufsize; if (tail < head) { if (r->adfunc != NULL) { if ((l = (*(r->adfunc))(r->ad, r->adbuf+tail, head - tail)) < 0) { eof = 1; l = 0; } } else { num_to_copy = head-tail; if (num_to_copy > num_left) num_to_copy = num_left; memcpy(r->adbuf+tail, buf, num_to_copy*sizeof(int16)); l = num_to_copy; }#ifdef CONT_AD_RAWDUMP if ((l > 0) && rawfp) fwrite (r->adbuf+tail, sizeof(int16), l, rawfp);#endif tail += l; len += l; r->n_sample += l; } } /* Compute frame power for unprocessed+new data and find speech/silence boundaries */ tailfrm = (r->headfrm + r->n_frm); /* Next free frame slot to be filled */ if (tailfrm >= CONT_AD_ADFRMSIZE) tailfrm -= CONT_AD_ADFRMSIZE; for (; len >= r->spf; len -= r->spf) { compute_frame_pow (r, tailfrm); r->n_frm++; r->tot_frm++; boundary_detect (r, tailfrm); /* find speech/sil change, if any */ if (++tailfrm >= CONT_AD_ADFRMSIZE) tailfrm = 0; /* RKM: 2004-06-23: Moved this block from outside the enclosing loop */ /* Update thresholds if time to do so */ if (r->thresh_update <= 0) { find_thresh (r); decay_hist (r); r->thresh_update = CONT_AD_THRESH_UPDATE; /* Since threshold has been updated, recompute r->n_other */ r->n_other = 0; r->n_in_a_row = 0; if (r->state == CONT_AD_STATE_SIL) { for (i = r->win_validfrm, f = r->win_startfrm; i > 0; --i) { if (r->frm_pow[f] >= r->thresh_speech) { r->n_other++; r->n_in_a_row++; } else { r->n_in_a_row = 0; } f++; if (f >= CONT_AD_ADFRMSIZE) f = 0; } } else { for (i = r->win_validfrm, f = r->win_startfrm; i > 0; --i) { if (r->frm_pow[f] <= r->thresh_sil) { r->n_other++; r->n_in_a_row++; } else { r->n_in_a_row = 0; } f++; if (f >= CONT_AD_ADFRMSIZE) f = 0; } } } } /* * At last ready to copy speech data, if any. Skip past any silence before the * first available speech segment. If no speech segment, simply consume as much of * silence as possible. */ if ((seg = r->spseg_head) == NULL) { assert (r->state == CONT_AD_STATE_SIL); /* No speech segment available; consume accumulated silence if any */ flen = r->n_frm - (r->winsize + r->leader - 1); if (flen > 0) { /* Can consume flen silence frames from current head of data */ r->siglvl = max_siglvl (r, r->headfrm, flen); r->n_frm -= flen; r->n_sample -= (flen * r->spf); r->headfrm += flen; if (r->headfrm >= CONT_AD_ADFRMSIZE) r->headfrm -= CONT_AD_ADFRMSIZE; } len = 0; /* #samples being copied */ } else { /* Copy integral #frames of speech data pointed to by seg (may be 0-length!) */ flen = max / r->spf; if (flen > seg->nfrm) flen = seg->nfrm; len = (flen * r->spf); /* #samples being copied */ r->siglvl = max_siglvl (r, seg->startfrm, flen); /* Copy data to buf. If seg wrapped around adbuf break into two operations */ if (seg->startfrm + flen > CONT_AD_ADFRMSIZE) { f = CONT_AD_ADFRMSIZE - seg->startfrm; l = (f * r->spf); memcpy (buf, r->adbuf + (seg->startfrm * r->spf), l * sizeof(int16)); buf += l; seg->startfrm = 0; /* Wrapped around */ seg->nfrm -= f; flen -= f; } if (flen > 0) { l = (flen * r->spf); memcpy (buf, r->adbuf + (seg->startfrm * r->spf), l * sizeof(int16)); seg->startfrm += flen; if (seg->startfrm >= CONT_AD_ADFRMSIZE) seg->startfrm -= CONT_AD_ADFRMSIZE; seg->nfrm -= flen; } /* Update r->headfrm to seg->startfrm; fix r->n_frm, r->n_sample accordingly */ if ((f = (seg->startfrm - r->headfrm)) < 0) f += CONT_AD_ADFRMSIZE; r->n_frm -= f; r->n_sample -= (f * r->spf); r->headfrm = seg->startfrm; assert ((r->n_frm >= 0) && (r->n_sample >= 0)); /* Free seg if empty and not recording into it */ if ((seg->nfrm == 0) && (seg->next || (r->state == CONT_AD_STATE_SIL))) { r->spseg_head = seg->next; if (! seg->next) r->spseg_tail = NULL; free (seg); } } assert (r->win_validfrm <= r->n_frm); /* Update timestamp. Total raw A/D read - those remaining to be consumed */ r->read_ts = (r->tot_frm - r->n_frm) * r->spf;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?