cont_ad_base.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 1,062 行 · 第 1/3 页
C
1,062 行
if (len == 0) return (eof ? -1 : 0); else return len;}/* * Calibrate input channel for silence threshold. */int32 cont_ad_calib (cont_ad_t *r){ int32 i, f, s, k, len, tailfrm; /* clear histogram */ for (i = 0; i < CONT_AD_POWHISTSIZE; i++) r->pow_hist[i] = 0; tailfrm = r->headfrm + r->n_frm; if (tailfrm >= CONT_AD_ADFRMSIZE) tailfrm -= CONT_AD_ADFRMSIZE; s = (tailfrm * r->spf); for (f = 0; f < (CONT_AD_POWHISTSIZE<<1); f++) { len = r->spf; while (len > 0) { /*Trouble */ if ((k = (*(r->adfunc))(r->ad, r->adbuf+s, len)) < 0) return -1; len -= k; s += k; } s -= r->spf; compute_frame_pow (r, tailfrm); } r->thresh_update = CONT_AD_THRESH_UPDATE; return (find_thresh (r));}int32 cont_ad_calib_loop (cont_ad_t *r, int16 *buf, int32 max){ int32 i, s, len, tailfrm; static int32 finished = 1; static int32 f = 0; if (finished) { finished = 0; f = 0; /* clear histogram */ for (i = 0; i < CONT_AD_POWHISTSIZE; i++) r->pow_hist[i] = 0; } tailfrm = r->headfrm + r->n_frm; if (tailfrm >= CONT_AD_ADFRMSIZE) tailfrm -= CONT_AD_ADFRMSIZE; s = (tailfrm * r->spf); len = r->spf; for (; f < (CONT_AD_POWHISTSIZE<<1); f++) { if (max < len) return 1; memcpy (r->adbuf+s, buf, len*sizeof(int16)); max -= len; memcpy (buf, buf+len, max*sizeof(int16)); compute_frame_pow (r, tailfrm); } finished = 1; return (find_thresh (r));}/* PWP 1/14/98 -- modified for compatibility with old code */int32 cont_ad_set_thresh (cont_ad_t *r, int32 sil, int32 speech){ if ((sil < 0) || (speech < 0)) { fprintf(stderr, "cont_ad_set_thresh: invalid threshold arguments: %d, %d\n", sil, speech); return -1; } r->delta_sil = (3 * sil) / 2; r->delta_speech = (3 * speech) / 2; return 0;}/* * PWP 1/14/98 -- set the changable params. * * delta_sil, delta_speech, min_noise, and max_noise are in dB, * winsize, speech_onset, sil_onset, leader and trailer are in frames of * 16 ms length (256 samples @ 16kHz sampling). */int32 cont_ad_set_params (cont_ad_t *r, int32 delta_sil, int32 delta_speech, int32 min_noise, int32 max_noise, int32 winsize, int32 speech_onset, int32 sil_onset, int32 leader, int32 trailer, float32 adapt_rate){ if ((delta_sil < 0) || (delta_speech < 0) || (min_noise < 0) || (max_noise < 0)) { E_ERROR("threshold arguments: " "%d, %d, %d, %d must all be >=0\n", delta_sil, delta_speech, min_noise, max_noise); return -1; } if ((speech_onset > winsize) || (speech_onset <= 0) || (winsize <= 0)) { E_ERROR("speech_onset, %d, must be <= winsize, %d, and both >0\n", speech_onset, winsize); return -1; } if ((sil_onset > winsize) || (sil_onset <= 0) || (winsize <= 0)) { E_ERROR("sil_onset, %d, must be <= winsize, %d, and both >0\n", sil_onset, winsize); return -1; } if (((leader + trailer) > winsize) || (leader <= 0) || (trailer <= 0)) { E_ERROR("leader, %d, plus trailer, %d, must be <= winsize, %d, and both >0\n", leader, trailer, winsize); return -1; } if ((adapt_rate < 0.0) || (adapt_rate > 1.0)) { E_ERROR("adapt_rate, %e; must be in range 0..1\n", adapt_rate); return -1; } r->delta_sil = delta_sil; r->delta_speech = delta_speech; r->min_noise = min_noise; r->max_noise = max_noise; r->winsize = winsize; r->speech_onset = speech_onset; r->sil_onset = sil_onset; r->leader = leader; r->trailer = trailer; r->adapt_rate = adapt_rate; if (r->win_validfrm >= r->winsize) r->win_validfrm = r->winsize - 1; return 0;}/* * PWP 1/14/98 -- get the changable params. * * delta_sil, delta_speech, min_noise, and max_noise are in dB, * winsize, speech_onset, sil_onset, leader and trailer are in frames of * 16 ms length (256 samples @ 16kHz sampling). */int32 cont_ad_get_params (cont_ad_t *r, int32 *delta_sil, int32 *delta_speech, int32 *min_noise, int32 *max_noise, int32 *winsize, int32 *speech_onset, int32 *sil_onset, int32 *leader, int32 *trailer, float32 *adapt_rate){ if (!delta_sil || !delta_speech || !min_noise || !max_noise || !winsize || !speech_onset || !sil_onset || !leader || !trailer || !adapt_rate) { fprintf(stderr, "cont_ad_get_params: some param slots are NULL\n"); return (-1); } *delta_sil = r->delta_sil; *delta_speech = r->delta_speech; *min_noise = r->min_noise; *max_noise = r->max_noise; *winsize = r->winsize; *speech_onset = r->speech_onset; *sil_onset = r->sil_onset; *leader = r->leader; *trailer = r->trailer; *adapt_rate = r->adapt_rate; return 0;}/* * Reset, discarded any accumulated speech. */int32 cont_ad_reset (cont_ad_t *r){ spseg_t *seg; while (r->spseg_head) { seg = r->spseg_head; r->spseg_head = seg->next; free (seg); } r->spseg_tail = NULL; r->headfrm = 0; r->n_frm = 0; r->n_sample = 0; r->win_startfrm = 0; r->win_validfrm = 0; r->n_other = 0; r->n_in_a_row = 0; r->state = CONT_AD_STATE_SIL; return 0;}int32 cont_ad_close (cont_ad_t *cont){ free (cont->adbuf); free (cont->pow_hist); free (cont->frm_pow); free (cont); return 0;}int32 cont_ad_detach (cont_ad_t *c){ c->ad = NULL; c->adfunc = NULL; return 0;}int32 cont_ad_attach (cont_ad_t *c, ad_rec_t *a, int32 (*func)(ad_rec_t *, int16 *, int32)){ c->ad = a; c->adfunc = func; return 0;}int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech) { int i, f; r->thresh_speech = speech; r->thresh_sil = silence; /* Since threshold has been updated, recompute r->n_other */ r->n_other = 0; r->n_in_a_row = 0; if (r->state == CONT_AD_STATE_SIL) { for (i = r->win_validfrm, f = r->win_startfrm; i > 0; --i) { if (r->frm_pow[f] >= r->thresh_speech) { r->n_other++; r->n_in_a_row++; } else { r->n_in_a_row = 0; } f++; if (f >= CONT_AD_ADFRMSIZE) f = 0; } } else { for (i = r->win_validfrm, f = r->win_startfrm; i > 0; --i) { if (r->frm_pow[f] <= r->thresh_sil) { r->n_other++; r->n_in_a_row++; } else { r->n_in_a_row = 0; } f++; if (f >= CONT_AD_ADFRMSIZE) f = 0; } } return 0;}/* * One-time initialization. */cont_ad_t *cont_ad_init (ad_rec_t *a, int32 (*func)(ad_rec_t *, int16 *, int32)){ cont_ad_t *r; if ((r = malloc (sizeof(*r))) == NULL) { perror("allocation of cont_ad_t failed"); return NULL; } r->ad = a; r->adfunc = func; if (a != NULL) r->sps = a->sps; else r->sps = CONT_AD_SPS; /* Set samples/frame such that when sps=16000, spf=256 */ r->spf = (r->sps * 256) / CONT_AD_SPS; r->adbufsize = CONT_AD_ADFRMSIZE * r->spf; if ((r->adbuf = malloc (r->adbufsize * sizeof(*r->adbuf))) == NULL) { perror("allocation of audio buffer failed"); free (r); return NULL; } if ((r->pow_hist = calloc (CONT_AD_POWHISTSIZE, sizeof(*r->pow_hist))) == NULL) { perror("allocation of power history buffer failed"); free (r->adbuf); free (r); return NULL; } if ((r->frm_pow = calloc (CONT_AD_ADFRMSIZE, sizeof(*r->frm_pow))) == NULL) { perror("allocation of frame power buffer failed"); free (r->pow_hist); free (r->adbuf); free (r); return NULL; } r->read_ts = 0; r->prev_sample = 0; r->tot_frm = 0; r->noise_level = CONT_AD_DEFAULT_NOISE; r->auto_thresh = 1; r->delta_sil = CONT_AD_DELTA_SIL; r->delta_speech = CONT_AD_DELTA_SPEECH; r->min_noise = CONT_AD_MIN_NOISE; r->max_noise = CONT_AD_MAX_NOISE; r->winsize = CONT_AD_WINSIZE; r->speech_onset = CONT_AD_SPEECH_ONSET; r->sil_onset = CONT_AD_SIL_ONSET; r->leader = CONT_AD_LEADER; r->trailer = CONT_AD_TRAILER; r->thresh_sil = r->noise_level + r->delta_sil; r->thresh_speech = r->noise_level + r->delta_speech; r->thresh_update = CONT_AD_THRESH_UPDATE; r->adapt_rate = (float32)CONT_AD_ADAPT_RATE; r->state = CONT_AD_STATE_SIL; r->spseg_head = NULL; r->spseg_tail = NULL; cont_ad_reset (r); #ifdef CONT_AD_RAWDUMP rawfp = fopen ("ad.raw", "wb");#endif return r;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?