📄 live_decode_api.c

📁 CMU大名鼎鼎的SPHINX－3大词汇量连续语音识别系统
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
{  ld_process_raw_impl(_decoder, _samples, _num_samples, FALSE);}voidld_process_ceps(live_decoder_t *_decoder, 		float32 **_cep_frames,		int32 _num_frames){  int32 num_features = 0;  int32 begin_utt = _decoder->num_frames_entered == 0;  assert(_decoder != NULL);	  if (_num_frames > 0) {    num_features = feat_s2mfc2feat_block(kbcore_fcb(_decoder->kbcore),					 _cep_frames,					 _num_frames,					 begin_utt,					 FALSE,					 _decoder->features);    _decoder->num_frames_entered += _num_frames;  }  if (num_features > 0) {    utt_decode_block(_decoder->features, 		     num_features, 		     &_decoder->num_frames_decoded,		     &_decoder->kb, 		     _decoder->hmm_log);  }}intld_retrieve_hyps(live_decoder_t *_decoder, char **_uttid, char **_hyp_str, 		 hyp_t ***_hyp_segs){  int rv = LD_SUCCESS;  assert(_decoder != NULL);  /* re-record the hypothesis if there is a frame number mismatch */  if (_decoder->num_frames_decoded != _decoder->hyp_frame_num) {    rv = ld_record_hyps(_decoder, FALSE);  }    if (_uttid != NULL) {    *_uttid = _decoder->uttid;  }  if (_hyp_str != NULL) {    *_hyp_str = _decoder->hyp_str;  }  if (_hyp_segs != NULL) {    *_hyp_segs = _decoder->hyp_segs;  }    return rv;}/***************************************************************************//***************************************************************************//***************************************************************************/intld_set_uttid(live_decoder_t *_decoder, char *_uttid){  char *local_uttid = NULL;  struct tm *times;  time_t t;  assert(_decoder != NULL);  if (_decoder->uttid != NULL) {    ckd_free(_decoder->uttid);    _decoder->uttid = NULL;  }  /* automatically-generated uttid */  if (_uttid == NULL) {    t = time(NULL);    times = localtime(&t);    if ((local_uttid = ckd_malloc(17)) == NULL) {      E_WARN("Failed to allocate space for utterance id.\n");      return LD_ERROR_OUT_OF_MEMORY;    }    sprintf(local_uttid, "*%4d%2d%2dZ%2d%2d%2d",	    times->tm_year, times->tm_mon, times->tm_mday,	    times->tm_hour, times->tm_min, times->tm_sec);  }  /* user-defined uttid */  else {    if ((local_uttid = ckd_malloc(strlen(_uttid) + 1)) == NULL) {      E_WARN("Failed to allocate space for utterance id.\n");      return LD_ERROR_OUT_OF_MEMORY;    }    strcpy(local_uttid, _uttid);  }  _decoder->uttid = local_uttid;  /* Also set the kb internal uttid. This makes the uttid in the results. */  kb_set_uttid(_decoder->uttid, &(_decoder->kb));  return LD_SUCCESS;}intld_record_hyps(live_decoder_t *_decoder, int _end_utt){  int32	id;  int32	i = 0;  glist_t hyp_list;  gnode_t *node;  hyp_t *hyp;  char *hyp_strptr = 0;  char *hyp_str = 0;  hyp_t **hyp_segs = 0;  int hyp_seglen = 0;  int hyp_strlen = 0;  int finish_wid = 0;  kb_t *kb = 0;  dict_t *dict;  int rv;  assert(_decoder != NULL);  ld_free_hyps(_decoder);  kb = &_decoder->kb;  dict = kbcore_dict(_decoder->kbcore);  id = _end_utt ?    vithist_utt_end(kb->vithist, _decoder->kbcore) :    vithist_partialutt_end(kb->vithist, _decoder->kbcore);  if (id < 0) {    E_WARN("Failed to retrieve viterbi history.\n");    return LD_ERROR_INTERNAL;  }  /** record the segment length and the overall string length */  hyp_list = vithist_backtrace(kb->vithist, id);  finish_wid = dict_finishwid(dict);  for (node = hyp_list; node != NULL; node = gnode_next(node)) {    hyp = (hyp_t *)gnode_ptr(node);    hyp_seglen++;    if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {      hyp_strlen +=	strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) + 1;    }  }  if (hyp_strlen == 0) {    hyp_strlen = 1;  }  /** allocate array to hold the segments and/or decoded string */  hyp_str = (char *)ckd_calloc(hyp_strlen, sizeof(char));  hyp_segs = (hyp_t **)ckd_calloc(hyp_seglen + 1, sizeof(hyp_t *));  if (hyp_segs == NULL || hyp_str == NULL) {    E_WARN("Failed to allocate storage for hypothesis.\n");    rv = LD_ERROR_OUT_OF_MEMORY;    goto ld_record_hyps_cleanup;  }		  /** iterate thru to fill in the array of segments and/or decoded string */  i = 0;  hyp_strptr = hyp_str;  for (node = hyp_list; node != NULL; node = gnode_next(node), i++) {    hyp = (hyp_t *)gnode_ptr(node);    hyp_segs[i] = hyp;        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {      strcat(hyp_strptr, dict_wordstr(dict, dict_basewid(dict, hyp->id)));      hyp_strptr += strlen(hyp_strptr);      *hyp_strptr = ' ';      hyp_strptr += 1;    }  }  glist_free(hyp_list);    hyp_str[hyp_strlen - 1] = '\0';  hyp_segs[hyp_seglen] = 0;  _decoder->hyp_frame_num = _decoder->num_frames_decoded;  _decoder->hyp_segs = hyp_segs;  _decoder->hyp_str = hyp_str;  return LD_SUCCESS; ld_record_hyps_cleanup:  if (hyp_segs != NULL) {    ckd_free(hyp_segs);  }  if (hyp_str != NULL) {    ckd_free(hyp_segs);  }  if (hyp_list != NULL) {    for (node = hyp_list; node != NULL; node = gnode_next(node)) {      if ((hyp = (hyp_t *)gnode_ptr(node)) != NULL) {	ckd_free(hyp);      }    }  }  return rv;}voidld_free_hyps(live_decoder_t *_decoder){  hyp_t **h;  /** set the reference frame number to something invalid */  _decoder->hyp_frame_num = -1;  /** free and reset the hypothesis string */  if (_decoder->hyp_str) {    ckd_free(_decoder->hyp_str);    _decoder->hyp_str = 0;  }    /** free and reset the hypothesis word segments */  if (_decoder->hyp_segs) {    for (h = _decoder->hyp_segs; *h; h++) {      ckd_free(*h);    }    ckd_free(_decoder->hyp_segs);    _decoder->hyp_segs = 0;  }}voidld_process_raw_impl(live_decoder_t *_decoder,		    int16 *samples,		    int32 num_samples,		    int32 end_utt){  float32 dummy_frame[MAX_CEP_LEN];  float32 **frames = 0;  int32 num_frames = 0;  int32 num_features = 0;  int32 begin_utt = _decoder->num_frames_entered == 0;  int32 return_value;  int i;  assert(_decoder != NULL);  if (begin_utt) {    fe_start_utt(_decoder->fe);  }    if(_decoder->swap){    for (i = 0; i < num_samples; i++) {      SWAP_INT16(samples + i);    }  }  return_value = fe_process_utt(_decoder->fe, samples, num_samples, &frames, &num_frames);  if (end_utt) {    return_value = fe_end_utt(_decoder->fe, dummy_frame, &num_frames);    if(num_frames!=0){      /* ARCHAN: If num_frames !=0, assign this last ending frame to	 frames again.  The computation will then be correct.  Should	 clean up the finite state logic in fe_interface layer.       */      frames=(float32 **)ckd_calloc_2d(1,_decoder->fe->NUM_CEPSTRA,sizeof(float32));      memcpy(frames[0],dummy_frame,_decoder->fe->NUM_CEPSTRA*sizeof(float32));    }  }	  if (FE_ZERO_ENERGY_ERROR == return_value) {    E_WARN("Zero energy frame(s). Consider using dither\n");  }  if (num_frames > 0) {    num_features = feat_s2mfc2feat_block(kbcore_fcb(_decoder->kbcore),					 frames,					 num_frames,					 begin_utt,					 end_utt,					 _decoder->features);    _decoder->num_frames_entered += num_frames;  }  if (num_features > 0) {    utt_decode_block(_decoder->features, 		     num_features, 		     &_decoder->num_frames_decoded, 		     &_decoder->kb, 		     _decoder->hmm_log);  }	  if (frames != NULL) {    ckd_free_2d((void **)frames);  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -