📄 live_decode_api.c
字号:
/* ====================================================================
 * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */
/*************************************************
 * CMU ARPA Speech Project
 *
 * Copyright (c) 2000 Carnegie Mellon University.
 * ALL RIGHTS RESERVED.
 *************************************************
 *
 * May 14, 2004
 *   Created by Yitao Sun (yitao@cs.cmu.edu) based on the live.c created by
 *   Rita Singh.  This version is meant to expose features with a simpler and
 *   more explicit API.
 *
 * Jun 10, 2004
 *   Modified by Yitao Sun.  Added argument parsing.
 */

/* OLD LOGS before renaming to live_decode_API.h
----------------------------
revision 1.8
date: 2004/09/03 18:17:11;  author: yitao;  state: Exp;  lines: +15 -11
changed ld_process_frames to ld_process_ceps
----------------------------
revision 1.7
date: 2004/08/25 20:44:31;  author: yitao;  state: Exp;  lines: +65 -41
1.  added code to record uttid in live-decode
2.  added more code to flesh out remote-decode.  not compiling yet.
----------------------------
revision 1.6
date: 2004/08/24 18:05:50;  author: yitao;  state: Exp;  lines: +2 -2
fixed compilation bug in function ld_utt_free_hyps().
----------------------------
revision 1.5
date: 2004/08/23 20:41:36;  author: yitao;  state: Exp;  lines: +7 -14
basic implementation for remote-decode API.  not compiling yet.
----------------------------
revision 1.4
date: 2004/08/10 22:13:48;  author: yitao;  state: Exp;  lines: +18 -10
added some minor comments in the code.  no significant change.
----------------------------
revision 1.3
date: 2004/08/09 21:40:36;  author: yitao;  state: Exp;  lines: +122 -93
1.  fixed some bugs in Live-Decode API.  changed kb.c, kb.h, utt.c,
    live_decode.c, live_decode.h.
2.  changed some filenames in src/programs/.  now there are 2 sets of
    livedecode and livepretend: one that uses the old API (livedecode and
    livepretend), and one that uses the new API (livedecode2 and
    livepretend2).
3.  modified Makefile.am to reflect the filename changes above.
----------------------------
revision 1.2
date: 2004/08/08 23:34:50;  author: arthchan2003;  state: Exp;  lines: +1 -1
temporary fixes of live_decode.c and live_decode.h
----------------------------
revision 1.1
date: 2004/08/06 15:07:39;  author: yitao;  state: Exp;
*** empty log message ***
=============================================================================
*/

#include "live_decode_API.h"
#include "live_decode_args.h"
#include "utt.h"
#include "bio.h"
#include <time.h>

/* Utility function declarations */
static int
ld_init_impl(live_decoder_t *_decoder, int32 _internal_cmdln);

static int
ld_set_uttid(live_decoder_t *_decoder, char *_uttid);

static int
ld_record_hyps(live_decoder_t *_decoder, int _end_utt);

static void
ld_free_hyps(live_decoder_t *_decoder);

static void
ld_process_raw_impl(live_decoder_t *_decoder,
                    int16 *_samples,
                    int32 _num_samples,
                    int32 _end_utt);

/*
 * Shared implementation behind ld_init() and ld_init_with_args().
 *
 * Builds the front-end from the current command-line configuration,
 * initializes the knowledge base, resets the per-decoder bookkeeping fields
 * and allocates the internal feature buffer.
 *
 * _decoder        - decoder to initialize; must not be NULL.
 * _internal_cmdln - TRUE when the command-line table was parsed by this API
 *                   (ld_init_with_args) and therefore has to be released by
 *                   it; FALSE when the caller owns it.
 *
 * Returns LD_SUCCESS, or LD_ERROR_OUT_OF_MEMORY when the front-end or the
 * feature buffer could not be created.  On failure everything acquired here
 * is released again, the freed pointers are reset to NULL and the decoder is
 * left in LD_STATE_FINISHED.
 */
static int
ld_init_impl(live_decoder_t *_decoder, int32 _internal_cmdln)
{
    /* Zero-initialized so that members not assigned below are never passed
     * to fe_init() with indeterminate values. */
    param_t fe_param = {0};
    int rv = LD_SUCCESS;
    int kb_ready = FALSE;   /* set once kb_init() has run */

    assert(_decoder != NULL);

    /* The cleanup path inspects these pointers; give them a defined value
     * before anything can fail. */
    _decoder->fe = NULL;
    _decoder->features = NULL;

    unlimit();

    /* allocate and initialize front-end */
    fe_param.SAMPLING_RATE = (float32) cmd_ln_int32("-samprate");
    fe_param.FRAME_RATE = cmd_ln_int32("-frate");
    fe_param.WINDOW_LENGTH = cmd_ln_float32("-wlen");
    fe_param.FB_TYPE = strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0 ?
        MEL_SCALE : LOG_LINEAR;
    fe_param.NUM_CEPSTRA = cmd_ln_int32("-ncep");
    fe_param.NUM_FILTERS = cmd_ln_int32("-nfilt");
    fe_param.FFT_SIZE = cmd_ln_int32("-nfft");
    fe_param.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
    fe_param.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
    fe_param.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
    if ((_decoder->fe = fe_init(&fe_param)) == NULL) {
        E_WARN("Failed to initialize front-end.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    /* capture decoder parameters */
    kb_init(&_decoder->kb);
    kb_ready = TRUE;
    _decoder->hmm_log = cmd_ln_int32("-hmmdump") ? stderr : NULL;

    /* initialize decoder variables */
    _decoder->kbcore = _decoder->kb.kbcore;
    _decoder->hyp_frame_num = -1;
    _decoder->uttid = NULL;
    _decoder->ld_state = LD_STATE_IDLE;
    _decoder->hyp_str = NULL;
    _decoder->hyp_segs = NULL;
    _decoder->swap =
        (cmd_ln_int32("-machine_endian") != cmd_ln_int32("-input_endian"));
    _decoder->phypdump = (cmd_ln_int32("-phypdump"));
    _decoder->rawext = (cmd_ln_str("-rawext"));

    if (_decoder->phypdump) {
        E_INFO("Partial hypothesis WILL be dumped\n");
    }
    else {
        E_INFO("Partial hypothesis will NOT be dumped\n");
    }

    if (_decoder->swap) {
        E_INFO("Input data WILL be byte swapped\n");
    }
    else {
        E_INFO("Input data will NOT be byte swapped\n");
    }

    _decoder->internal_cmdln = _internal_cmdln;

    _decoder->features =
        feat_array_alloc(kbcore_fcb(_decoder->kbcore), LIVEBUFBLOCKSIZE);
    if (_decoder->features == NULL) {
        E_WARN("Failed to allocate internal feature buffer.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    return LD_SUCCESS;

ld_init_impl_cleanup:
    if (_decoder->fe != NULL) {
        fe_close(_decoder->fe);
        _decoder->fe = NULL;
    }
    if (_decoder->features != NULL) {
        /* consult the implementation of feat_array_alloc() for how to free
         * our internal feature vector buffer */
        ckd_free((void *) **_decoder->features);
        ckd_free_2d((void **) _decoder->features);
        _decoder->features = NULL;
    }
    if (_internal_cmdln == TRUE) {
        cmd_ln_free();
    }
    /* Same release order as ld_finish(): knowledge base after the
     * command-line table. */
    if (kb_ready) {
        kb_free(&_decoder->kb);
    }
    _decoder->ld_state = LD_STATE_FINISHED;

    return rv;
}

/*
 * Initialize a decoder from an argument vector.
 *
 * The arguments are parsed against arg_def with cmd_ln_parse(); because the
 * command-line table is created here, the decoder is flagged as owning it
 * (internal_cmdln = TRUE).
 *
 * Returns LD_ERROR_INTERNAL if parsing fails, otherwise the result of
 * ld_init_impl().
 */
int
ld_init_with_args(live_decoder_t *_decoder, int _argc, char **_argv)
{
    assert(_decoder != NULL);

    if (cmd_ln_parse(arg_def, _argc, _argv) != 0) {
        E_WARN("Failed to parse arguments.\n");
        return LD_ERROR_INTERNAL;
    }

    return ld_init_impl(_decoder, TRUE);
}

/*
 * Initialize a decoder without parsing any arguments here.
 *
 * The decoder is flagged as not owning the command-line table
 * (internal_cmdln = FALSE), so ld_finish() will not call cmd_ln_free().
 *
 * Returns the result of ld_init_impl().
 */
int
ld_init(live_decoder_t *_decoder)
{
    return ld_init_impl(_decoder, FALSE);
}

/*
 * Release everything held by the decoder and mark it LD_STATE_FINISHED.
 *
 * A decoder that is already in LD_STATE_FINISHED (after a failed
 * initialization, or after a previous ld_finish()) has nothing left to
 * release, so the call returns immediately instead of freeing the same
 * resources twice.
 */
void
ld_finish(live_decoder_t *_decoder)
{
    assert(_decoder != NULL);

    if (_decoder->ld_state == LD_STATE_FINISHED) {
        return;
    }

    if (_decoder->fe != NULL) {
        fe_close(_decoder->fe);
        _decoder->fe = NULL;
    }
    if (_decoder->features != NULL) {
        /* consult the implementation of feat_array_alloc() for how to free
         * our internal feature vector buffer */
        ckd_free((void *) **_decoder->features);
        ckd_free_2d((void **) _decoder->features);
        _decoder->features = NULL;
    }
    if (_decoder->internal_cmdln == TRUE) {
        cmd_ln_free();
    }
    kb_free(&_decoder->kb);

    ld_free_hyps(_decoder);
    if (_decoder->uttid != NULL) {
        ckd_free(_decoder->uttid);
        _decoder->uttid = NULL;
    }
    _decoder->ld_state = LD_STATE_FINISHED;
}

/*
 * Start decoding a new utterance.
 *
 * Only permitted while the decoder is idle.  Discards the hypotheses of the
 * previous utterance, calls utt_begin(), zeroes the frame and evaluation
 * counters, and switches the decoder to LD_STATE_DECODING before recording
 * the utterance id.
 *
 * Returns LD_ERROR_INVALID_STATE if the decoder is not idle, otherwise the
 * result of ld_set_uttid().
 *
 * NOTE(review): the decoder stays in LD_STATE_DECODING even when
 * ld_set_uttid() reports a failure - confirm that callers expect this.
 */
int
ld_begin_utt(live_decoder_t *_decoder, char *_uttid)
{
    assert(_decoder != NULL);

    if (_decoder->ld_state != LD_STATE_IDLE) {
        E_WARN("Failed to begin a new utterance because decoder is not idle.\n");
        return LD_ERROR_INVALID_STATE;
    }

    ld_free_hyps(_decoder);

    utt_begin(&_decoder->kb);

    _decoder->num_frames_decoded = 0;
    _decoder->num_frames_entered = 0;
    _decoder->kb.nfr = 0;
    _decoder->kb.utt_hmm_eval = 0;
    _decoder->kb.utt_sen_eval = 0;
    _decoder->kb.utt_gau_eval = 0;
    _decoder->kb.utt_cisen_eval = 0;
    _decoder->kb.utt_cigau_eval = 0;
    _decoder->ld_state = LD_STATE_DECODING;

    return ld_set_uttid(_decoder, _uttid);
}

/*
 * Finish the current utterance.
 *
 * Makes a final ld_process_raw_impl() call with no samples and the
 * end-of-utterance flag set, adds this utterance's frame count to the
 * running total, records the hypotheses, calls utt_end() and returns the
 * decoder to LD_STATE_IDLE.
 *
 * NOTE(review): the decoder state is not checked on entry - confirm that
 * callers only invoke this after a successful ld_begin_utt().
 */
void
ld_end_utt(live_decoder_t *_decoder)
{
    assert(_decoder != NULL);

    ld_process_raw_impl(_decoder, NULL, 0, TRUE);
    _decoder->kb.tot_fr += _decoder->kb.nfr;
    ld_record_hyps(_decoder, TRUE);
    utt_end(&_decoder->kb);
    _decoder->ld_state = LD_STATE_IDLE;
}

void
ld_process_raw(live_decoder_t *_decoder, int16 *_samples, int32 _num_samples)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -