📄 fbs.h
字号:
/* ==================================================================== * Copyright (c) 1999-2001 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * fbs.h -- Interface exported by the decoder module * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1996 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** * * HISTORY * * $Log: fbs.h,v $ * Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins * re-importation * * Revision 1.11 2005/05/24 20:55:24 rkm * Added -fsgbfs flag * * Revision 1.10 2005/01/26 17:54:51 rkm * Added -maxhmmpf absolute pruning parameter in FSG mode * * Revision 1.9 2005/01/13 15:22:11 rkm * Fixed some comments relating to s2_fsg_t structure * * Revision 1.8 2004/07/23 23:36:33 egouvea * Ravi's merge, with the latest fixes in the FSG code, and making the log files generated by FSG, LM, and allphone have the same 'look and feel', with the backtrace information presented consistently * * Revision 1.8 2004/07/20 20:48:40 rkm * Added uttproc_load_fsg() * * Revision 1.7 2004/07/20 13:40:55 rkm * Added FSG get/set start/final state functions * * Revision 1.5 2004/07/16 19:55:28 rkm * Added state information to hypothesis * * Revision 1.7 2004/07/16 00:57:10 egouvea * Added Ravi's implementation of FSG support. * * Revision 1.4 2004/06/22 15:35:46 rkm * Added partial result reporting options in batch mode * * Revision 1.3 2004/06/16 17:45:48 rkm * Added query_pscr2lat() to API * * Revision 1.2 2004/05/27 14:22:57 rkm * FSG cross-word triphones completed (but for single-phone words) * * Revision 1.1.1.1 2004/03/01 14:30:19 rkm * * * Revision 1.5 2004/02/27 21:01:25 rkm * Many bug fixes in multiple FSGs * * Revision 1.4 2004/02/27 16:15:13 rkm * Added FSG switching * * Revision 1.3 2004/02/27 15:05:21 rkm * *** empty log message *** * * Revision 1.2 2004/02/23 15:09:50 rkm * *** empty log message *** * * Revision 1.1.1.1 2003/12/03 20:05:04 rkm * Initial CVS repository * * Revision 1.6 2001/12/07 17:30:00 lenzo * Clean up and remove extra lines. * * Revision 1.5 2001/12/07 05:14:19 lenzo * License 1.2. * * Revision 1.4 2001/11/20 21:22:31 lenzo * Win32 re-compatibility fixes. * * Revision 1.3 2000/12/05 01:45:11 lenzo * Restructuring, hear rationalization, warning removal, ANSIfy * * Revision 1.2 2000/02/08 20:44:32 lenzo * Changed uttproc_allphone_cepfile() to uttproc_allphone_file. * * Revision 1.1.1.1 2000/01/28 22:09:07 lenzo * Initial import of sphinx2 * * * * 05-Jan-99 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added cdcn.h and uttproc_get_cdcn_ptr(). * * 04-Nov-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added conf field to search_hyp_t. * * 30-Oct-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added ascr, lscr fields to search_hyp_t. * * 19-Oct-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added uttproc_set_logfile(). * * 10-Sep-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added uttproc_allphone_cepfile(). * * 20-Aug-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added functions uttproc_agcemax_get() and set(). * * 20-Apr-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added uttproc_set_auto_uttid_prefix(). * * 24-Mar-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added additional phone_perp field to search_hyp_t for confidence measure * based on phone perplexity. * * 08-Mar-98 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added additional latden field to search_hyp_t for confidence measure * based on lattice density. * * 07-Aug-96 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added uttproc_result_seg and uttproc_partial_result_seg. * Changed search_hyp_t to support linked list and include word string. * * 17-Jun-96 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added uttproc_set_context(). * * 04-Jun-96 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Added BLOCKING option to uttproc_rawdata, uttproc_cepdata, uttproc_result. * Removed uttproc_set_uttid and added id argument to uttproc_begin_utt. * * 24-May-96 M K Ravishankar (rkm@cs) at Carnegie Mellon University * Substantially modified to be driven with externally provided data, rather * than explicitly reading an A/D source. * Added uttproc_abort_utt() and uttproc_partial_result(). * Added raw and mfc logging function. * * 01-May-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added functions uttproc_cepmean_set, uttproc_cepmean_get, * uttproc_agcmax_set, uttproc_agcmax_get. * * 07-Aug-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added uttproc_rawdata(). * * 05-Aug-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added uttproc_beginutt(), uttproc_cepdata(), and uttproc_endutt(). * * 13-Jun-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Simplified the uttproc interface by combining functions and redefining * others. * * 01-Jun-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added uttproc_set_lm() and uttproc_set_startword(). * * 01-May-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Created. */#ifndef _FBS_H_#define _FBS_H_#include "s2types.h"#include "fe.h"/* * The decoder is set up to process one finite-duration utterance at a time. The * maximum duration of an utterance is about 60sec, though other resource limits, * such as the back pointer table size, could constrain the duration further. *//* * Recognition result (hypothesis) with word segmentation information. */typedef struct search_hyp_s { char const *word; /* READ-ONLY */ int32 wid; /* For internal use of decoder */ int32 sf, ef; /* Start, end frames within utterance for this word */ int32 ascr, lscr; /* Acoustic, LM scores (not always used!) */ int32 fsg_state; /* At which this entry terminates (FSG mode only) */ float conf; /* Confidence measure (roughly prob(correct)) for this word; NOT FILLED IN BY THE RECOGNIZER at the moment!! */ struct search_hyp_s *next; /* Next word segment in the hypothesis; NULL if none */ int32 latden; /* Average lattice density in segment. Larger values imply more confusion and less certainty about the result. To use it for rejection, cutoffs must be found independently */} search_hyp_t;/***************************** INIT/CLEANUP *****************************//* * Called once at initialization with the list of arguments to initialize to initialize * the decoder. If the -ctlfn argument is given, it will process the argument file in * batch mode and exit the application. * Return value: 0 if successful, -1 otherwise. */int32 fbs_init (int32 argc, char **argv); /* Arguments for initialization *//* * Called before quitting the application to tie up loose ends in the decoder. * Return value: 0 if successful, -1 otherwise. */int32 fbs_end ( void );/******************************* Decoding *******************************//* * Called at the beginning of each utterance. uttid is an input string identifying the * utterance; utterance data (raw or mfc files, if any) logged under this name. The * recognition result in the "match" file also identified with this id. If uttid is * NULL, an automatically generated running sequence number (of the form %08d) is used * instead. * Return value: 0 if successful, else -1. */int32 uttproc_begin_utt (char const *uttid);/* * Decode the next block of input samples in the current utterance. The "block" argument * specifies whether the decoder should block until all pending data have been processed. * If 0, it is "non-blocking". That is, the decoder steps through only a few pending * frames (at least 1), and the remaining input data is queued up internally for later * processing. In particular, this function can be called with 0-length data to simply * process internally queued up frames. * * NOTE: The decoder will not actually process the input data if any of the processing * depends on the entire utterance. (For example, if CMN/AGC is based on entire current * utterance.) The data are queued up internally for processing after uttproc_end_utt is * called. Also, one cannot combine uttproc_rawdata and uttproc_cepdata within the same * utterance. * * Return value: #frames internally queued up and remaining to be decoded; -1 if any * error occurs. */int32 uttproc_rawdata (int16 *raw, /* In: Block of int16 samples */ int32 nsample, /* In: #Samples in above block; can be 0!! */ int32 block); /* In: if !0, process all data before returning *//* * Like uttproc_rawdata, but the input data are cepstrum vectors rather than raw samples. * One cannot combine uttproc_cepdata and uttproc_rawdata within the same utterance. * Return value: #frames internally queued up and remaining to be decoded; -1 if any * error occurs. */int32 uttproc_cepdata (float32 **cep, /* In: cep[i] = i-th frame of cepstrum data */ int32 nfrm, /* In: #frames of cep data; can be 0!! */ int32 block); /* In: if !0, process all data before returning *//* * For bookkeeping purposes, marking that no more data is forthcoming in the current * utterance. It should be followed by uttproc_result to obtain the final recognition * result. * Return value: 0 if successful, else -1. */int32 uttproc_end_utt ( void );/* * Obtain recognition result for utterance after uttproc_end_utt has been called. In * the blocking form, all queued up data is processed and final result returned. In the * non-blocking version, only a few pending frames (at least 1) are processed. In the * latter case, the function can be called repeatedly to allow the decoding to complete. * * Return value: #frames remaining to be processed. If non-zero (non-blocking mode) the * final result is not yet available. If 0, frm and hyp contain the final recognition * result. If there is any error, the function returns -1. */int32 uttproc_result (int32 *frm, /* Out: *frm = #frames in current utterance */ char **hyp, /* Out: *hyp = recognition string; READ-ONLY. Contents clobbered by the next uttproc_result or uttproc_partial_result call */ int32 block); /* In: If !0, process all data and return final result *//*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -