📄 decoder.h
字号:
// file: decoder.h//// this is the header file for the Decoder class//// make sure definitions are made only once//#ifndef __ISIP_DECODER#define __ISIP_DECODER// isip include files//#ifndef __ISIP_INTEGRAL#include <integral.h>#endif// forward declaration of classes//#ifndef __ISIP_HASH_TABLE#include <hash_table.h>#endif#ifndef __ISIP_INSTANCE#include <instance.h>#endif#ifndef __ISIP_LATTICE#include <lattice.h>#endif#ifndef __ISIP_LEX_TREE#include <lex_tree.h>#endif#ifndef __ISIP_LINK_LIST#include <link_list.h>#endif#ifndef __ISIP_MEMORY_MANAGER#include <memory_manager.h>#endif#ifndef __ISIP_MODEL#include <model.h>#endif#ifndef __ISIP_NGRAM#include <ngram.h>#endif#ifndef __ISIP_PHONE#include <phone.h>#endif#ifndef __ISIP_STATE#include <state.h>#endif#ifndef __ISIP_TOKEN#include <token.h>#endif#ifndef __ISIP_TRACE#include <trace.h>#endif#ifndef __ISIP_WORD#include <word.h>#endif#ifndef __ISIP_EXTRACT_FEATURE#include <extract_feature.h>#endif#ifndef __ISIP_NBEST_NODE#include <nbest_node.h>#endif#ifndef __ISIP_PATH_LIST#include <path_list.h>#endif// Decoder: the main decoder class that implements a lexical-tree// based Viterbi search//class Decoder { //--------------------------------------------------------------------------- // // protected data // //---------------------------------------------------------------------------protected: // memory manager parameters // Memory_manager* manager_d; logical_1 restructure_memory_d; // function and align modes // int_4 function_mode_d; // functional mode int_4 context_mode_d; // acoustic context mode int_4 align_mode_d; // alignment level of output int_4 input_format_d; // binary or ascii features int_4 input_feature_format_d; // feature file format logical_1 demo_mode_d; // demo mode flag int_4 segment_mode_d; // segmental level of alignments // delta and acceleration computations // int_4 delta_win_d; logical_1 delta_d; logical_1 acc_d; // number of utterances decoded // int_4 num_utt_d; // levels in the search hierarchy // 
int_4 num_levels_d; // timing information // int_4 frame_d; // current frame float_4 frame_dur_d; // frame duration in seconds // input feature data information // int_4 num_feat_d; // size of input feature vector float_4* features_d; // input feature vector // acoustic model information // // context independent models // int_4 num_ci_d; // number of CI models char_1** ci_models_d; // CI model strings // context dependent models // int_4 cd_size_d; // size of context int_4 num_cd_d; // number of CD models Phone** cd_models_d; // array of CD models int_4* cdmap_d; // map models to model indices // Hidden Markov models // int_4 num_models_d; // number of HMMs Model** models_d; // array of models // state transition matrices // int_4 num_trans_d; // number of matrices float_4** transitions_d; // array of transition matrices // model states // int_4 num_states_d; // number of states State** states_d; // array of states // lexicon // int_4 num_words_d; // number of words in lexicon Hash_table* lexicon_d; // hash table of words // lm related parameters // float_4 lmscale_d; // LM scaling factor float_4 wdpenalty_d; // word insertion penalty int_4 ngram_order_d; // ngram lm order Ngram* ngram_d; // ngram language model Lattice* lattice_d; // lattice for rescoring Link_list* gram_list_d; // list of grammars // flag to determine if decoder outputs compacted lattice // logical_1 compact_lattice_d; // path information // // active model instances // int_4 inst_count_d; Hash_table* inst_table_d; Link_list* inst_list_d; // hypotheses information // int_4 num_hyps_d; // number of output hypotheses int_4 num_nbest_d; // max number of output hypotheses Trace** nbest_d; // array of output hypotheses // path history information // int_4 num_hist_d; // word histories per path Link_list* history_list_d; // list of active word histories // lexical tree information // Lex_tree* proto_tree_d; // prototype lex tree Link_list* lextree_list_d; // list of active tree copies // list of 
traces at model level // int_4 num_active_models_d; int_4* active_models_d; Link_list** model_trlist_d; // list of traces at word level // int_4 num_active_words_d; int_4* active_words_d; Link_list** word_trlist_d; float_4* word_scores_d; // pruning-related information // // beam pruning information // float_4* max_score_d; // max path score at each level float_4* beam_d; // beam width at each level float_4* beam_thresh_d; // beam threshold at each level // instance pruning information // int_4 mapmi_limit_d; // number of allowed instances float_4 mapmi_thresh_d; // mapmi pruning threshold // nbest pruning parameters // int_4 nbest_max_paths_d; // max path score int_4 nbest_beam_d; // beam width // word-end pruning information // int_4 num_wordends_d; // number of allowed words float_4 wordend_thresh_d; // word end pruning threshold // demo information // int_4 num_steps_d; // number of frames to decode // trace generation and other statistics // int_4* num_traces_total_d; // number of active traces int_4* num_traces_gen_d; // number of generated traces int_4* num_traces_del_d; // number of deleted traces int_4* total_gen_d; // cumulative generated traces int_4* total_del_d; // cumulative deleted traces // special models // int_4 sil_model_d; // silence model index int_4 sp_model_d; // short pause (sp) model index float_4 sp_score_d; // sp model skip score // parameters for Communicator API // float_8 *buf_d; // block buffer address int_4 size_d; // block buffer size int_4 current_d; // buffer offset that points to the data being processed // object for frontend // Extract_feature frontend_d; // time counting parameters // float_8 decode_time_d; // model-level segmental information (alignments) // Link_list* model_seglist_d; //--------------------------------------------------------------------------- // // public methods // //---------------------------------------------------------------------------public: // required methods // char_1* name_cc(); volatile void 
error_handler_cc(char_1* mname, char_1* msg); logical_1 debug_cc(FILE *fp, char_1* message); int_4 size_cc(); // destructors/constructors // ~Decoder(); Decoder(); Decoder(Decoder& decoder); // method to expand a system variable string // static logical_1 expand_filename_cc(char_1*& fname); // methods to read data from file // logical_1 read_transitions_cc(FILE* fp); logical_1 read_states_cc(FILE* fp); logical_1 read_hmms_cc(FILE* fp); logical_1 read_ci_models_cc(FILE* fp); logical_1 read_cd_models_cc(FILE* fp); logical_1 read_lexicon_cc(FILE* fp); logical_1 read_ngram_cc(FILE* fp); logical_1 read_gram_list_cc(FILE* fp); logical_1 read_model_segment_cc(FILE* fp); // methods to read lattice for rescoring // logical_1 read_lattice_cc(FILE* fp); logical_1 read_lattice_cc(char_1* str); // method to compute lattice WER // logical_1 lattice_wer_cc(FILE* flat, FILE* fref, FILE* fout); // method to convert lattice to an nbest list // logical_1 lattice_to_nbest_cc(FILE* flat, FILE* fout, int_4 max_paths, float_8 beam_width); logical_1 lattice_to_nbest_cc(Lattice* flat, char_1*& hypo, int_4 max_paths, float_8 beam_width); logical_1 lattice_to_nbest_cc(Lattice* flat, FILE* fout, int_4 max_paths, float_8 beam_width); // methods to set parameters // logical_1 set_restructure_memory_cc(logical_1 value) { restructure_memory_d = value; return ISIP_TRUE; } logical_1 set_function_mode_cc(int_4 value) { function_mode_d = value; return ISIP_TRUE; } logical_1 set_context_mode_cc(int_4 value) { context_mode_d = value; return ISIP_TRUE; } logical_1 set_align_mode_cc(int_4 value) { align_mode_d = value; return ISIP_TRUE; } logical_1 set_segment_mode_cc(int_4 value) { segment_mode_d = value; return ISIP_TRUE; } logical_1 set_input_format_cc(int_4 value) { input_format_d = value; return ISIP_TRUE; } logical_1 set_input_feature_format_cc(int_4 value) { input_feature_format_d = value; return ISIP_TRUE; } logical_1 set_delta_win_cc(int_4 value) { delta_win_d = value; return ISIP_TRUE; } 
logical_1 set_delta_cc(logical_1 value) { delta_d = value; return ISIP_TRUE; } logical_1 set_acc_cc(logical_1 value) { acc_d = value; return ISIP_TRUE; } logical_1 set_demo_mode_cc(logical_1 value) { demo_mode_d = value; return ISIP_TRUE; } logical_1 set_frame_cc(int_4 value) { frame_d = value; return ISIP_TRUE; } logical_1 set_frame_dur_cc(float_4 value) { frame_dur_d = value; return ISIP_TRUE;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -