📄 decoder.h

📁 这是处理语音信号的程序
💻 H
📖 第 1 页 / 共 2 页
字号:
12 下一页
// file: decoder.h//// this is the header file for the Decoder class//// make sure definitions are made only once//#ifndef __ISIP_DECODER#define __ISIP_DECODER// isip include files//#ifndef __ISIP_INTEGRAL#include <integral.h>#endif// forward declaration of classes//#ifndef __ISIP_HASH_TABLE#include <hash_table.h>#endif#ifndef __ISIP_INSTANCE#include <instance.h>#endif#ifndef __ISIP_LATTICE#include <lattice.h>#endif#ifndef __ISIP_LEX_TREE#include <lex_tree.h>#endif#ifndef __ISIP_LINK_LIST#include <link_list.h>#endif#ifndef __ISIP_MEMORY_MANAGER#include <memory_manager.h>#endif#ifndef __ISIP_MODEL#include <model.h>#endif#ifndef __ISIP_NGRAM#include <ngram.h>#endif#ifndef __ISIP_PHONE#include <phone.h>#endif#ifndef __ISIP_STATE#include <state.h>#endif#ifndef __ISIP_TOKEN#include <token.h>#endif#ifndef __ISIP_TRACE#include <trace.h>#endif#ifndef __ISIP_WORD#include <word.h>#endif#ifndef __ISIP_EXTRACT_FEATURE#include <extract_feature.h>#endif#ifndef __ISIP_NBEST_NODE#include <nbest_node.h>#endif#ifndef __ISIP_PATH_LIST#include <path_list.h>#endif// Decoder: the main decoder class that implements a lexical-tree// based Viterbi search//class Decoder {  //---------------------------------------------------------------------------  //  // protected data  //  //---------------------------------------------------------------------------protected:    // memory manager parameters  //  Memory_manager* manager_d;  logical_1 restructure_memory_d;    // function and align modes  //  int_4 function_mode_d;                  // functional mode  int_4 context_mode_d;                   // acoustic context mode  int_4 align_mode_d;                     // alignment level of output  int_4 input_format_d;                   // binary or ascii features  int_4 input_feature_format_d;           // feature file format  logical_1 demo_mode_d;                  // demo mode flag  int_4 segment_mode_d;                   // segmental level of alignments  // delta and acceleration computations  //  
int_4 delta_win_d;  logical_1 delta_d;  logical_1 acc_d;  // number of utterances decoded  //  int_4 num_utt_d;    // levels in the search hierarchy  //  int_4 num_levels_d;  // timing information  //  int_4 frame_d;                           // current frame  float_4 frame_dur_d;                     // frame duration in seconds  // input feature data information  //  int_4 num_feat_d;                        // size of input feature vector  float_4* features_d;                     // input feature vector    // acoustic model information  //  // context independent models  //  int_4 num_ci_d;                          // number of CI models  char_1** ci_models_d;                    // CI model strings  // context dependent models  //  int_4 cd_size_d;                         // size of context  int_4 num_cd_d;                          // number of CD models  Phone** cd_models_d;                     // array of CD models  int_4* cdmap_d;                          // map models to model indices    // Hidden Markov models  //  int_4 num_models_d;                      // number of HMMs  Model** models_d;                        // array of models    // state transition matrices  //  int_4 num_trans_d;                       // number of matrices  float_4** transitions_d;                 // array of transition matrices  // model states  //  int_4 num_states_d;                      // number of states  State** states_d;                        // array of states  // lexicon  //  int_4 num_words_d;                       // number of words in lexicon  Hash_table* lexicon_d;                   // hash table of words    // lm related parameters  //  float_4 lmscale_d;                       // LM scaling factor  float_4 wdpenalty_d;                     // word insertion penalty  int_4 ngram_order_d;                     // ngram lm order  Ngram* ngram_d;                          // ngram language model  Lattice* lattice_d;                      // lattice for rescoring  Link_list* 
gram_list_d;                  // list of grammars     // flag to determine if decoder outputs compacted lattice  //  logical_1 compact_lattice_d;    // path information  //  // active model instances  //  int_4 inst_count_d;  Hash_table* inst_table_d;  Link_list* inst_list_d;    // hypotheses information  //  int_4 num_hyps_d;                        // number of output hypotheses  int_4 num_nbest_d;                       // max number of output hypotheses  Trace** nbest_d;                         // array of output hypotheses  // path history information  //  int_4 num_hist_d;                        // word histories per path  Link_list* history_list_d;               // list of active word histories  // lexical tree information  //  Lex_tree* proto_tree_d;                  // prototype lex tree  Link_list* lextree_list_d;               // list of active tree copies  // list of traces at model level  //  int_4 num_active_models_d;  int_4* active_models_d;  Link_list** model_trlist_d;  // list of traces at word level  //  int_4 num_active_words_d;  int_4* active_words_d;  Link_list** word_trlist_d;  float_4* word_scores_d;    // pruning-related information  //  // beam pruning information  //  float_4* max_score_d;                    // max path score at each level  float_4* beam_d;                         // beam width at each level  float_4* beam_thresh_d;                  // beam threshold at each level  // instance pruning information  //  int_4 mapmi_limit_d;                     // number of allowed instances  float_4 mapmi_thresh_d;                  // mapmi pruning threshold  // nbest pruning parameters  //  int_4 nbest_max_paths_d;                 // max path score  int_4 nbest_beam_d;                      // beam width    // word-end pruning information  //  int_4 num_wordends_d;                    // number of allowed words  float_4 wordend_thresh_d;                // word end pruning threshold  // demo information  //  int_4 num_steps_d;                    
   // number of frames to decode  // trace generation and other statistics   //  int_4* num_traces_total_d;               // number of active traces  int_4* num_traces_gen_d;                 // number of generated traces  int_4* num_traces_del_d;                 // number of deleted traces  int_4* total_gen_d;                      // cumulative generated traces  int_4* total_del_d;                      // cumulative deleted traces  // special models  //  int_4 sil_model_d;                       // silence model index  int_4 sp_model_d;                        // short pause (sp) model index  float_4 sp_score_d;                      // sp model skip score  // parameters for Communicator API  //  float_8 *buf_d;  // block buffer address  int_4 size_d;      // block buffer size  int_4 current_d;   // buffer offset pointing to the data being processed    // object for frontend  //  Extract_feature frontend_d;  // time counting parameters  //  float_8 decode_time_d;  // model-level segmental information (alignments)  //  Link_list* model_seglist_d;    //---------------------------------------------------------------------------  //  // public methods  //  //---------------------------------------------------------------------------public:  // required methods  //  char_1* name_cc();  volatile void error_handler_cc(char_1* mname, char_1* msg);  logical_1 debug_cc(FILE *fp, char_1* message);  int_4 size_cc();    // destructors/constructors  //  ~Decoder();  Decoder();  Decoder(Decoder& decoder);  // method to expand a system variable string  //  static logical_1 expand_filename_cc(char_1*& fname);    // methods to read data from file  //  logical_1 read_transitions_cc(FILE* fp);  logical_1 read_states_cc(FILE* fp);  logical_1 read_hmms_cc(FILE* fp);  logical_1 read_ci_models_cc(FILE* fp);  logical_1 read_cd_models_cc(FILE* fp);  logical_1 read_lexicon_cc(FILE* fp);  logical_1 read_ngram_cc(FILE* fp);  logical_1 read_gram_list_cc(FILE* fp);  logical_1 
read_model_segment_cc(FILE* fp);    // methods to read lattice for rescoring  //  logical_1 read_lattice_cc(FILE* fp);  logical_1 read_lattice_cc(char_1* str);  // method to compute lattice WER  //  logical_1 lattice_wer_cc(FILE* flat, FILE* fref, FILE* fout);    // method to convert lattice to an nbest list  //  logical_1 lattice_to_nbest_cc(FILE* flat, FILE* fout, int_4 max_paths,                                float_8 beam_width);  logical_1 lattice_to_nbest_cc(Lattice* flat, char_1*& hypo, int_4 max_paths,                                float_8 beam_width);  logical_1 lattice_to_nbest_cc(Lattice* flat, FILE* fout, int_4 max_paths,                                float_8 beam_width);  // methods to set parameters  //  logical_1 set_restructure_memory_cc(logical_1 value) {    restructure_memory_d = value;    return ISIP_TRUE;  }    logical_1 set_function_mode_cc(int_4 value) {    function_mode_d = value;    return ISIP_TRUE;  }    logical_1 set_context_mode_cc(int_4 value) {    context_mode_d = value;    return ISIP_TRUE;  }  logical_1 set_align_mode_cc(int_4 value) {    align_mode_d = value;    return ISIP_TRUE;  }  logical_1 set_segment_mode_cc(int_4 value) {    segment_mode_d = value;    return ISIP_TRUE;  }  logical_1 set_input_format_cc(int_4 value) {    input_format_d = value;    return ISIP_TRUE;  }  logical_1 set_input_feature_format_cc(int_4 value) {    input_feature_format_d = value;    return ISIP_TRUE;  }  logical_1 set_delta_win_cc(int_4 value) {    delta_win_d = value;    return ISIP_TRUE;  }  logical_1 set_delta_cc(logical_1 value) {    delta_d = value;    return ISIP_TRUE;  }  logical_1 set_acc_cc(logical_1 value) {    acc_d = value;    return ISIP_TRUE;  }  logical_1 set_demo_mode_cc(logical_1 value) {    demo_mode_d = value;    return ISIP_TRUE;  }  logical_1 set_frame_cc(int_4 value) {    frame_d = value;    return ISIP_TRUE;  }  logical_1 set_frame_dur_cc(float_4 value) {    frame_dur_d = value;    return ISIP_TRUE;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -