📄 hiddenmarkovmodel.h
字号:
// file: $isip/class/pr/HiddenMarkovModel/HiddenMarkovModel.h// version: $Id: HiddenMarkovModel.h,v 1.70 2003/04/16 13:23:33 alphonso Exp $//// make sure definitions are only made once//#ifndef ISIP_HIDDEN_MARKOV_MODEL#define ISIP_HIDDEN_MARKOV_MODEL#ifndef ISIP_DOUBLE#include <Double.h>#endif#ifndef ISIP_ULONG#include <Ulong.h>#endif#ifndef ISIP_VECTOR_BYTE#include <VectorByte.h>#endif#ifndef ISIP_MATRIX_FLOAT#include <MatrixFloat.h>#endif#ifndef ISIP_FRONT_END#include <FrontEnd.h>#endif#ifndef ISIP_FRONT_END#include <FrontEnd.h>#endif#ifndef ISIP_TRAIN_NODE#include <TrainNode.h>#endif#ifndef ISIP_WRAPPER#include <Wrapper.h>#endif#ifndef ISIP_HASH_KEY#include <HashKey.h>#endif#ifndef ISIP_LANGUAGE_MODEL#include <LanguageModel.h>#endif#ifndef ISIP_HIERARCHICAL_SEARCH#include <HierarchicalSearch.h>#endif#ifndef ISIP_STACK_SEARCH#include <StackSearch.h>#endif#ifndef ISIP_TRANSCRIPTION_DATABASE#include <TranscriptionDatabase.h>#endif#ifndef ISIP_AUDIO_DATABASE#include <AudioDatabase.h>#endif#ifndef ISIP_ANNOTATION_GRAPH#include <AnnotationGraph.h>#endif#ifndef ISIP_ANNOTATION#include <Annotation.h>#endif#ifndef ISIP_SDB#include <Sdb.h>#endif#ifndef ISIP_PHONETIC_DECISION_TREE#include <PhoneticDecisionTree.h>#endif#ifndef ISIP_ANNOTATION_GRAPH#include <AnnotationGraph.h>#endif// HiddenMarkovModel: an intermediate class providing base// for Decode and Train algorithm using// Viterbi or Baum-Welch implementation//class HiddenMarkovModel { //-------------------------------------------------------------------------- // // public constants // //--------------------------------------------------------------------------public: // define the class name // static const String CLASS_NAME; //---------------------------------------- // // other important constants // //---------------------------------------- // define the algorithm choices // enum ALGORITHM { DECODE = 0, TRAIN, MODEL_CREATION, INITIALIZE, MIXTURE_SPLITTING, CONTEXT_GENERATION, TRAIN_PARAMETER_TYING, TEST_PARAMETER_TYING, FORCED_ALIGNMENT, ADAPT_DECODE, DEF_ALGORITHM = DECODE }; // define the implementation choices // enum IMPLEMENTATION { VITERBI = 0, STACK, BAUM_WELCH, GLOBAL, VARIANCE_SPLITTING, SYMBOL_GENERATION, ML, MODEL_GENERATION, DEF_IMPLEMENTATION = VITERBI }; // define the adaptation supervision choices // enum ADAPT_SUPERVISION_MODE { SUPERVISED = 0, UNSUPERVISED, DEF_ADAPT_SUPERVISION_MODE = UNSUPERVISED }; // define the adaptation sequence choices // enum ADAPT_SEQUENCE_MODE { INCREMENTAL = 0, BATCH, DEF_ADAPT_SEQUENCE_MODE = INCREMENTAL }; // define the context choices // enum CONTEXT_MODE { SYMBOL_ONLY = 0, SYMBOL_INTERNAL, CROSS_SYMBOL, DEF_CONTEXT_MODE = SYMBOL_ONLY }; // define the cross-word choices // enum FUNCTION_MODE { NONE = 0, GRAMMAR_DECODING, NETWORK_DECODING, NETWORK_RESCORING, NETWORK_GENERATION, NETWORK_VERIFICATION, ACCUMULATE, UPDATE, DEF_FUNCTION_MODE = NONE }; // define output modes // enum OUTPUT_MODE { FILE = 0, TRANSFORM, LIST, DATABASE, DEF_OUTPUT_MODE = FILE }; // define update modes // enum UPDATE_MODE { TRANSITIONS = 0, OBSERVATIONS, ALL, DEF_UPDATE_MODE = ALL }; // define output formats // enum OUTPUT_FORMAT { NIST_SCORE = 0, NATIVE, JSGF, DEF_OUTPUT_FORMAT = NATIVE }; // define output types // enum OUTPUT_TYPE { TEXT = 0, BINARY, DEF_OUTPUT_TYPE = BINARY }; // define internal on/off states // static const byte OFF = 0; static const byte ON = 1; // define internal default values // static const String DEF_TRANSCRIPTION_LEVEL; static const String DEF_FORCE_ALIGNMENT_LEVEL; // define internal constants // static const float DEF_WEIGTH_ONE = 0; static const float DEF_WEIGTH_HALF = -0.693147; static const float DEF_BETA_PRUNING_THRESHOLD = 3.40282347e+38F; static const float DEF_FLT_MIN = 1.17549435e-38F; static const float DEF_FLT_MAX = 3.40282347e+38F; static const float DEF_DBL_MIN = 2.2250738585072014e-308; static const float DEF_DBL_MAX = 1.7976931348623157e+308; // define the static NameMap objects // static const NameMap IMPL_MAP; static const NameMap ALGO_MAP; static const NameMap ADAPT_SUP_MODE_MAP; static const NameMap ADAPT_SEQ_MODE_MAP; static const NameMap CONTEXT_MODE_MAP; static const NameMap FUNCTION_MODE_MAP; static const NameMap OUTPUT_MODE_MAP; static const NameMap OUTPUT_FORMAT_MAP; static const NameMap OUTPUT_TYPE_MAP; static const NameMap UPDATE_MODE_MAP; //---------------------------------------- // // i/o related constants // //---------------------------------------- static const String DEF_PARAM; static const String PARAM_UPDATE_LEVELS; static const String PARAM_NUM_LEVELS; static const String PARAM_NUM_MIXTURES; static const String PARAM_NUM_ITERATIONS; static const String PARAM_VAR_FLOOR; static const String PARAM_VAR_FLOOR_FILE; static const String PARAM_BETA_THRESHOLD; static const String PARAM_TRANSCRIPTION_LEVEL; static const String PARAM_FORCE_ALIGNMENT_LEVEL; static const String PARAM_TRANS_DB_FILE; static const String PARAM_AUDIO_DB_FILE; static const String PARAM_MIN_MPD; static const String PARAM_MIN_OCCUPANCY; static const String PARAM_MIN_MODEL_COUNT; static const String PARAM_ALGORITHM; static const String PARAM_IMPLEMENTATION; static const String PARAM_ADAPT_SUPERVISION_MODE; static const String PARAM_ADAPT_SEQUENCE_MODE; static const String PARAM_CONFIGURATION_FILE; static const String PARAM_FRONT_END_FILE; static const String PARAM_LANGUAGE_MODEL_FILE; static const String PARAM_ACOUSTIC_MODEL_FILE; static const String PARAM_UPDATE_LANGUAGE_MODEL_FILE; static const String PARAM_UPDATE_ACOUSTIC_MODEL_FILE; static const String PARAM_ACCUMULATOR_FILE; static const String PARAM_ACCUMULATOR_LIST; static const String PARAM_OUTPUT_MODE; static const String PARAM_TRANSCRIPTION_BOUNDS; static const String PARAM_OUTPUT_FORMAT; static const String PARAM_OUTPUT_TYPE; static const String PARAM_OUTPUT_LEVELS; static const String PARAM_UPDATE_MODE; static const String PARAM_FUNCTION_MODE; static const String PARAM_CONTEXT_MODE; static const String PARAM_CONTEXT_LEVEL; static const String PARAM_CONTEXT_ORDER; static const String PARAM_OUTPUT_FILE; static const String PARAM_OUTPUT_LIST; static const String PARAM_CONTEXT_LIST; static const String PARAM_QUES_ANS_FILE; static const String PARAM_PHONETIC_DT_FILE; static const String PARAM_PHONETIC_DT_SPLIT_THRESHOLD; static const String PARAM_PHONETIC_DT_MERGE_THRESHOLD; static const String PARAM_PHONETIC_DT_NUM_OCC_THRESHOLD; static const String OPTION_TEXT; static const String OPTION_BINARY; static const String OPTION_OUT_MODE_FILE; static const String OPTION_OUT_MODE_TRANSFORM; static const String OPTION_OUT_MODE_LIST; static const String OPTION_OUT_FORM_NIST_SCORE; static const String OPTION_OUT_FORM_NATIVE; static const String OPTION_OUT_FORM_JSGF; static const String OPTION_TRANSCRIPTION_LEVEL; static const String OPTION_UPDATE_TRANSITIONS; static const String OPTION_UPDATE_OBSERVATIONS; static const String OPTION_UPDATE_ALL; static const String OPTION_FUNCTION_NONE; static const String OPTION_FUNCTION_GRAMMAR_DECODING; static const String OPTION_FUNCTION_NETWORK_DECODING; static const String OPTION_FUNCTION_NETWORK_RESCORING; static const String OPTION_FUNCTION_NETWORK_GENERATION; static const String OPTION_FUNCTION_NETWORK_VERIFICATION; static const String OPTION_FUNCTION_ACCUMULATE; static const String OPTION_FUNCTION_UPDATE; //---------------------------------------- // // default values and arguments // //---------------------------------------- // default values // static const boolean DEF_VERIFY = false; static const boolean DEF_STREAM = false; static const boolean DEF_TRANSCRIPTION_BOUNDS = false; static const long DEF_NUM_LEVELS = 3; static const long DEF_NUM_MIXTURES = 1; static const long DEF_NUM_ITERATIONS = 1; static const long DEF_ALIGNMENT_LEVEL = -1; static const long DEF_INITIAL_LEVEL = 0; static const long DEF_CONTEXT_LEVEL = 0; static const long DEF_CONTEXT_ORDER = 3; static const long DEF_CAPACITY = 12000; static const float DEF_MIN_MPD = 1000; static const float DEF_MIN_OCCUPANCY = 1.0e-10; static const long DEF_MIN_MODEL_COUNT = 1; static const float DEF_VAR_FLOOR = 2e-4; //---------------------------------------- // // error codes // //---------------------------------------- static const long ERR = (long)100000; static const long ERR_ADAPT_NO_GAUSSIAN = (long)100050; //--------------------------------------------------------------------------- // // protected data // //---------------------------------------------------------------------------protected: // algorithm name // ALGORITHM algorithm_d; // implementation name // IMPLEMENTATION implementation_d; // adaptation supervision mode // ADAPT_SUPERVISION_MODE adapt_supervision_mode_d; // adaptation sequence mode // ADAPT_SEQUENCE_MODE adapt_sequence_mode_d; // context mode // CONTEXT_MODE context_mode_d; // cross-word mode // FUNCTION_MODE function_mode_d; // output mode // OUTPUT_MODE output_mode_d; // update mode // UPDATE_MODE update_mode_d; // output format // OUTPUT_FORMAT output_format_d; // output type // OUTPUT_TYPE output_type_d; // update levels // String update_levels_str_d; // vector that contains the indices of re-estimation update levels // VectorByte update_levels_d; // output levels // String output_levels_str_d; // vector that contains the indices of hypothesis output levels // VectorByte output_levels_d; // parameter file // Filename param_file_d; // language model file // Filename lm_model_file_d; // statistical model file // Filename ac_model_file_d; // configuration file // Filename cnfg_file_d; // fornt-end file // Filename fend_file_d; // updated model file // Filename update_lm_model_file_d; // updated statistics file // Filename update_ac_model_file_d; // transcription file // Filename transcription_db_file_d; // audio database file // Filename audio_db_file_d; // output file // Filename output_file_d; // accumulator file // Filename accum_file_d; // accumulator list // Filename accum_list_d; // output list file // Filename output_list_d; // context generation file // Filename context_list_d; // variance floor file // Filename variance_floor_file_d; // phonetic question-answers file // Filename ques_ans_file_d; // phonetic decision-tree file // Filename phonetic_dt_file_d; // split-threshold for the phonetic-decision-tree // Float phonetic_dt_split_threshold_d; // merge-threshold for the phonetic-decision-tree // Float phonetic_dt_merge_threshold_d; // num_occ-threshold for the phonetic-decision-tree // Float phonetic_dt_num_occ_threshold_d; // variance floor // Float variance_floor_d; // beta pruning threshold // Float beta_threshold_d; // transcription level // String transcription_level_d; // alignment level // String force_alignment_level_d; // force alignment level // Long alignment_level_d; // initial level // Long initial_level_d; // context level // Long context_level_d; // context order // Long context_order_d; // streaming input flag // boolean stream_d; // verification mode flag // boolean verify_d; // front end // FrontEnd fe_d; // utterance trellis // BiGraph<TrainNode>* trellis_d; // hierarchical search // HierarchicalSearch search_engine_d; // stack search // StackSearch stack_engine_d; // number of search levels // Long num_levels_d; // number of mixtures // Long num_mixtures_d; // number of iterations of training // Long num_iterations_d; // minimum model probability deviance // Float min_mpd_d; // floor on the occupancy probability // Float min_occupancy_d; // minimum number of times a model must occur before update // Long min_model_count_d; // transcription database // TranscriptionDatabase transcription_db_d; // audio database // AudioDatabase audio_db_d; // flag indicates if non-speech symbols are forced at transcription ends // Boolean transcription_bounds_d; // debug level // static Integral::DEBUG debug_level_d; // verbosity // static Integral::DEBUG verbosity_d; // static memory manager // static MemoryManager mgr_d; //--------------------------------------------------------------------------- // // required public methods // //---------------------------------------------------------------------------public: // method: name // static const String& name() { return CLASS_NAME; } // other static methods // static boolean diagnose(Integral::DEBUG debug_level); // method: setDebug // boolean setDebug(Integral::DEBUG debug_level) { debug_level_d = debug_level; HierarchicalSearch::setDebug(debug_level); StackSearch::setDebug(debug_level); SearchNode::setDebug(debug_level); Trace::setDebug(debug_level); Instance::setDebug(debug_level); History::setDebug(debug_level); Context::setDebug(debug_level); return true; } // constructor(s)/destructor(s) // ~HiddenMarkovModel(); HiddenMarkovModel(); HiddenMarkovModel(const HiddenMarkovModel& arg); // method: assign // boolean assign(const HiddenMarkovModel& arg) { return Error::handle(name(), L"assign", Error::ARG, __FILE__, __LINE__); } // i/o methods // long sofSize() const; boolean read(Sof& sof, long tag, const String& name = CLASS_NAME); boolean write(Sof& sof, long tag, const String& name = CLASS_NAME) const; boolean readData(Sof& sof, const String& pname = DEF_PARAM, long size = SofParser::FULL_OBJECT, boolean param = true, boolean nested = false); boolean writeData(Sof& sof, const String& param = DEF_PARAM) const; // method: new // static void* operator new(size_t size) { return mgr_d.get(); } // method: new[] // static void* operator new[](size_t size) { return mgr_d.getBlock(size); } // method: delete // static void operator delete(void* ptr) { mgr_d.release(ptr); } // method: delete[] // static void operator delete[](void* ptr) { mgr_d.releaseBlock(ptr); } // method: setGrowSize // static boolean setGrowSize(long grow_size) { return mgr_d.setGrow(grow_size); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -