📄 fbs.h
字号:
* Like uttproc_result, but returns a list of word segmentations instead of the full * recognition string. The list of word segmentations is READ-ONLY, and clobbered by * the next call to any of the result functions. * Use uttproc_result or uttproc_result_seg to obtain the final result, but not both! */int32 uttproc_result_seg (int32 *frm, /* Out: *frm = #frames in utterance */ search_hyp_t **hyp, /* Out: *hyp = first element in NULL terminated linked list of word segmentations */ int32 block);/* * Obtain a partial recognition result in the middle of an utterance. This function can * be called anytime after uttproc_begin_utt and before the final uttproc_result. * Return value: 0 if successful, else -1. */int32 uttproc_partial_result (int32 *frm, /* Out: *frm = #frames processed corresponding to the partial result */ char **hyp); /* Out: *hyp = partial recognition string, READ-ONLY. Contents clobbered by the next uttproc_result or uttproc_partial_result call. *//* * Like uttproc_partial_result, but returns a list of word segmentations instead of * the partial recognition string. The list of word segmentations is READ-ONLY, and * clobbered by the next call to any of the result functions. */int32 uttproc_partial_result_seg (int32 *frm, search_hyp_t **hyp); /* Out: *hyp = first element in NULL terminated linked list of word segmentations *//* * Called instead of uttproc_end_utt to abort the current utterance immediately. No * final recognition result is available. Note that one cannot abort an utterance after * uttproc_end_utt. * Return value: 0 if successful, else -1. */int32 uttproc_abort_utt ( void );/* * The sequence uttproc_stop_utt()...uttproc_restart_utt() can be used to re-recognize * the current utterance. It is typically used to switch to a new language model in the * middle of an utterance, for example, based on a partial recognition result; the * switch occurs in the middle of the two calls. uttproc_stop_utt must eventually be * followed by uttproc_restart_utt. There can be no other intervening calls relating to * the current utterance; i.e., no uttproc_begin_utt, uttproc_rawdata, uttproc_cepdata, * uttproc_end_utt, uttproc_result, uttproc_partial_result, or uttproc_abort_utt. * This operation cannot be performed after uttproc_end_utt. * Return value: 0 if successful, else -1. */int32 uttproc_stop_utt ( void );int32 uttproc_restart_utt ( void );/* * Obtain N-best list for current utterance: * NOTE: Should be preceded by search_save_lattice (). * NOTE: Clobbers any previously returned N-best hypotheses in *alt_out. * Arguments: * sf, ef: Start and end frame range within utterance for generating N-best list. * w1, w2: Two-word context preceding utterance; w2 is the later one. w1 may be -1 * (i.e., non-existent). w2 must be valid; it can be the word-id for <s>. * On return, alt_out[i] = i-th hypothesis generated. * Return value: #alternative hypotheses returned; -1 if error. */int32 search_get_alt (int32 n, /* In: No. of alternatives to produce */ int32 sf, int32 ef, /* In: Start/End frame */ int32 w1, int32 w2, /* In: context words */ search_hyp_t ***alt_out); /* Out: array of alternatives *//* Should be called before search_get_alt */void search_save_lattice ( void );/* * Perform allphone recognition on the given cepstrum file and return a linked list of * phones and segmentation. The filename should NOT contain the (.mfc) file extension. * Return value: pointer to head of linked list of search_hyp_t entries for the phone * segments; it may be NULL. It is a READ-ONLY list. It will be clobbered by the next * call to this function. */search_hyp_t *uttproc_allphone_file (char const *file); /* Without filename extension *//* Force alignment API */void time_align_utterance (char const *utt, FILE *out_sent_fp, char const *left_word, int32 begin_frame, char *pe_words, /* FIXME: should be const */ int32 end_frame, char const *right_word);/* Other batch mode processing API */void run_ctl_file (char const *ctl_file_name);void run_time_align_ctl_file (char const *utt_ctl_file_name, char const *pe_ctl_file_name, char const *out_sent_file_name);/************************* N-gram LM related *************************//* * Read in a new LM file (lmfile), and associate it with name lmname. If there is * already an LM with the same name, it is automatically deleted. The current LM is * undefined at this point; use uttproc_set_lm(lmname) immediately afterwards. * Return value: 0 if successful, else -1. */int32 lm_read (char const *lmfile, /* In: LM file name */ char const *lmname, /* In: LM name associated with this model */ double lw, /* In: Language weight; typically 6.5-9.5 */ double uw, /* In: Unigram weight; typically 0.5 */ double wip); /* In: Word insertion penalty; typically 0.65 *//* * Delete the named LM from the LM collection. The current LM is undefined at this * point. Use uttproc_set_lm(...) immediately afterwards. * Return value: 0 if successful, else -1. */int32 lm_delete (char const *lmname);/* * Set the currently active LM to the given named LM. Multiple LMs can be loaded initially * (during fbs_init) or at run time using lm_read (see above). This functions * sets the decoder in n-gram decoding mode. * Return value: 0 if successful, else -1. */int32 uttproc_set_lm (char const *lmname);/* * Indicate to the decoder that the named LM has been updated (e.g., with the addition of * a new unigram). * Return value: 0 if successful, else -1. */int32 uttproc_lmupdate (char const *lmname);/* * Set the N-gram LM start symbol to the given value. * Explicitly reqeusted by some projects. (Okay, but other stuff uses it anyway.) */int32 uttproc_set_startword (char const *startword);/* * Set trigram history context for the next utterance. Instead of the next utterance * beginning with a clean slate, it is begun as if the two words wd1 and wd2 have just * been recognized. They are used as the (trigram) language model history for the * utterance. wd1 can be NULL if there is only a one word history wd2, or both wd1 and * wd2 can be NULL to clear any history information. * Return value: 0 if successful, else -1. */int32 uttproc_set_context (char const *wd1, /* In: First word of history (possibly NULL) */ char const *wd2);/* In: Last (most recent) history (maybe NULL) *//****************************** FSG related ******************************//* * Structures through which an application may load an FSG into the decoder. * There's essentially a 1-to-1 correspondence between the FSG file format * and these structures. */typedef struct s2_fsg_trans_s { int32 from_state; int32 to_state; float32 prob; /* Probability associated with transition */ char *word; /* Word emitted by this transition; NULL for null transitions */ struct s2_fsg_trans_s *next; /* For constructing linear linked list of all transitions in FSG; NULL at the tail */} s2_fsg_trans_t;typedef struct s2_fsg_s { char *name; /* This would be the name on the FSG_BEGIN line in an FSG file. Can be NULL or "" for unnamed FSGs */ int32 n_state; /* Set of states = 0 .. n_state-1 */ int32 start_state; /* 0 <= start_state < n_state */ int32 final_state; /* 0 <= final_state < n_state */ s2_fsg_trans_t *trans_list; /* Head of list of transitions in FSG; transitions need not be in any order */} s2_fsg_t;/* * Load the given FSG file into the system. Another FSG with the same string * name (on the FSG_BEGIN line) must not already exist. * The loaded FSG is NOT automatically the currently active FSG. * Options to consider alternative pronunciations and transparent insertion of * filler words are determined from appropriate "command-line" arguments at * system initialization, as are filler word probabilities and the language * weight. * Return value: a read-only ptr to the string name of the loaded FSG; NULL * if any error. This pointer is invalid after the FSG is deleted * (via uttproc_del_fsg()). */char *uttproc_load_fsgfile (char *fsgfile);/* * Similar to uttproc_load_fsgfile, but load from the given in-memory * structures defined above, instead of from a file. In addition, and unlike * uttproc_load_fsgfile, it allows the application to explicitly configure * other aspects such as whether to consider alternative pronunciations, * insert filler words, and the language weight to be applied to transition * probabilities. * Return value: 1 if successfully loaded, 0 if any error. * (This function specifically requested by some applications.) */int32 uttproc_load_fsg (s2_fsg_t *fsg, int32 use_altpron, /* Whether to automatically insert all the alternative pronunciations for each transition */ int32 use_filler, /* Whether to automatically insert filler (e.g., silence) transitions (self-loops) at each state. */ float32 silprob, /* Probability for automatically added silence transitions */ float32 fillprob, /* Probability for automatically added non-silence filler transitions */ float32 lw); /* Language weight applied to all transition probs *//* * Set the currently active FSG to the given named FSG. Cannot be performed in * the middle of an utterance. This function sets the decoder in FSG-decoding * mode. * Return value: 0 if successful, else -1. */int32 uttproc_set_fsg (char *fsgname);/* * Delete the given named FSG from the system. If it was the currently active * FSG, the search mode (N-gram or FSG-mode) becomes undefined. Cannot be * performed in the middle of an utterance. * Return value: 0 if successful, else -1. */int32 uttproc_del_fsg (char *fsgname);/* * Whether the current utterance was (is) decoded in FSG search mode. */boolean uttproc_fsg_search_mode ( void );/* * Return the start (or final) state of the currently active FSG, if any. * If none, return -1. The final state is useful, for example, in determining * if the recognition hypothesis returned by the decoder terminated in the final * state or not. */int32 uttproc_get_fsg_start_state ( void );int32 uttproc_get_fsg_final_state ( void );/* * Set the start (or final) state of the currently active FSG, if any, to the * given state. This operation can only be done in between utterances, not * in the midst of one. Return the previous start (or final) state for this * FSG if successful. Return -1 if any error. */int32 uttproc_set_fsg_start_state (int32 state);int32 uttproc_set_fsg_final_state (int32 state);/************************** Logging related **************************//* * Set the current logging directory for per utterance raw sample files and cepstrum * files. The file names are <uttid>.raw and <uttid>.mfc respectively, where <uttid> is * the utterance id associated with the current utterance (see uttproc_begin_utt). * Return value: 0 if successful, else -1. */int32 uttproc_set_rawlogdir (char const *dir);int32 uttproc_set_mfclogdir (char const *dir);/* Logfile can be changed in between utterances. Return value: 0 if ok, else -1 */int32 uttproc_set_logfile (char const *file);/* * Obtain the uttid for the most recent utterance (in progress or just finished) * Return value: pointer to READ-ONLY string that is the utterance id. */char const *uttproc_get_uttid ( void );/* * For automatically generated uttid's (see uttproc_begin_utt), also use the prefix * given below. (So the uttid is formatted "%s%08d", prefix, sequence_no.) */int32 uttproc_set_auto_uttid_prefix (char const *prefix);#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -