flat_fwd.c
来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 2,212 行 · 第 1/5 页
C
2,212 行
/* ==================================================================== * Copyright (c) 1995-2002 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * fwd.c -- Forward Viterbi beam search * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1995 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** * * HISTORY * * 28-Jul-04 ARCHAN (archan@cs.cmu.edu at Carnegie Mellon Unversity * First incorporate it from s3 code base. * * $Log: flat_fwd.c,v $ * Revision 1.10 2004/12/27 19:46:19 arthchan2003 * 1, Add perf-std to Makefile.am , developers can type make perf-std as the standard performance test target. This only works in CMU. 2, Fix warning in flat_fwd.[ch], 3, Apply Yitao's change in cmd_ln.c . 4, 2,3 are standard regression tested. * * Revision 1.9 2004/12/23 21:00:51 arthchan2003 * 1, Fixed problems in the code of -cepext, 2, Enabled the generic HMM computation routine flat_fwd.c. This is the key problem of the decode_anytopo. * * Revision 1.8 2004/12/06 10:52:00 arthchan2003 * Enable doxygen documentation in libs3decoder * * Revision 1.7 2004/12/05 12:01:30 arthchan2003 * 1, move libutil/libutil.h to s3types.h, seems to me not very nice to have it in every files. 2, Remove warning messages of main_align.c 3, Remove warning messages in chgCase.c * * Revision 1.6 2004/11/16 05:13:18 arthchan2003 * 1, s3cipid_t is upgraded to int16 because we need that, I already check that there are no magic code using 8-bit s3cipid_t * 2, Refactor the ep code and put a lot of stuffs into fe.c (should be renamed to something else. * 3, Check-in codes of wave2feat and cepview. (cepview will not dump core but Evandro will kill me) * 4, Make the same command line frontends for decode, align, dag, astar, allphone, decode_anytopo and ep . Allow the use a file to configure the application. * 5, Make changes in test such that test-allphone becomes a repeatability test. * 6, cepview, wave2feat and decode_anytopo will not be installed in 3.5 RCIII * (Known bugs after this commit) * 1, decode_anytopo has strange bugs in some situations that it cannot find the end of the lattice. This is urgent. * 2, default argument file's mechanism is not yet supported, we need to fix it. * 3, the bug discovered by SonicFoundry is still not fixed. * * Revision 1.2 2004/11/14 07:00:08 arthchan2003 * 1, Finally, a version of working flat decoder is completed. It is not compiled in the standard compilation yet because there are two many warnings. 2, eliminate the statics variables in fe_sigproc.c * * Revision 1.2 2002/12/03 23:02:38 egouvea * Updated slow decoder with current working version. * Added copyright notice to Makefiles, *.c and *.h files. * Updated some of the documentation. * * Revision 1.1.1.1 2002/12/03 20:20:46 robust * Import of s3decode. * * * 08-Sep-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added .Z compression option to lattice files. * * 04-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added dag_chk_linkscr(). Added check for renormalization before bestpath. * * 15-Nov-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * BUGFIX: Added lwf factoring of fillpen in dag_backtrace(). * * 11-Nov-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Changed hardwired MIN_EF_RANGE constant into -min_endfr argument. * Added fudge edges in dag (dag_add_fudge_edges). * * 08-Nov-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added exact reporting of word sequence and scores from dag_search. * For this, added daglink_t.bypass, daglink_t.lscr, daglink_t.src, and * added bypass argument to dag_link and dag_bypass_link, and changed * dag_backtrace to find exact best path. * * 07-Nov-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added onlynodes argument to dag_dump(). * * 29-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * BUGFIX: Applied lwf to filler penalties in dag_remove_filler_nodes(). * * 28-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Corrected for backoff case of LM score in lat_seg_lscr(). * * 15-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Undid previous change: now the complete DAG is built whether the bestpath * search is to be run or not. * * 11-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Replace explicit silpen and noisepen with calls to fillpen(). * * 05-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * BUGFIX: Added pscr_valid flag to daglink_t to avoid evaluating the * same path mulitple times (millions of times, in some cases). * * 27-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * BUGFIX: Added checks in dag_bestpath and dag_search for dealing with * zero paths through DAG (caused by introduction of MIN_EF_RANGE). * * 26-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added separate language weight (-bestpathlw) for bestpath DAG search. * Added MIN_EF_RANGE to limit active nodes in DAG search. Removed internal * finishwid nodes from DAG search. * * 21-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added and used -bptblsize argument. * Freed rcscore entries in BP table if not running bestpath search (for * reducing memory requirement; but causes acoustic scores in dumped * lattices to be inaccurate). * * 12-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Changed fwd_sen_active to flag active senones instead of building a list * of them. * * 09-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Changed (> thresh) to (>= thresh) in word_trans, for consistency. * Added word_ugprob structure and use in word_trans() to speed up unigram * cross-word transitions. (Didn't help that much.) * Postponed pruning and reclaiming of inactive whmm to whmm_eval, to avoid * unnecessarily deallocating HMMs, only to allocate them again because of * an incoming transition. * Changed tp[][] indices to tp[] in 5-state specific eval_nonmpx_whmm and * eval_mpx_whmm, again to speed up whmm_eval. (Helped a bit.) * * 06-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Create edges in DAG iff bestpath search being done. Reduces size of * dumped lattices, but cannot be used to run bestpath search. * * 29-Aug-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Changed output lattice file to include edges and best ending scores. * Changed input lattice file format to conform to output format. * * 26-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Bugfix: </s> never becomes an active candidate if using an input lattice * to constrain search AND </s> appears in filler dictionary. * * 24-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added DAG search. * * 02-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added check (through tg_trans_done) in word_trans to avoid backing off to * bigram transition w2->w3 if trigram transition w1,w2->w3 already done. * * 29-Mar-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added the reporting of no. of triphones mapped to ciphones. (BUG: The * number reported is not accurate as it counts the number of such INSTANCES * for within-word triphones, but only the SET of cross-word triphones.) * * 12-Mar-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added checks in eval_mpx_whmm and eval_nonmpx_whmm for detecting * very poor state scores and flooring them to S3_LOGPROB_ZERO. Otherwise, * these scores could overflow and turn +ve. * * 26-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Fixed bug in build_wwpid (pointed out by purify) that accessed * out of bounds memory in the case of single-phone words. * * 20-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added functionality to constrain search to words in given input lattices. * This mainly affects the word_trans function. * Added fwd_sen_active() function. * Added code to increase lattice[] size (realloc) when it overflows, instead * of exiting with an error message. * * 20-Dec-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Changed allocation of whmm state,latid,pid vectors to block mode * allocation in whmm_alloc (suggested by Paul Placeway). * * 10-Aug-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Started. */#define ANY#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <s3types.h>#include "mdef.h"#include "tmat.h"#include "dict.h"#include "lm.h"#include "fillpen.h"#include "logs3.h"#include "search.h"#include "programs/s3_dag.h"#include "flat_fwd.h"/** \file flat_fwd.c \brief Implementation of forward search in a flat lexicon. *//** * Left context mapping (for multiphone words): given the 1st base phone, b, of a word * and its right context, r, the triphone for any left context l = * lcpid[b][r].pid[lcpid[b][r].cimap[l]]. * * Similarly, right context mapping (for multiphone words): given b and left context l, * the triphone for any right context r = * rcpid[b][l].pid[lcpid[b][l].cimap[r]]. * * A single phone word is a combination of the above, where both l and r are unknown. * Triphone, given any l and r context ciphones: * lrcpid[b][l].pid[lcpid[b][l].cimap[r]]. * For simplicity, all cimap[] vectors (for all l) must be identical. For now, this is * attained by avoiding any compression and letting cimap be the identity map. * * Reason for compressing pid[] and indirect access: senone sequences for triphones not * distinct. Hence, cross-word modelling fanout at word exits can be limited by fanning * out to only distinct ones and sharing the results among all ciphones. */static xwdpid_t **lcpid;static xwdpid_t **rcpid;static xwdpid_t **lrcpid;static int32 n_backoff_ci; /* #Triphone instances backed off to ciphones */static int8 *word_start_ci;static int8 *word_end_ci;static whmm_t **whmm;/** * First, the within word triphone models. wwpid[w] = list of triphone pronunciations * for word w. * Since left and right extremes require cross-word modelling (see below), wwpid[w][0] * and wwpid[w][pronlen-1] contain no information and shouldn't be touched. */static s3pid_t **wwpid; /** * Word lattice for recording decoded hypotheses. * * lattice[i] = entry for a word ending at a particular frame. There can be at most one * entry for a word in a given frame. * NOTE: lattice array allocated statically. Need a more graceful way to grow without * such an arbitrary internal limit. */typedef struct lattice_s { s3wid_t wid; /** Decoded word */ s3frmid_t frm; /** End frame for this entry */ s3latid_t history; /** Index of predecessor lattice_t entry */ int32 score; /** Best path score upto the end of this entry */ int32 *rcscore; /** Individual path scores for different right context ciphones */ dagnode_t *dagnode; /* DAG node representing this entry */} lattice_t;static lattice_t *lattice;static int32 lat_alloc; /** #lattice entries allocated */static int32 n_lat_entry; /** #lattice entries used at any point */#define LAT_ALLOC_INCR 32768#define LATID2SF(l) (IS_S3LATID(lattice[l].history) ? \ lattice[lattice[l].history].frm + 1 : 0)/** * Structures for decoding utterances subject to given input word lattices; ie, restricting * the decoding to words found in the lattice. (For speeding up the decoding process.) * NOTE: This mode is optional. If no input lattice is given, the entire vocabulary is * eligible during recognition. Also, SILENCEWORD, FINISHWORD, and noisewords are always * eligible candidates. * * Input lattice specifies candidate words that may start at a given frame. In addition, * this forward pass can also consider words starting at a number of neighbouring frames * within a given window. * * Input lattice file format: Each line contains a single <word> <startframe> info. The * line may contain other info following these two fields; these are ignored. Empty lines * and lines beginning with a # char in the first column (ie, comment lines) are ignored. */static char *word_cand_dir; /** Directory containing candidate words files. If NULL, full search performed for entire run */static char *latfile_ext; /** Complete word candidate filename for an utterance formed by word_cand_dir/<uttid>.latfile_ext */static int32 word_cand_win; /** In frame f, candidate words in input lattice from frames [(f - word_cand_win) .. (f + word_cand_win)] will be the actual candidates to be started(entered) */typedef struct word_cand_s { s3wid_t wid; /** A particular candidate word starting in a given frame */ struct word_cand_s *next; /** Next candidate starting in same frame; NULL if none */} word_cand_t;static word_cand_t **word_cand; /** Word candidates for each frame. (NOTE!! Another array with a hard limit on its size.) */static int32 n_word_cand; /** #candidate entries in word_cand for current utterance. If <= 0; full search performed for current utterance *//** Various search-related parameters */static int32 beam; /** General beamwidth */static int32 wordbeam; /** Beam for exiting a word */static int32 phone_penalty; /** Applied for each phone transition */static int32 n_state = 0;static int32 final_state;static s3wid_t silwid; /** General silence word id */static s3wid_t startwid; /** Begin silence */static s3wid_t finishwid; /** End silence */dict_t *dict; /** The dictionary */tmat_t *tmat; /** HMM transition probabilities matrices */fillpen_t *fpen; /** Filler penalty */mdef_t *mdef;lm_t *lm; /** The currently active language model */static dag_t dag;s3lmwid_t *dict2lmwid; /** Mapping from decoding dictionary wid's to lm ones. They may not be the same! */static char *uttid = NULL; /** Utterance id; for error reporting */static int32 n_frm; /** Current frame being searched within utt */static s3latid_t *frm_latstart; /** frm_latstart[f] = first lattice entry in frame f */static srch_hyp_t *hyp = NULL; /** The final recognition result */static int32 renormalized; /** Whether scores had to be renormalized in current utt *//* Debugging */static s3wid_t trace_wid; /** Word to be traced; for debugging */static int32 word_dump_sf; /** Start frame for words to be dumped for debugging */static int32 hmm_dump_sf; /** Start frame for HMMs to be dumped for debugging *//* Event count statistics */pctr_t ctr_mpx_whmm;pctr_t ctr_nonmpx_whmm;pctr_t ctr_latentry;static ptmr_t tm_hmmeval;static ptmr_t tm_hmmtrans;static ptmr_t tm_wdtrans;/* Get rid of old hyp, if any */static void hyp_free ( void ){ srch_hyp_t *tmphyp; while (hyp) { tmphyp = hyp->next; listelem_free ((char *)hyp, sizeof(srch_hyp_t)); hyp = tmphyp; }}static int32 filler_word (s3wid_t w){ if ((w == startwid) || (w == finishwid)) return 0; if ((w >= dict->filler_start) && (w <= dict->filler_end)) return 1; return 0;}#if 0static void dump_xwdpidmap (xwdpid_t **x){ s3cipid_t b, c1, c2; s3pid_t p; for (b = 0; b < mdef->n_ciphone; b++) { if (! x[b]) continue; for (c1 = 0; c1 < mdef->n_ciphone; c1++) { if (! x[b][c1].cimap) continue; printf ("n_pid(%s, %s) = %d\n", mdef_ciphone_str(mdef, b), mdef_ciphone_str(mdef, c1), x[b][c1].n_pid); for (c2 = 0; c2 < mdef->n_ciphone; c2++) { p = x[b][c1].pid[x[b][c1].cimap[c2]]; printf (" %10s %5d\n", mdef_ciphone_str(mdef, c2), p); } } }}#endif/** * Utility function for building cross-word pid maps. Compresses cross-word pid list * to unique ones. */static int32 xwdpid_compress (s3pid_t p, s3pid_t *pid, s3cipid_t *map, s3cipid_t ctx, int32 n){ s3senid_t *senmap, *prevsenmap; int32 s; s3cipid_t i; senmap = mdef->phone[p].state; for (i = 0; i < n; i++) { if (mdef->phone[p].tmat != mdef->phone[pid[i]].tmat) continue; prevsenmap = mdef->phone[pid[i]].state; for (s = 0; (s < n_state-1) && (senmap[s] == prevsenmap[s]); s++);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?