flat_fwd.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 2,212 行 · 第 1/5 页

C
2,212
字号
/* ==================================================================== * Copyright (c) 1995-2002 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * fwd.c -- Forward Viterbi beam search * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1995 Carnegie Mellon University. * ALL RIGHTS RESERVED. 
* ********************************************** *  * HISTORY *  * 28-Jul-04    ARCHAN (archan@cs.cmu.edu at Carnegie Mellon Unversity  *              First incorporate it from s3 code base.  * * $Log: flat_fwd.c,v $ * Revision 1.10  2004/12/27 19:46:19  arthchan2003 * 1, Add perf-std to Makefile.am , developers can type make perf-std as the standard performance test target. This only works in CMU. 2, Fix warning in flat_fwd.[ch], 3, Apply Yitao's change in cmd_ln.c . 4, 2,3 are standard regression tested. * * Revision 1.9  2004/12/23 21:00:51  arthchan2003 * 1, Fixed problems in the code of -cepext, 2, Enabled the generic HMM computation routine flat_fwd.c. This is the key problem of the decode_anytopo. * * Revision 1.8  2004/12/06 10:52:00  arthchan2003 * Enable doxygen documentation in libs3decoder * * Revision 1.7  2004/12/05 12:01:30  arthchan2003 * 1, move libutil/libutil.h to s3types.h, seems to me not very nice to have it in every files. 2, Remove warning messages of main_align.c 3, Remove warning messages in chgCase.c * * Revision 1.6  2004/11/16 05:13:18  arthchan2003 * 1, s3cipid_t is upgraded to int16 because we need that, I already check that there are no magic code using 8-bit s3cipid_t * 2, Refactor the ep code and put a lot of stuffs into fe.c (should be renamed to something else. * 3, Check-in codes of wave2feat and cepview. (cepview will not dump core but Evandro will kill me) * 4, Make the same command line frontends for decode, align, dag, astar, allphone, decode_anytopo and ep . Allow the use a file to configure the application. * 5, Make changes in test such that test-allphone becomes a repeatability test. * 6, cepview, wave2feat and decode_anytopo will not be installed in 3.5 RCIII * (Known bugs after this commit) * 1, decode_anytopo has strange bugs in some situations that it cannot find the end of the lattice. This is urgent. * 2, default argument file's mechanism is not yet supported, we need to fix it. 
* 3, the bug discovered by SonicFoundry is still not fixed. * * Revision 1.2  2004/11/14 07:00:08  arthchan2003 * 1, Finally, a version of working flat decoder is completed. It is not compiled in the standard compilation yet because there are two many warnings. 2, eliminate the statics variables in  fe_sigproc.c * * Revision 1.2  2002/12/03 23:02:38  egouvea * Updated slow decoder with current working version. * Added copyright notice to Makefiles, *.c and *.h files. * Updated some of the documentation. * * Revision 1.1.1.1  2002/12/03 20:20:46  robust * Import of s3decode. * *  * 08-Sep-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added .Z compression option to lattice files. *  * 04-Jun-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added dag_chk_linkscr().  Added check for renormalization before bestpath. *  * 15-Nov-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		BUGFIX: Added lwf factoring of fillpen in dag_backtrace(). *  * 11-Nov-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Changed hardwired MIN_EF_RANGE constant into -min_endfr argument. * 		Added fudge edges in dag (dag_add_fudge_edges). *  * 08-Nov-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added exact reporting of word sequence and scores from dag_search. * 		For this, added daglink_t.bypass, daglink_t.lscr, daglink_t.src, and * 		added bypass argument to dag_link and dag_bypass_link, and changed * 		dag_backtrace to find exact best path. *  * 07-Nov-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University *  		Added onlynodes argument to dag_dump(). *   * 29-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		BUGFIX: Applied lwf to filler penalties in dag_remove_filler_nodes(). *   * 28-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Corrected for backoff case of LM score in lat_seg_lscr(). 
*   * 15-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Undid previous change: now the complete DAG is built whether the bestpath * 		search is to be run or not. *   * 11-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Replace explicit silpen and noisepen with calls to fillpen(). *   * 05-Oct-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		BUGFIX: Added pscr_valid flag to daglink_t to avoid evaluating the * 		same path mulitple times (millions of times, in some cases). *   * 27-Sep-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		BUGFIX: Added checks in dag_bestpath and dag_search for dealing with * 		zero paths through DAG (caused by introduction of MIN_EF_RANGE). *   * 26-Sep-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added separate language weight (-bestpathlw) for bestpath DAG search. * 		Added MIN_EF_RANGE to limit active nodes in DAG search.  Removed internal * 		finishwid nodes from DAG search. *   * 21-Sep-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added and used -bptblsize argument. * 		Freed rcscore entries in BP table if not running bestpath search (for * 		reducing memory requirement; but causes acoustic scores in dumped * 		lattices to be inaccurate). *   * 12-Sep-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Changed fwd_sen_active to flag active senones instead of building a list * 		of them. *  * 09-Sep-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Changed (> thresh) to (>= thresh) in word_trans, for consistency. * 		Added word_ugprob structure and use in word_trans() to speed up unigram * 		cross-word transitions.  (Didn't help that much.) * 		Postponed pruning and reclaiming of inactive whmm to whmm_eval, to avoid * 		unnecessarily deallocating HMMs, only to allocate them again because of * 		an incoming transition. 
* 		Changed tp[][] indices to tp[] in 5-state specific eval_nonmpx_whmm and * 		eval_mpx_whmm, again to speed up whmm_eval.  (Helped a bit.) *  * 06-Sep-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Create edges in DAG iff bestpath search being done.  Reduces size of * 		dumped lattices, but cannot be used to run bestpath search. *  * 29-Aug-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Changed output lattice file to include edges and best ending scores. * 		Changed input lattice file format to conform to output format. *  * 26-Jun-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Bugfix: </s> never becomes an active candidate if using an input lattice * 		to constrain search AND </s> appears in filler dictionary. *  * 24-Jun-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added DAG search. *  * 02-Jun-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added check (through tg_trans_done) in word_trans to avoid backing off to * 		bigram transition w2->w3 if trigram transition w1,w2->w3 already done. *  * 29-Mar-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added the reporting of no. of triphones mapped to ciphones.  (BUG: The * 		number reported is not accurate as it counts the number of such INSTANCES * 		for within-word triphones, but only the SET of cross-word triphones.) *  * 12-Mar-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Added checks in eval_mpx_whmm and eval_nonmpx_whmm for detecting * 		very poor state scores and flooring them to S3_LOGPROB_ZERO.  Otherwise, * 		these scores could overflow and turn +ve. *  * 26-Jan-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * 		Fixed bug in build_wwpid (pointed out by purify) that accessed * 		out of bounds memory in the case of single-phone words. 
* 
 * 20-Jan-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
 * 		Added functionality to constrain search to words in given input lattices.
 * 		This mainly affects the word_trans function.
 * 		Added fwd_sen_active() function.
 * 		Added code to increase lattice[] size (realloc) when it overflows, instead
 * 		of exiting with an error message.
 * 
 * 20-Dec-95	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
 * 		Changed allocation of whmm state,latid,pid vectors to block mode
 * 		allocation in whmm_alloc (suggested by Paul Placeway).
 * 
 * 10-Aug-95	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
 * 		Started.
 */

#define ANY
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <s3types.h>
#include "mdef.h"
#include "tmat.h"
#include "dict.h"
#include "lm.h"
#include "fillpen.h"
#include "logs3.h"
#include "search.h"
#include "programs/s3_dag.h"
#include "flat_fwd.h"

/** \file flat_fwd.c
    \brief Implementation of forward search in a flat lexicon. 
 */

/**
 * Left context mapping (for multiphone words): given the 1st base phone, b, of a word
 * and its right context, r, the triphone for any left context l =
 *     lcpid[b][r].pid[lcpid[b][r].cimap[l]].
 * 
 * Similarly, right context mapping (for multiphone words): given b and left context l,
 * the triphone for any right context r =
 *     rcpid[b][l].pid[rcpid[b][l].cimap[r]].
 * 
 * A single phone word is a combination of the above, where both l and r are unknown.
 * Triphone, given any l and r context ciphones:
 *     lrcpid[b][l].pid[lrcpid[b][l].cimap[r]].
 * For simplicity, all cimap[] vectors (for all l) must be identical.  For now, this is
 * attained by avoiding any compression and letting cimap be the identity map.
 * 
 * Reason for compressing pid[] and indirect access: senone sequences for triphones not
 * distinct.  Hence, cross-word modelling fanout at word exits can be limited by fanning
 * out to only distinct ones and sharing the results among all ciphones.
*/
static xwdpid_t **lcpid;
static xwdpid_t **rcpid;
static xwdpid_t **lrcpid;
static int32 n_backoff_ci;	/**< #Triphone instances backed off to ciphones */
static int8 *word_start_ci;
static int8 *word_end_ci;
static whmm_t **whmm;

/**
 * First, the within word triphone models.  wwpid[w] = list of triphone pronunciations
 * for word w.
 * Since left and right extremes require cross-word modelling (see the lcpid, rcpid and
 * lrcpid tables declared above), wwpid[w][0] and wwpid[w][pronlen-1] contain no
 * information and shouldn't be touched.
 */
static s3pid_t **wwpid;

/**
 * Word lattice for recording decoded hypotheses.
 * 
 * lattice[i] = entry for a word ending at a particular frame.  There can be at most one
 * entry for a word in a given frame.
 * NOTE: lattice array allocated statically.  Need a more graceful way to grow without
 * such an arbitrary internal limit.
 */
typedef struct lattice_s {
    s3wid_t   wid;	/**< Decoded word */
    s3frmid_t frm;	/**< End frame for this entry */
    s3latid_t history;	/**< Index of predecessor lattice_t entry */
    int32     score;	/**< Best path score up to the end of this entry */
    int32    *rcscore;	/**< Individual path scores for different right context ciphones */
    dagnode_t *dagnode;	/**< DAG node representing this entry */
} lattice_t;
static lattice_t *lattice;
static int32 lat_alloc;		/**< #lattice entries allocated */
static int32 n_lat_entry;	/**< #lattice entries used at any point */

/* NOTE(review): presumably the number of entries by which lattice[] is grown;
 * the code that uses it is not visible here -- confirm. */
#define LAT_ALLOC_INCR		32768

/**
 * Start frame of lattice entry l: one frame past the end frame (frm) of its
 * predecessor entry when IS_S3LATID(history) holds, otherwise 0.
 */
#define LATID2SF(l)	(IS_S3LATID(lattice[l].history) ? \
			 lattice[lattice[l].history].frm + 1 : 0)

/**
 * Structures for decoding utterances subject to given input word lattices; ie, restricting
 * the decoding to words found in the lattice.  (For speeding up the decoding process.)
 * NOTE:  This mode is optional.  If no input lattice is given, the entire vocabulary is
 * eligible during recognition.  Also, SILENCEWORD, FINISHWORD, and noisewords are always
 * eligible candidates.
 * 
 * Input lattice specifies candidate words that may start at a given frame.  
In addition,
 * this forward pass can also consider words starting at a number of neighbouring frames
 * within a given window.
 * 
 * Input lattice file format:  Each line contains a single <word> <startframe> info.  The
 * line may contain other info following these two fields; these are ignored.  Empty lines
 * and lines beginning with a # char in the first column (ie, comment lines) are ignored.
 */
static char *word_cand_dir;	/**< Directory containing candidate words files.  If NULL,
				   full search performed for entire run */
static char *latfile_ext;	/**< Complete word candidate filename for an utterance formed
				   by word_cand_dir/<uttid>.latfile_ext */
static int32 word_cand_win;	/**< In frame f, candidate words in input lattice from frames
				   [(f - word_cand_win) .. (f + word_cand_win)] will be
				   the actual candidates to be started(entered) */

/** Singly linked list node holding one candidate word for a given start frame. */
typedef struct word_cand_s {
    s3wid_t wid;		/**< A particular candidate word starting in a given frame */
    struct word_cand_s *next;	/**< Next candidate starting in same frame; NULL if none */
} word_cand_t;
static word_cand_t **word_cand;	/**< Word candidates for each frame.  (NOTE!! Another array
				   with a hard limit on its size.) */
static int32 n_word_cand;	/**< #candidate entries in word_cand for current utterance.
If <= 0; full search performed for current utterance *//** Various search-related parameters */static int32 beam;		/** General beamwidth */static int32 wordbeam;		/** Beam for exiting a word */static int32 phone_penalty;	/** Applied for each phone transition */static int32 n_state = 0;static int32 final_state;static s3wid_t silwid;		/** General silence word id */static s3wid_t startwid;	/** Begin silence */static s3wid_t finishwid;	/** End silence */dict_t *dict;		/** The dictionary */tmat_t *tmat;		/** HMM transition probabilities matrices */fillpen_t *fpen;         /** Filler penalty */mdef_t *mdef;lm_t   *lm;		/** The currently active language model */static dag_t dag;s3lmwid_t *dict2lmwid;	/** Mapping from decoding dictionary wid's to lm ones.  They may not be the same! */static char *uttid = NULL;	/** Utterance id; for error reporting */static int32 n_frm;		/** Current frame being searched within utt */static s3latid_t *frm_latstart;	/** frm_latstart[f] = first lattice entry in frame f */static srch_hyp_t *hyp = NULL;	/** The final recognition result */static int32 renormalized;	/** Whether scores had to be renormalized in current utt *//* Debugging */static s3wid_t trace_wid;	/** Word to be traced; for debugging */static int32 word_dump_sf;	/** Start frame for words to be dumped for debugging */static int32 hmm_dump_sf;	/** Start frame for HMMs to be dumped for debugging *//* Event count statistics */pctr_t ctr_mpx_whmm;pctr_t ctr_nonmpx_whmm;pctr_t ctr_latentry;static ptmr_t tm_hmmeval;static ptmr_t tm_hmmtrans;static ptmr_t tm_wdtrans;/* Get rid of old hyp, if any */static void hyp_free ( void ){    srch_hyp_t *tmphyp;        while (hyp) {	tmphyp = hyp->next;	listelem_free ((char *)hyp, sizeof(srch_hyp_t));	hyp = tmphyp;    }}static int32 filler_word (s3wid_t w){    if ((w == startwid) || (w == finishwid))	return 0;    if ((w >= dict->filler_start) && (w <= dict->filler_end))	return 1;    return 0;}#if 0static void dump_xwdpidmap (xwdpid_t **x){    
s3cipid_t b, c1, c2;    s3pid_t p;    for (b = 0; b < mdef->n_ciphone; b++) {	if (! x[b])	    continue;		for (c1 = 0; c1 < mdef->n_ciphone; c1++) {	    if (! x[b][c1].cimap)		continue;	    	    printf ("n_pid(%s, %s) = %d\n",		    mdef_ciphone_str(mdef, b), mdef_ciphone_str(mdef, c1),		    x[b][c1].n_pid);	    for (c2 = 0; c2 < mdef->n_ciphone; c2++) {		p = x[b][c1].pid[x[b][c1].cimap[c2]];		printf ("  %10s %5d\n", mdef_ciphone_str(mdef, c2), p);	    }	}    }}#endif/** * Utility function for building cross-word pid maps.  Compresses cross-word pid list * to unique ones. */static int32 xwdpid_compress (s3pid_t p, s3pid_t *pid, s3cipid_t *map, s3cipid_t ctx,			      int32 n){    s3senid_t *senmap, *prevsenmap;    int32 s;    s3cipid_t i;    senmap = mdef->phone[p].state;        for (i = 0; i < n; i++) {	if (mdef->phone[p].tmat != mdef->phone[pid[i]].tmat)	    continue;	prevsenmap = mdef->phone[pid[i]].state;	for (s = 0; (s < n_state-1) && (senmap[s] == prevsenmap[s]); s++);	

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?