📄 main_allphone.c
字号:
/* ==================================================================== * Copyright (c) 1995-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * allphone-main.c -- Main driver routine for allphone Viterbi decoding. * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1996 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** * * HISTORY * * 19-Jun-1998 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Modified to handle the new libfeat interface. * * 02-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added allphone lattice output. * * 06-Mar-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. * Added .semi. and .cont. options to -senmgaufn flag. * * 16-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Added orig_stdout, orig_stderr hack to avoid hanging on exit under Linux. * * 15-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University * Started */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <s3types.h>#include "feat.h"#include "logs3.h"#include "ms_gauden.h"#include "ms_senone.h"#ifdef INTERP#include "interp.h"#endif#include "tmat.h"#include "mdef.h"#include "s3_allphone.h"#include "agc.h"#include "cmn.h"/** \file main_allphone.c * \brief Main driver routine for allphone Viterbi decoding */static arg_t defn[] = { { "-logbase", ARG_FLOAT32, "1.0001", "Base in which all log values calculated" }, { "-mdef", ARG_STRING, NULL, "Model definition input file: triphone -> senones/tmat tying" }, { "-tmat", ARG_STRING, NULL, "Transition matrix input file" }, { "-mean", ARG_STRING, NULL, "Mixture gaussian codebooks mean parameters input file" }, { "-var", ARG_STRING, NULL, "Mixture gaussian codebooks variance parameters input file" }, { "-senmgau", ARG_STRING, ".cont.", "Senone to mixture-gaussian mapping file (or .semi. or .cont.)" }, { "-mixw", ARG_STRING, NULL, "Senone mixture weights parameters input file" },#ifdef INTERP { "-lambda", ARG_STRING, NULL, "Interpolation weights (CD/CI senone) parameters input file" },#endif { "-tpfloor", ARG_FLOAT32, "0.0001", "Triphone state transition probability floor applied to -tmat file" }, { "-varfloor", ARG_FLOAT32, "0.0001", "Codebook variance floor applied to -var file" }, { "-mwfloor", ARG_FLOAT32, "0.0000001", "Codebook mixture weight floor applied to -mixw file" }, { "-agc", ARG_STRING, "max", "AGC. max: C0 -= max(C0) in current utt; none: no AGC" }, { "-log3table", ARG_INT32, "1", "Determines whether to use the log3 table or to compute the values at run time."}, { "-cmn", ARG_STRING, "current", "Cepstral mean norm. current: C[1..n-1] -= mean(C[1..n-1]) in current utt; none: no CMN" }, { "-varnorm", ARG_STRING, "no", "Variance normalize each utterance (yes/no; only applicable if CMN is also performed)" }, { "-feat", /* Captures the computation for converting input to feature vector */ ARG_STRING, "1s_c_d_dd", "Feature stream: s2_4x / s3_1x39 / cep_dcep[,%d] / cep[,%d] / %d,%d,...,%d" }, { "-ctl", ARG_STRING, NULL, "Input control file listing utterances to be decoded" }, { "-ctloffset", ARG_INT32, "0", "No. of utterances at the beginning of -ctl file to be skipped" }, { "-ctlcount", ARG_INT32, NULL, "No. of utterances in -ctl file to be processed (after -ctloffset). Default: Until EOF" }, { "-cepdir", ARG_STRING, ".", "Directory for utterances in -ctl file (if relative paths specified)." }, { "-cepext", ARG_STRING, ".mfc", "File extension appended to utterances listed in -ctl file" }, { "-topn", ARG_INT32, "4", "No. of top scoring densities computed in each mixture gaussian codebook" }, { "-beam", ARG_FLOAT64, "1e-64", "Main pruning beam applied during search" }, { "-phlatbeam", ARG_FLOAT64, "1e-20", "Pruning beam for writing phone lattice" }, { "-phonetp", ARG_STRING, NULL, "Phone transition probabilities inputfile (default: flat probs)" }, { "-phonetpfloor", ARG_FLOAT32, "0.00001", "Floor for phone transition probabilities" }, { "-phonetpwt", ARG_FLOAT32, "3.0", "Weight (exponent) applied to phone transition probabilities" }, { "-inspen", ARG_FLOAT32, "0.05", "Phone insertion penalty (applied above phone transition probabilities)" }, { "-phsegdir", ARG_STRING, NULL, "Output directory for phone segmentation files; optionally end with ,CTL" }, { "-phlatdir", ARG_STRING, NULL, "Output directory for phone lattice files" }, { "-logfn", ARG_STRING, NULL, "Log file (default stdout/stderr)" }, { NULL, ARG_INT32, NULL, NULL }};/* The definition of mdef and tmat can be found in s3_allphone.c*/mdef_t *mdef;tmat_t *tmat;static gauden_t *g; /* Gaussian density codebooks */static senone_t *sen; /* Senones */#ifdef INTERPstatic interp_t *interp; /* CD/CI interpolation */#endifstatic feat_t *fcb; /* Feature type descriptor (Feature Control Block) */static float32 ***feat = NULL; /* Speech feature data */static int32 *senscale; /* ALL senone scores scaled by senscale[i] in frame i *//* For profiling/timing */static int32 tot_nfr;static ptmr_t tm_utt;static ptmr_t tm_gausen;static ptmr_t tm_allphone;/* * Load and cross-check all models (acoustic/lexical/linguistic). */static void models_init ( void ){ float32 varfloor, mixwfloor, tpfloor; int32 i; char *arg; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdef")); /* Codebooks */ varfloor = *((float32 *) cmd_ln_access("-varfloor")); g = gauden_init ((char *) cmd_ln_access("-mean"), (char *) cmd_ln_access("-var"), varfloor); /* Verify codebook feature dimensions against libfeat */ if (feat_n_stream(fcb) != g->n_feat) { E_FATAL("#feature mismatch: feat= %d, mean/var= %d\n", feat_n_stream(fcb), g->n_feat); } for (i = 0; i < feat_n_stream(fcb); i++) { if (feat_stream_len(fcb,i) != g->featlen[i]) { E_FATAL("featlen[%d] mismatch: feat= %d, mean/var= %d\n", i, feat_stream_len(fcb, i), g->featlen[i]); } } /* Senone mixture weights */ mixwfloor = *((float32 *) cmd_ln_access("-mwfloor")); sen = senone_init ((char *) cmd_ln_access("-mixw"), (char *) cmd_ln_access("-senmgau"), mixwfloor); /* Verify senone parameters against gauden parameters */ if (sen->n_feat != g->n_feat) E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat, sen->n_feat); if (sen->n_cw != g->n_density) E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n", g->n_density, sen->n_cw); if (sen->n_gauden > g->n_mgau) E_FATAL("Senones need more codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); if (sen->n_gauden < g->n_mgau) E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); /* Verify senone parameters against model definition parameters */ if (mdef->n_sen != sen->n_sen) E_FATAL("Model definition has %d senones; but #senone= %d\n", mdef->n_sen, sen->n_sen);#ifdef INTERP /* CD/CI senone interpolation weights file, if present */ if ((arg = (char *) cmd_ln_access ("-lambda")) != NULL) { interp = interp_init (arg); /* Verify interpolation weights size with senones */ if (interp->n_sen != sen->n_sen) E_FATAL("Interpolation file has %d weights; but #senone= %d\n", interp->n_sen, sen->n_sen); } else interp = NULL;#endif /* Transition matrices */ tpfloor = *((float32 *) cmd_ln_access("-tpfloor")); tmat = tmat_init ((char *) cmd_ln_access("-tmat"), tpfloor); /* Verify transition matrices parameters against model definition parameters */ if (mdef->n_tmat != tmat->n_tmat) E_FATAL("Model definition has %d tmat; but #tmat= %d\n", mdef->n_tmat, tmat->n_tmat); if (mdef->n_emit_state != tmat->n_state) E_FATAL("#Emitting states in model definition = %d, #states in tmat = %d\n", mdef->n_emit_state, tmat->n_state); arg = (char *) cmd_ln_access ("-agc"); if ((strcmp (arg, "max") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -agc argument: %s\n", arg); arg = (char *) cmd_ln_access ("-cmn"); if ((strcmp (arg, "current") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -cmn argument: %s\n", arg);}/* Write phone segmentation output file */static void write_phseg (char *dir, char *uttid, phseg_t *phseg){ char str[1024]; FILE *fp = (FILE *)0; int32 uttscr, f, scale; /* Attempt to write segmentation for this utt to a separate file */ if (dir) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -