📄 sphinx3loader.java
字号:
/* * Copyright 1999-2004 Carnegie Mellon University. * Portions Copyright 2004 Sun Microsystems, Inc. * Portions Copyright 2004 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */package edu.cmu.sphinx.linguist.acoustic.tiedstate;import java.io.BufferedInputStream;import java.io.DataInputStream;import java.io.File;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.net.URL;import java.util.LinkedHashMap;import java.util.Map;import java.util.Properties;import java.util.logging.Level;import java.util.logging.Logger;import java.util.zip.ZipException;import edu.cmu.sphinx.linguist.acoustic.Context;import edu.cmu.sphinx.linguist.acoustic.HMM;import edu.cmu.sphinx.linguist.acoustic.HMMPosition;import edu.cmu.sphinx.linguist.acoustic.LeftRightContext;import edu.cmu.sphinx.linguist.acoustic.Unit;import edu.cmu.sphinx.linguist.acoustic.UnitManager;import edu.cmu.sphinx.linguist.acoustic.tiedstate.*;import edu.cmu.sphinx.util.ExtendedStreamTokenizer;import edu.cmu.sphinx.util.LogMath;import edu.cmu.sphinx.util.SphinxProperties;import edu.cmu.sphinx.util.StreamFactory;import edu.cmu.sphinx.util.Utilities;import edu.cmu.sphinx.util.props.PropertyException;import edu.cmu.sphinx.util.props.PropertySheet;import edu.cmu.sphinx.util.props.PropertyType;import edu.cmu.sphinx.util.props.Registry;/** * Loads a tied-state acoustic model generated by the Sphinx-3 trainer. * <p> * The acoustic model should be packaged in a JAR file. * The dictionary and language model files are not required * to be in the package. You can specify their locations separately. * A text file called "model.props" and the data files that make up the * acoustic model are required. * The model.props file is a file of key-value pairs, loadable as a Java * Properties file. 
It should minimally contain the following properties: * <ul> * <li><b>dataLocation</b> - this specifies the directory where the * actual model data files are, <i>relative to the model implementation * class</i></li> * <li><b>modelDefinition</b> - this specifies the location where the * model definition file is, <i>relative to the model implementation * class</i></li> * </ul> * The actual model data files are named "means", "variances", * "transition_matrices", "mixture_weights" for binary versions, * or prepended with ".ascii" for the ASCII versions. * </p> * <p> * As an example, lets look at the Wall Street Journal acoustic model JAR * file, which is located at the <code>sphinx4/lib</code> directory. * If you run * <code>"jar tvf lib/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.jar"</code>, * you will find that its internal structure looks roughly like: * <pre> * WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.jar * | * +- edu * | * +- cmu * | * +- sphinx * | * +- model * | * + acoustic * | * +- model.props * | * +- WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz.class * | * +- WSJLoader.class * | * +- cd_continuous_8gau * | | * | +- means * | +- variances * | +- mixture_weights * | +- transition_matrices * | * +- dict * | | * | +- alpha.dict * | +- cmudict.0.6d * | +- digits.dict * | +- fillerdict * | * +- etc * | * +- WSJ_clean_13dCep_16k_40mel_130Hz_6800Hz.4000.mdef * +- WSJ_clean_13dCep_16k_40mel_130Hz_6800Hz.ci.mdef * +- variables.def * * </pre> * <p> * The model.props file looks like (note how the 'dataLocation' and * 'modelDefinition' properties are defined relative to the * WSJ_clean_13dCep_16k_40mel_130Hz_6800Hz.class): * </p> * <pre>description = Wall Street Journal acoustic modelsisBinary = truefeatureType = cepstra_delta_doubledeltavectorLength = 39sparseForm = falsenumberFftPoints = 512filters = 40gaussians = 8maxFreq = 6800minFreq. 
= 130sampleRate = 16000dataLocation = cd_continuous_8gaumodelDefinition = etc/WSJ_clean_13dCep_16k_40mel_130Hz_6800Hz.4000.mdef * </pre> * * <p> * Note that although most of the properties of this class are already * defined in the model.props file, it is still possible (but not recommended) * to override those values by specifying them in the configuration file. * </p> */public class Sphinx3Loader implements Loader { /** * The log math component for the system. */ public final static String PROP_LOG_MATH = "logMath"; /** * The unit manager */ public final static String PROP_UNIT_MANAGER = "unitManager"; /** * Specifies whether the model to be loaded is in ASCII or binary format */ public final static String PROP_IS_BINARY = "isBinary"; /** * The default value of PROP_IS_BINARY */ public final static boolean PROP_IS_BINARY_DEFAULT = true; /** * The name of the model definition file (contains the HMM data) */ public final static String PROP_MODEL = "modelDefinition"; /** * The default value of PROP_MODEL_DEFAULT. */ public final static String PROP_MODEL_DEFAULT = "model.mdef"; /** * Subdirectory where the acoustic model can be found */ public final static String PROP_DATA_LOCATION = "dataLocation"; /** * The default value of PROP_DATA_LOCATION. */ public final static String PROP_DATA_LOCATION_DEFAULT = "data"; /** * The SphinxProperty for the name of the acoustic properties file. */ public final static String PROP_PROPERTIES_FILE = "propertiesFile"; /** * The default value of PROP_PROPERTIES_FILE. */ public final static String PROP_PROPERTIES_FILE_DEFAULT = "model.props"; /** * The SphinxProperty for the length of feature vectors. */ public final static String PROP_VECTOR_LENGTH = "vectorLength"; /** * The default value of PROP_VECTOR_LENGTH. 
*/ public final static int PROP_VECTOR_LENGTH_DEFAULT = 39; /** * The SphinxProperty specifying whether the transition matrices of the * acoustic model is in sparse form, i.e., omitting the zeros of the * non-transitioning states. */ public final static String PROP_SPARSE_FORM = "sparseForm"; /** * The default value of PROP_SPARSE_FORM. */ public final static boolean PROP_SPARSE_FORM_DEFAULT = true; /** * The SphinxProperty specifying whether context-dependent units should be * used. */ public final static String PROP_USE_CD_UNITS = "useCDUnits"; /** * The default value of PROP_USE_CD_UNITS. */ public final static boolean PROP_USE_CD_UNITS_DEFAULT = true; /** * Mixture component score floor. */ public final static String PROP_MC_FLOOR = "MixtureComponentScoreFloor"; /** * Mixture component score floor default value. */ public final static float PROP_MC_FLOOR_DEFAULT = 0.0f; /** * Variance floor. */ public final static String PROP_VARIANCE_FLOOR = "varianceFloor"; /** * Variance floor default value. */ public final static float PROP_VARIANCE_FLOOR_DEFAULT = 0.0001f; /** * Mixture weight floor. */ public final static String PROP_MW_FLOOR = "mixtureWeightFloor"; /** * Mixture weight floor default value. 
*/ public final static float PROP_MW_FLOOR_DEFAULT = 1e-7f; protected final static String NUM_SENONES = "num_senones"; protected final static String NUM_GAUSSIANS_PER_STATE = "num_gaussians"; protected final static String NUM_STREAMS = "num_streams"; protected final static String FILLER = "filler"; protected final static String SILENCE_CIPHONE = "SIL"; protected final static int BYTE_ORDER_MAGIC = 0x11223344; /** * Supports this version of the acoustic model */ public final static String MODEL_VERSION = "0.3"; protected final static int CONTEXT_SIZE = 1; private Pool meansPool; private Pool variancePool; private Pool matrixPool; private Pool meanTransformationMatrixPool; private Pool meanTransformationVectorPool; private Pool varianceTransformationMatrixPool; private Pool varianceTransformationVectorPool; private Pool mixtureWeightsPool; private Pool senonePool; private Map contextIndependentUnits; private HMMManager hmmManager; private LogMath logMath; private UnitManager unitManager; private Properties properties; private boolean swap; protected final static String DENSITY_FILE_VERSION = "1.0"; protected final static String MIXW_FILE_VERSION = "1.0"; protected final static String TMAT_FILE_VERSION = "1.0"; // -------------------------------------- // Configuration variables // -------------------------------------- private String name; private Logger logger; private boolean binary; private boolean sparseForm; private int vectorLength; private String location; private String model; private String dataDir; private String propsFile; private float distFloor; private float mixtureWeightFloor; private float varianceFloor; private boolean useCDUnits; /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#register(java.lang.String, * edu.cmu.sphinx.util.props.Registry) */ public void register(String name, Registry registry) throws PropertyException { this.name = name; registry.register(PROP_LOG_MATH, PropertyType.COMPONENT); 
registry.register(PROP_UNIT_MANAGER, PropertyType.COMPONENT); registry.register(PROP_IS_BINARY, PropertyType.BOOLEAN); registry.register(PROP_SPARSE_FORM, PropertyType.BOOLEAN); registry.register(PROP_VECTOR_LENGTH, PropertyType.INT); registry.register(PROP_MODEL, PropertyType.STRING); registry.register(PROP_DATA_LOCATION, PropertyType.STRING); registry.register(PROP_PROPERTIES_FILE, PropertyType.STRING); registry.register(PROP_MC_FLOOR, PropertyType.FLOAT); registry.register(PROP_MW_FLOOR, PropertyType.FLOAT); registry.register(PROP_VARIANCE_FLOOR, PropertyType.FLOAT); registry.register(PROP_USE_CD_UNITS, PropertyType.BOOLEAN); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet) */ public void newProperties(PropertySheet ps) throws PropertyException { logger = ps.getLogger(); propsFile = ps.getString(PROP_PROPERTIES_FILE, PROP_PROPERTIES_FILE_DEFAULT); logMath = (LogMath) ps.getComponent(PROP_LOG_MATH, LogMath.class); unitManager = (UnitManager) ps.getComponent(PROP_UNIT_MANAGER, UnitManager.class); binary = ps.getBoolean(PROP_IS_BINARY, getIsBinaryDefault()); sparseForm = ps.getBoolean(PROP_SPARSE_FORM, getSparseFormDefault()); vectorLength = ps.getInt(PROP_VECTOR_LENGTH, getVectorLengthDefault()); model = ps.getString(PROP_MODEL, getModelDefault()); dataDir = ps.getString(PROP_DATA_LOCATION, getDataLocationDefault()) + "/"; distFloor = ps.getFloat(PROP_MC_FLOOR, PROP_MC_FLOOR_DEFAULT); mixtureWeightFloor = ps.getFloat(PROP_MW_FLOOR, PROP_MW_FLOOR_DEFAULT); varianceFloor = ps.getFloat(PROP_VARIANCE_FLOOR, PROP_VARIANCE_FLOOR_DEFAULT); useCDUnits = ps.getBoolean(PROP_USE_CD_UNITS, PROP_USE_CD_UNITS_DEFAULT); } private void loadProperties() { if (properties == null) { properties = new Properties(); try { URL url = getClass().getResource(propsFile); // System.out.println(getClass() + " " + url); properties.load(url.openStream()); } catch (IOException ioe) { ioe.printStackTrace(); } } } /** * 
Returns whether the models are binary by default
     *
     * @return true if the models are binary by default
     */
    private boolean getIsBinaryDefault() {
        loadProperties();
        // Local is deliberately not called "binary" so that it cannot be
        // confused with the field of the same name.
        String binaryValue = properties.getProperty(PROP_IS_BINARY);
        if (binaryValue != null) {
            return Boolean.valueOf(binaryValue).booleanValue();
        } else {
            return PROP_IS_BINARY_DEFAULT;
        }
    }

    /**
     * Returns whether the matrices are in sparse form by default.
     * The value is taken from the <code>sparseForm</code> entry of the
     * loaded model properties; when that entry is absent,
     * PROP_SPARSE_FORM_DEFAULT is returned.
     *
     * @return true if the matrices are in sparse form by default
     */
    private boolean getSparseFormDefault() {
        loadProperties();
        String sparseValue = properties.getProperty(PROP_SPARSE_FORM);
        if (sparseValue != null) {
            // Parse the sparseForm entry itself. Previously the "binary"
            // field was consulted here, so the configured value was ignored.
            return Boolean.valueOf(sparseValue).booleanValue();
        } else {
            return PROP_SPARSE_FORM_DEFAULT;
        }
    }

    /**
     * Returns the default vector length.
     */
    private int getVectorLengthDefault() {
        loadProperties();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -