📄 hmmpoolmanager.java
字号:
/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */package edu.cmu.sphinx.linguist.acoustic.tiedstate.trainer;import java.util.HashMap;import java.util.Iterator;import java.util.logging.Logger;import edu.cmu.sphinx.frontend.Data;import edu.cmu.sphinx.frontend.FloatData;import edu.cmu.sphinx.linguist.acoustic.HMMState;import edu.cmu.sphinx.linguist.acoustic.tiedstate.GaussianMixture;import edu.cmu.sphinx.linguist.acoustic.tiedstate.HMMManager;import edu.cmu.sphinx.linguist.acoustic.tiedstate.Loader;import edu.cmu.sphinx.linguist.acoustic.tiedstate.MixtureComponent;import edu.cmu.sphinx.linguist.acoustic.tiedstate.Pool;import edu.cmu.sphinx.linguist.acoustic.tiedstate.SenoneHMM;import edu.cmu.sphinx.linguist.acoustic.tiedstate.SenoneHMMState;import edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel;import edu.cmu.sphinx.util.LogMath;import edu.cmu.sphinx.util.SphinxProperties;/** * Manages the HMM pools. */class HMMPoolManager { private HMMManager hmmManager; private HashMap indexMap; private Pool meansPool; private Pool variancePool; private Pool matrixPool; private Pool mixtureWeightsPool; private Pool meanTransformationMatrixPool; private Pool meanTransformationVectorPool; private Pool varianceTransformationMatrixPool; private Pool varianceTransformationVectorPool; private Pool meansBufferPool; private Pool varianceBufferPool; private Pool matrixBufferPool; private Pool mixtureWeightsBufferPool; private Pool senonePool; private int vectorLength; private LogMath logMath; private float logMixtureWeightFloor; private float logTransitionProbabilityFloor; private float varianceFloor; private float logLikelihood; private float currentLogLikelihood; /* * The logger for this class */ private static Logger logger = Logger.getLogger("edu.cmu.sphinx.linguist.acoustic.HMMPoolManager"); /** * Constructor for this pool manager. * It gets the pointers to the pools from a loader. * * @param loader the loader */ protected HMMPoolManager(Loader loader, SphinxProperties props) { hmmManager = loader.getHMMManager(); indexMap = new HashMap(); meansPool = loader.getMeansPool(); variancePool = loader.getVariancePool(); mixtureWeightsPool = loader.getMixtureWeightPool(); matrixPool = loader.getTransitionMatrixPool(); senonePool = loader.getSenonePool(); logMath = LogMath.getLogMath(props.getContext()); float mixtureWeightFloor = props.getFloat(TiedStateAcousticModel.PROP_MW_FLOOR, TiedStateAcousticModel.PROP_MW_FLOOR_DEFAULT); logMixtureWeightFloor = logMath.linearToLog(mixtureWeightFloor); float transitionProbabilityFloor = props.getFloat(TiedStateAcousticModel.PROP_TP_FLOOR, TiedStateAcousticModel.PROP_TP_FLOOR_DEFAULT); logTransitionProbabilityFloor = logMath.linearToLog(transitionProbabilityFloor); varianceFloor = props.getFloat(TiedStateAcousticModel.PROP_VARIANCE_FLOOR, TiedStateAcousticModel.PROP_VARIANCE_FLOOR_DEFAULT); createBuffers(); logLikelihood = 0.0f; } /** * Recreates the buffers. */ protected void resetBuffers() { createBuffers(); logLikelihood = 0.0f; } /** * Create buffers for all pools used by the trainer in this pool manager. */ protected void createBuffers() { // the option false or true refers to whether the buffer is in // log scale or not, true if it is. meansBufferPool = create1DPoolBuffer(meansPool, false); varianceBufferPool = create1DPoolBuffer(variancePool, false); matrixBufferPool = create2DPoolBuffer(matrixPool, true); mixtureWeightsBufferPool = create1DPoolBuffer(mixtureWeightsPool, true); } /** * Create buffers for a given pool. */ private Pool create1DPoolBuffer(Pool pool, boolean isLog) { Pool bufferPool = new Pool(pool.getName()); for (int i = 0; i < pool.size(); i++) { float[] element = (float [])pool.get(i); indexMap.put(element, new Integer(i)); Buffer buffer = new Buffer(element.length, isLog, i); bufferPool.put(i, buffer); } return bufferPool; } /** * Create buffers for a given pool. */ private Pool create2DPoolBuffer(Pool pool, boolean isLog) { Pool bufferPool = new Pool(pool.getName()); for (int i = 0; i < pool.size(); i++) { float[][] element = (float [][])pool.get(i); indexMap.put(element, new Integer(i)); int poolSize = element.length; Buffer[] bufferArray = new Buffer[poolSize]; for (int j = 0; j < poolSize; j++) { bufferArray[j] = new Buffer(element[j].length, isLog, j); } bufferPool.put(i, bufferArray); } return bufferPool; } /** * Accumulate the TrainerScore into the buffers. * * @param index the current index into the TrainerScore vector * @param score the TrainerScore */ protected void accumulate(int index, TrainerScore[] score) { accumulate(index, score, null); } /** * Accumulate the TrainerScore into the buffers. * * @param index the current index into the TrainerScore vector * @param score the TrainerScore for the current frame * @param nextScore the TrainerScore for the next time frame */ protected void accumulate(int index, TrainerScore[] score, TrainerScore[] nextScore) { int senoneID; TrainerScore thisScore = score[index]; Data feature = thisScore.getData(); // We should be doing this just once per utterance... // currentLogLikelihood = thisScore.getLogLikelihood(); // Since we're scaling, the loglikelihood disappears... currentLogLikelihood = 0; // And the total becomes the sum of (-) scaling factors logLikelihood -= score[0].getScalingFactor(); SenoneHMMState state = (SenoneHMMState) thisScore.getState(); if (state == null) { // We only care about the case "all models" senoneID = thisScore.getSenoneID(); if (senoneID == TrainerAcousticModel.ALL_MODELS) { accumulateMean(senoneID, score[index]); accumulateVariance(senoneID, score[index]); accumulateMixture(senoneID, score[index]); accumulateTransition(senoneID, index, score, nextScore); } } else { // If state is non-emitting, we presume there's only one // transition out of it. Therefore, we only accumulate // data for emitting states. if (state.isEmitting()) { senoneID = senonePool.indexOf(state.getSenone()); // accumulateMean(senoneID, score[index]); // accumulateVariance(senoneID, score[index]); accumulateMixture(senoneID, score[index]); accumulateTransition(senoneID, index, score, nextScore); } } } /** * Accumulate the means. */ private void accumulateMean(int senone, TrainerScore score) { if (senone == TrainerAcousticModel.ALL_MODELS) { for (int i = 0; i < senonePool.size(); i++) { accumulateMean(i, score); } } else { GaussianMixture gaussian = (GaussianMixture) senonePool.get(senone); MixtureComponent[] mix = gaussian.getMixtureComponents(); for (int i = 0; i < mix.length; i++) { float[] mean = mix[i].getMean(); // int indexMean = meansPool.indexOf(mean); Integer indexInMap = (Integer) indexMap.get(mean); int indexMean = indexInMap.intValue(); assert indexMean >= 0; assert indexMean == senone; Buffer buffer = (Buffer) meansBufferPool.get(indexMean); float[] feature = ((FloatData) score.getData()).getValues(); double[] data = new double[feature.length]; float prob = score.getComponentGamma()[i]; prob -= currentLogLikelihood; double dprob = logMath.logToLinear(prob); // prob = (float) logMath.logToLinear(prob); for (int j = 0; j < data.length; j++) { data[j] = feature[j] * dprob; } buffer.accumulate(data, dprob); } } } /** * Accumulate the variance. */ private void accumulateVariance(int senone, TrainerScore score) { if (senone == TrainerAcousticModel.ALL_MODELS) { for (int i = 0; i < senonePool.size(); i++) { accumulateVariance(i, score); } } else { GaussianMixture gaussian = (GaussianMixture) senonePool.get(senone); MixtureComponent[] mix = gaussian.getMixtureComponents(); for (int i = 0; i < mix.length; i++) { float[] mean = mix[i].getMean(); float[] variance = mix[i].getVariance(); // int indexVariance = variancePool.indexOf(variance); Integer indexInMap = (Integer) indexMap.get(variance); int indexVariance = indexInMap.intValue(); Buffer buffer = (Buffer) varianceBufferPool.get(indexVariance); float[] feature = ((FloatData) score.getData()).getValues(); double[] data = new double[feature.length]; float prob = score.getComponentGamma()[i]; prob -= currentLogLikelihood; double dprob = logMath.logToLinear(prob); for (int j = 0; j < data.length; j++) { data[j] = (feature[j] - mean[j]); data[j] *= data[j] * dprob; } buffer.accumulate(data, dprob); } } } /** * Accumulate the mixture weights. */ private void accumulateMixture(int senone, TrainerScore score) { // The index into the senone pool and the mixture weight pool // is the same if (senone == TrainerAcousticModel.ALL_MODELS) { for (int i = 0; i < senonePool.size(); i++) { accumulateMixture(i, score); } } else { Buffer buffer = (Buffer) mixtureWeightsBufferPool.get(senone); float[] mixw = (float [])mixtureWeightsPool.get(senone); for (int i = 0; i < mixw.length; i++) { float prob = score.getComponentGamma()[i]; prob -= currentLogLikelihood; buffer.logAccumulate(prob, i, logMath); } } } /** * Accumulate transitions from a given state. * * @param indexScore the current index into the TrainerScore * @param score the score information * @param nextScore the score information for the next frame */ private void accumulateStateTransition(int indexScore, TrainerScore[] score, TrainerScore[] nextScore) { HMMState state = score[indexScore].getState(); if (state == null) { // Non-emitting state
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -