largetrigrammodel.java
来自「It is the Speech recognition software. 」· Java 代码 · 共 1,208 行 · 第 1/3 页
JAVA
1,208 行
/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */package edu.cmu.sphinx.linguist.language.ngram.large;import java.io.FileOutputStream;import java.io.IOException;import java.io.PrintWriter;import java.util.Arrays;import java.util.Collections;import java.util.HashMap;import java.util.HashSet;import java.util.LinkedHashMap;import java.util.Map;import java.util.Set;import java.util.logging.Logger;import java.util.logging.Level;import java.io.DataOutputStream;import java.io.DataInputStream;import java.io.FileInputStream;import java.io.File;import edu.cmu.sphinx.linguist.WordSequence;import edu.cmu.sphinx.linguist.dictionary.Dictionary;import edu.cmu.sphinx.linguist.dictionary.Word;import edu.cmu.sphinx.linguist.language.ngram.LanguageModel;import edu.cmu.sphinx.linguist.language.ngram.large.BigramBuffer;import edu.cmu.sphinx.linguist.language.ngram.large.BigramProbability;import edu.cmu.sphinx.linguist.language.ngram.large.TrigramBuffer;import edu.cmu.sphinx.util.LogMath;import edu.cmu.sphinx.util.Timer;import edu.cmu.sphinx.util.props.PropertyException;import edu.cmu.sphinx.util.props.PropertySheet;import edu.cmu.sphinx.util.props.PropertyType;import edu.cmu.sphinx.util.props.Registry;/** * Queries a binary language model file generated by the * <a href="http://www.speech.cs.cmu.edu/SLM_info.html"> * CMU-Cambridge Statistical Language Modelling Toolkit</a>. * * Note that all probabilites in the grammar are stored in LogMath log base * format. Language Probabilties in the language model file are stored in log * 10 base. They are converted to the LogMath logbase. */public class LargeTrigramModel implements LanguageModel { /** * Sphinx property for the name of the file that logs all the queried * N-grams. If this property is set to null, it means that the queried * N-grams are not logged. */ public static final String PROP_QUERY_LOG_FILE = "queryLogFile"; /** * The default value for PROP_QUERY_LOG_FILE. */ public static final String PROP_QUERY_LOG_FILE_DEFAULT = null; /** * A sphinx property that defines that maxium number of trigrams to be * cached */ public static final String PROP_TRIGRAM_CACHE_SIZE = "trigramCacheSize"; /** * The default value for the PROP_TRIGRAM_CACHE_SIZE property */ public static final int PROP_TRIGRAM_CACHE_SIZE_DEFAULT = 100000; /** * A sphinx property that defines the maximum number of bigrams to be * cached. */ public static final String PROP_BIGRAM_CACHE_SIZE = "bigramCacheSize"; /** * The default value for the PROP_BIGRAM_CACHE_SIZE property */ public static final int PROP_BIGRAM_CACHE_SIZE_DEFAULT = 50000; /** * A sphinx property that controls whether the bigram and trigram caches * are cleared after every utterance */ public static final String PROP_CLEAR_CACHES_AFTER_UTTERANCE = "clearCachesAfterUtterance"; /** * The default value for the PROP_CLEAR_CACHES_AFTER_UTTERANCE property */ public static final boolean PROP_CLEAR_CACHES_AFTER_UTTERANCE_DEFAULT = false; /** * Sphinx property that defines the language weight for the search */ public final static String PROP_LANGUAGE_WEIGHT = "languageWeight"; /** * The default value for the PROP_LANGUAGE_WEIGHT property */ public final static float PROP_LANGUAGE_WEIGHT_DEFAULT = 1.0f; /** * Sphinx property that defines the logMath component. */ public final static String PROP_LOG_MATH = "logMath"; /** * Sphinx propert that controls whether or not the language model will * apply the language weight and word insertion probability */ public final static String PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP = "applyLanguageWeightAndWip"; /** * The default value for PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP */ public final static boolean PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP_DEFAULT = false; /** * Word insertion probability property */ public final static String PROP_WORD_INSERTION_PROBABILITY = "wordInsertionProbability"; /** * The default value for PROP_WORD_INSERTION_PROBABILITY */ public final static double PROP_WORD_INSERTION_PROBABILITY_DEFAULT = 1.0; /** * If true, use full bigram information to determine smear */ public final static String PROP_FULL_SMEAR = "fullSmear"; /** * Default value for PROP_FULL_SMEAR */ public final static boolean PROP_FULL_SMEAR_DEFAULT = false; /** * The number of bytes per bigram in the LM file generated by the * CMU-Cambridge Statistical Language Modelling Toolkit. */ public static final int BYTES_PER_BIGRAM = 8; /** * The number of bytes per trigram in the LM file generated by the * CMU-Cambridge Statistical Language Modelling Toolkit. */ public static final int BYTES_PER_TRIGRAM = 4; private final static int SMEAR_MAGIC = 0xC0CAC01A; // things go better // ------------------------------ // Configuration data // ------------------------------ private Logger logger; private LogMath logMath; private String name; private String ngramLogFile; private int maxTrigramCacheSize; private int maxBigramCacheSize; private boolean clearCacheAfterUtterance; private boolean fullSmear; private int maxDepth; private Dictionary dictionary; private String format; private File location; private boolean applyLanguageWeightAndWip; private float languageWeight; private double wip; private float unigramWeight; // ------------------------------- // Statistics // ------------------------------- private int bigramMisses; private int trigramMisses; private int trigramHit; private int smearTermCount = 0; // ------------------------------- // subcomponents // -------------------------------- private BinaryLoader loader; private PrintWriter logFile; // ------------------------------- // Working data // -------------------------------- private Map unigramIDMap; private Map loadedTrigramBuffer; private LRUCache trigramCache; private LRUCache bigramCache; private Map bigramSmearMap; private BigramBuffer[] loadedBigramBuffers; private UnigramProbability[] unigrams; private int[] trigramSegmentTable; private float[] bigramProbTable; private float[] trigramProbTable; private float[] trigramBackoffTable; private float[] unigramSmearTerm; /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#register(java.lang.String, * edu.cmu.sphinx.util.props.Registry) */ public void register(String name, Registry registry) throws PropertyException { this.name = name; registry.register(PROP_FORMAT, PropertyType.STRING); registry.register(PROP_LOCATION, PropertyType.STRING); registry.register(PROP_QUERY_LOG_FILE, PropertyType.STRING); registry.register(PROP_TRIGRAM_CACHE_SIZE, PropertyType.INT); registry.register(PROP_BIGRAM_CACHE_SIZE, PropertyType.INT); registry.register(PROP_CLEAR_CACHES_AFTER_UTTERANCE, PropertyType.BOOLEAN); registry.register(PROP_MAX_DEPTH, PropertyType.INT); registry.register(PROP_LOG_MATH, PropertyType.COMPONENT); registry.register(PROP_DICTIONARY, PropertyType.COMPONENT); registry.register(PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP, PropertyType.BOOLEAN); registry.register(PROP_LANGUAGE_WEIGHT, PropertyType.FLOAT); registry.register(PROP_WORD_INSERTION_PROBABILITY, PropertyType.DOUBLE); registry.register(PROP_UNIGRAM_WEIGHT, PropertyType.FLOAT); registry.register(PROP_FULL_SMEAR, PropertyType.BOOLEAN); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet) */ public void newProperties(PropertySheet ps) throws PropertyException { logger = ps.getLogger(); format = ps.getString(LanguageModel.PROP_FORMAT, LanguageModel.PROP_FORMAT_DEFAULT); location = new File(ps.getString(PROP_LOCATION, PROP_LOCATION_DEFAULT)); ngramLogFile = ps.getString(PROP_QUERY_LOG_FILE, PROP_QUERY_LOG_FILE_DEFAULT); maxTrigramCacheSize = ps.getInt(PROP_TRIGRAM_CACHE_SIZE, PROP_TRIGRAM_CACHE_SIZE_DEFAULT); maxBigramCacheSize = ps.getInt(PROP_BIGRAM_CACHE_SIZE, PROP_BIGRAM_CACHE_SIZE_DEFAULT); clearCacheAfterUtterance = ps.getBoolean( PROP_CLEAR_CACHES_AFTER_UTTERANCE, PROP_CLEAR_CACHES_AFTER_UTTERANCE_DEFAULT); maxDepth = ps.getInt(LanguageModel.PROP_MAX_DEPTH, LanguageModel.PROP_MAX_DEPTH_DEFAULT); logMath = (LogMath) ps.getComponent(PROP_LOG_MATH, LogMath.class); dictionary = (Dictionary) ps.getComponent(PROP_DICTIONARY, Dictionary.class); applyLanguageWeightAndWip = ps.getBoolean( PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP, PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP_DEFAULT); languageWeight = ps.getFloat(PROP_LANGUAGE_WEIGHT, PROP_LANGUAGE_WEIGHT_DEFAULT); wip = ps.getDouble(PROP_WORD_INSERTION_PROBABILITY, PROP_WORD_INSERTION_PROBABILITY_DEFAULT); unigramWeight = ps.getFloat(PROP_UNIGRAM_WEIGHT, PROP_UNIGRAM_WEIGHT_DEFAULT); fullSmear = ps.getBoolean(PROP_FULL_SMEAR, PROP_FULL_SMEAR_DEFAULT); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#getName() */ public String getName() { return name; } /* * (non-Javadoc) * * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#allocate() */ public void allocate() throws IOException { Timer.start("LM Load"); // create the log file if specified if (ngramLogFile != null) { logFile = new PrintWriter(new FileOutputStream(ngramLogFile)); } unigramIDMap = new HashMap(); loadedTrigramBuffer = new HashMap(); trigramCache = new LRUCache(maxTrigramCacheSize); bigramCache = new LRUCache(maxBigramCacheSize); loader = new BinaryLoader(format, location, applyLanguageWeightAndWip, logMath, languageWeight, wip, unigramWeight); unigrams = loader.getUnigrams(); bigramProbTable = loader.getBigramProbabilities(); trigramProbTable = loader.getTrigramProbabilities(); trigramBackoffTable = loader.getTrigramBackoffWeights(); trigramSegmentTable = loader.getTrigramSegments(); buildUnigramIDMap(dictionary); loadedBigramBuffers = new BigramBuffer[unigrams.length]; if (maxDepth == LanguageModel.PROP_MAX_DEPTH_DEFAULT) { maxDepth = loader.getMaxDepth(); } else if (maxDepth == 0) { throw new Error("Invalid LM max-depth: " + maxDepth); } logger.info("Unigrams: " + loader.getNumberUnigrams()); logger.info("Bigrams: " + loader.getNumberBigrams()); logger.info("Trigrams: " + loader.getNumberTrigrams()); if (fullSmear) { System.out.println("Full Smear"); try { System.out.println("... Reading ..."); readSmearInfo("smear.dat"); System.out.println("... Done "); } catch (IOException e) { System.out.println("... " + e); System.out.println("... Calculating"); buildSmearInfo(); System.out.println("... Writing"); // writeSmearInfo("smear.dat"); System.out.println("... Done"); } } Timer.stop("LM Load"); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#deallocate() */ public void deallocate() { // TODO write me } /** * Builds the map from unigram to unigramID. Also finds the startWordID and * endWordID. */ private void buildUnigramIDMap(Dictionary dictionary) { int missingWords = 0; String[] words = loader.getWords(); for (int i = 0; i < words.length; i++) { Word word = dictionary.getWord(words[i]); if (word == null) { logger.info("Missing word: " + words[i]); missingWords++; } unigramIDMap.put(word, unigrams[i]); if (logger.isLoggable(Level.FINE)) { logger.fine("Word: " + word); } } if (missingWords > 0) { logger.warning("Dictionary is missing " + missingWords + " words that are contained in the language model."); } } /** * Called before a recognition */ public void start() { if (logFile != null) { logFile.println("<START_UTT>"); } } /** * Called after a recognition */ public void stop() { clearCache(); if (logFile != null) { logFile.println("<END_UTT>"); logFile.flush(); } } /** * Clears the various N-gram caches. */ private void clearCache() { for (int i = 0; i < loadedBigramBuffers.length; i++) { BigramBuffer buffer = loadedBigramBuffers[i]; if (buffer != null) { if (!buffer.getUsed()) { loadedBigramBuffers[i] = null; // free the BigramBuffer } else { buffer.setUsed(false);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?