simplengrammodel.java
来自「It is the Speech recognition software. 」· Java 代码 · 共 524 行 · 第 1/2 页
JAVA
524 行
/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */package edu.cmu.sphinx.linguist.language.ngram;import java.io.BufferedReader;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.InputStreamReader;import java.io.IOException;import java.net.URL;import java.util.ArrayList;import java.util.Collections;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import java.util.StringTokenizer;import edu.cmu.sphinx.linguist.WordSequence;import edu.cmu.sphinx.linguist.dictionary.Dictionary;import edu.cmu.sphinx.linguist.dictionary.Word;import edu.cmu.sphinx.util.LogMath;import edu.cmu.sphinx.util.props.PropertyException;import edu.cmu.sphinx.util.props.PropertySheet;import edu.cmu.sphinx.util.props.PropertyType;import edu.cmu.sphinx.util.props.Registry;/** * An ascii ARPA language model loader. This loader makes no attempt to * optimize storage, so it can only load very small language models * <p> * Note that all probabilites in the grammar are stored in LogMath log base * format. Language Probabilties in the language model file are stored in log * 10 base. */public class SimpleNGramModel implements LanguageModel { /** * Sphinx property that defines the logMath component. */ public final static String PROP_LOG_MATH = "logMath"; // ---------------------------- // Configuration data // ---------------------------- private String name; private LogMath logMath; private String format; private URL urlLocation; private float unigramWeight; private Dictionary dictionary; private int desiredMaxDepth; private int maxNGram = 0; private Map map; private Set vocabulary; private int lineNumber; private BufferedReader reader; private String fileName; private boolean allocated = false; /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#register(java.lang.String, * edu.cmu.sphinx.util.props.Registry) */ public void register(String name, Registry registry) throws PropertyException { this.name = name; registry.register(PROP_FORMAT, PropertyType.STRING); registry.register(PROP_LOCATION, PropertyType.RESOURCE); registry.register(PROP_UNIGRAM_WEIGHT, PropertyType.FLOAT); registry.register(PROP_LOG_MATH, PropertyType.COMPONENT); registry.register(PROP_MAX_DEPTH, PropertyType.INT); registry.register(PROP_DICTIONARY, PropertyType.COMPONENT); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet) */ public void newProperties(PropertySheet ps) throws PropertyException { if (allocated) { throw new PropertyException(this, null, "Can't change properties after allocation"); } format = ps.getString(PROP_FORMAT, PROP_FORMAT_DEFAULT); urlLocation = ps.getResource(PROP_LOCATION); unigramWeight = ps.getFloat(PROP_UNIGRAM_WEIGHT, PROP_UNIGRAM_WEIGHT_DEFAULT); logMath = (LogMath) ps.getComponent(PROP_LOG_MATH, LogMath.class); desiredMaxDepth = ps.getInt(PROP_MAX_DEPTH, PROP_MAX_DEPTH_DEFAULT); dictionary = (Dictionary) ps.getComponent(PROP_DICTIONARY, Dictionary.class); map = new HashMap(); vocabulary = new HashSet(); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#allocate() */ public void allocate() throws IOException { allocated = true; load(format, urlLocation, unigramWeight, dictionary); if (desiredMaxDepth > 0) { if (desiredMaxDepth < maxNGram) { maxNGram = desiredMaxDepth; } } } /* * (non-Javadoc) * * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#deallocate() */ public void deallocate() { allocated = false; } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#getName() */ public String getName() { return name; } /** * Called before a recognition */ public void start() { } /** * Called after a recognition */ public void stop() { } /** * Gets the ngram probability of the word sequence represented by the word * list * * @param wordSequence * the word sequence * * @return the probability of the word sequence. Probability is in logMath * log base * */ public float getProbability(WordSequence wordSequence) { float logProbability = 0.0f; Probability prob = getProb(wordSequence); if (prob == null) { if (wordSequence.size() > 1) { logProbability = getBackoff(wordSequence.getOldest()) + getProbability(wordSequence.getNewest()); } else { // if the single word is not in the model at all // then its zero likelihood that we'll use it logProbability = LogMath.getLogZero(); } } else { logProbability = prob.logProbability; } if (false) { System.out.println("Search: " + wordSequence + " : " + logProbability + " " + logMath.logToLinear(logProbability)); } return logProbability; } /** * Gets the smear term for the given wordSequence * * @param wordSequence * the word sequence * @return the smear term associated with this word sequence */ public float getSmear(WordSequence wordSequence) { return 0.0f; // TODO not implememted } /** * Returns the backoff probability for the give sequence of words * * @param wordSequence * the sequence of words * * @return the backoff probability in LogMath log base */ public float getBackoff(WordSequence wordSequence) { float logBackoff = 0.0f; // log of 1.0 Probability prob = getProb(wordSequence); if (prob != null) { logBackoff = prob.logBackoff; } return logBackoff; } /** * Returns the maximum depth of the language model * * @return the maximum depth of the language mdoel */ public int getMaxDepth() { return maxNGram; } /** * Returns the set of words in the lanaguage model. The set is * unmodifiable. * * @return the unmodifiable set of words */ public Set getVocabulary() { return Collections.unmodifiableSet(vocabulary); } /** * Gets the probability entry for the given word sequence or null if there * is no entry * * @param wordSequence * a word sequence * * @return the probability entry for the wordlist or null */ private Probability getProb(WordSequence wordSequence) { return (Probability) map.get(wordSequence); } /** * Converts a wordList to a string * * @param wordList * the wordList * * @return the string */ private String listToString(List wordList) { StringBuffer sb = new StringBuffer(); for (Iterator i = wordList.iterator(); i.hasNext();) { sb.append(i.next().toString()); sb.append(" "); } return sb.toString();
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?