simplengrammodel.java

来自「It is the Speech recognition software. 」· Java 代码 · 共 524 行 · 第 1/2 页

JAVA
524
字号
/* * Copyright 1999-2002 Carnegie Mellon University.   * Portions Copyright 2002 Sun Microsystems, Inc.   * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved.  Use is subject to license terms. *  * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL  * WARRANTIES. * */package edu.cmu.sphinx.linguist.language.ngram;import java.io.BufferedReader;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.InputStreamReader;import java.io.IOException;import java.net.URL;import java.util.ArrayList;import java.util.Collections;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import java.util.StringTokenizer;import edu.cmu.sphinx.linguist.WordSequence;import edu.cmu.sphinx.linguist.dictionary.Dictionary;import edu.cmu.sphinx.linguist.dictionary.Word;import edu.cmu.sphinx.util.LogMath;import edu.cmu.sphinx.util.props.PropertyException;import edu.cmu.sphinx.util.props.PropertySheet;import edu.cmu.sphinx.util.props.PropertyType;import edu.cmu.sphinx.util.props.Registry;/** * An ascii ARPA language model loader. This loader makes no attempt to * optimize storage, so it can only load very small language models * <p> * Note that all probabilites in the grammar are stored in LogMath log base * format. Language Probabilties in the language model file are stored in log * 10 base. */public class SimpleNGramModel implements LanguageModel {    /**     * Sphinx property that defines the logMath component.     */    public final static String PROP_LOG_MATH = "logMath";    // ----------------------------    // Configuration data    // ----------------------------    private String name;    private LogMath logMath;    private String format;    private URL urlLocation;    private float unigramWeight;    private Dictionary dictionary;    private int desiredMaxDepth;    private int maxNGram = 0;    private Map map;    private Set vocabulary;    private int lineNumber;    private BufferedReader reader;    private String fileName;    private boolean allocated = false;    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.util.props.Configurable#register(java.lang.String,     *      edu.cmu.sphinx.util.props.Registry)     */    public void register(String name, Registry registry)            throws PropertyException {        this.name = name;        registry.register(PROP_FORMAT, PropertyType.STRING);        registry.register(PROP_LOCATION, PropertyType.RESOURCE);        registry.register(PROP_UNIGRAM_WEIGHT, PropertyType.FLOAT);        registry.register(PROP_LOG_MATH, PropertyType.COMPONENT);        registry.register(PROP_MAX_DEPTH, PropertyType.INT);        registry.register(PROP_DICTIONARY, PropertyType.COMPONENT);    }    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)     */    public void newProperties(PropertySheet ps) throws PropertyException {        if (allocated) {            throw new PropertyException(this, null,                     "Can't change properties after allocation");        }        format = ps.getString(PROP_FORMAT, PROP_FORMAT_DEFAULT);        urlLocation = ps.getResource(PROP_LOCATION);        unigramWeight = ps.getFloat(PROP_UNIGRAM_WEIGHT,                PROP_UNIGRAM_WEIGHT_DEFAULT);        logMath = (LogMath) ps.getComponent(PROP_LOG_MATH, LogMath.class);        desiredMaxDepth = ps.getInt(PROP_MAX_DEPTH, PROP_MAX_DEPTH_DEFAULT);        dictionary = (Dictionary) ps.getComponent(PROP_DICTIONARY,                Dictionary.class);        map = new HashMap();        vocabulary = new HashSet();    }    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#allocate()     */    public void allocate() throws IOException {        allocated = true;        load(format, urlLocation, unigramWeight, dictionary);        if (desiredMaxDepth > 0) {            if (desiredMaxDepth < maxNGram) {                maxNGram = desiredMaxDepth;            }        }    }    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#deallocate()     */    public void deallocate() {        allocated = false;    }    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.util.props.Configurable#getName()     */    public String getName() {        return name;    }    /**     * Called before a recognition     */    public void start() {    }    /**     * Called after a recognition     */    public void stop() {    }    /**     * Gets the ngram probability of the word sequence represented by the word     * list     *      * @param wordSequence     *                the word sequence     *      * @return the probability of the word sequence. Probability is in logMath     *         log base     *       */    public float getProbability(WordSequence wordSequence) {        float logProbability = 0.0f;        Probability prob = getProb(wordSequence);        if (prob == null) {            if (wordSequence.size() > 1) {                logProbability = getBackoff(wordSequence.getOldest())                        + getProbability(wordSequence.getNewest());            } else { // if the single word is not in the model at all                // then its zero likelihood that we'll use it                logProbability = LogMath.getLogZero();            }        } else {            logProbability = prob.logProbability;        }        if (false) {            System.out.println("Search: " + wordSequence + " : "                    + logProbability + " "                    + logMath.logToLinear(logProbability));        }        return logProbability;    }    /**     * Gets the smear term for the given wordSequence     *      * @param wordSequence     *                the word sequence     * @return the smear term associated with this word sequence     */    public float getSmear(WordSequence wordSequence) {        return 0.0f; // TODO not implememted    }    /**     * Returns the backoff probability for the give sequence of words     *      * @param wordSequence     *                the sequence of words     *      * @return the backoff probability in LogMath log base     */    public float getBackoff(WordSequence wordSequence) {        float logBackoff = 0.0f; // log of 1.0        Probability prob = getProb(wordSequence);        if (prob != null) {            logBackoff = prob.logBackoff;        }        return logBackoff;    }    /**     * Returns the maximum depth of the language model     *      * @return the maximum depth of the language mdoel     */    public int getMaxDepth() {        return maxNGram;    }    /**     * Returns the set of words in the lanaguage model. The set is     * unmodifiable.     *      * @return the unmodifiable set of words     */    public Set getVocabulary() {        return Collections.unmodifiableSet(vocabulary);    }    /**     * Gets the probability entry for the given word sequence or null if there     * is no entry     *      * @param wordSequence     *                a word sequence     *      * @return the probability entry for the wordlist or null     */    private Probability getProb(WordSequence wordSequence) {        return (Probability) map.get(wordSequence);    }    /**     * Converts a wordList to a string     *      * @param wordList     *                the wordList     *      * @return the string     */    private String listToString(List wordList) {        StringBuffer sb = new StringBuffer();        for (Iterator i = wordList.iterator(); i.hasNext();) {            sb.append(i.next().toString());            sb.append(" ");        }        return sb.toString();

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?