grammar.java

来自「It is the Speech recognition software. 」· Java 代码 · 共 612 行 · 第 1/2 页

JAVA
612
字号
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.linguist.language.grammar;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Random;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.util.props.Configurable;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.PropertyType;
import edu.cmu.sphinx.util.props.Registry;
import edu.cmu.sphinx.util.Timer;

/**
 * Base class for components that create grammars. A grammar is
 * represented internally as a graph of {@link GrammarNode GrammarNodes}
 * linked together by {@link GrammarArc GrammarArcs}. Calling
 * {@link #getInitialNode() getInitialNode} returns the first node of the
 * grammar graph. To traverse the graph, call GrammarNode.getSuccessors,
 * which returns an array of GrammarArcs from which the neighboring
 * GrammarNodes can be reached.
 * <p>
 * Note that all grammar probabilities are maintained in LogMath log
 * domain.
 */
public abstract class Grammar implements Configurable {

    /**
     * Property to control the dumping of the grammar
     */
    public static final String PROP_SHOW_GRAMMAR = "showGrammar";

    /**
     * The default value for PROP_SHOW_GRAMMAR.
*/    public final static boolean PROP_SHOW_GRAMMAR_DEFAULT = false;    /**     * Property to control whether grammars are optimized or not     */    public final static String PROP_OPTIMIZE_GRAMMAR = "optimizeGrammar";    /**     * The default value for PROP_OPTIMIZE_GRAMMAR     */    public final static boolean PROP_OPTIMIZE_GRAMMAR_DEFAULT = true;    /**     * Property to control whether silence words are inserted into the graph     */    public final static String PROP_ADD_SIL_WORDS = "addSilenceWords";    /**     * The default value for PROP_ADD_SIL_WORDS     */    public final static boolean PROP_ADD_SIL_WORDS_DEFAULT  = false;    /**     * Property to control whether filler words are inserted into the graph     */    public final static String PROP_ADD_FILLER_WORDS = "addFillerWords";    /**     * The default value for PROP_ADD_FILLER_WORDS     */    public final static boolean PROP_ADD_FILLER_WORDS_DEFAULT  = false;    /**     * Property that defines the dictionary to use for this grammar     */    public final static String PROP_DICTIONARY = "dictionary";    // ----------------------------    // Configuration data    // -----------------------------    private String name;    private Logger logger;    private boolean showGrammar;    private boolean optimizeGrammar = true;    private boolean addSilenceWords = false;    private boolean addFillerWords = false;    private Dictionary dictionary;    private GrammarNode initialNode;    private Set grammarNodes;    private final static Word[][] EMPTY_ALTERNATIVE = new Word[0][0];    private Random randomizer = new Random();    private int maxIdentity = 0;    private boolean postProcessed = false;    private boolean idCheck = false;    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.util.props.Configurable#register(java.lang.String,     *      edu.cmu.sphinx.util.props.Registry)     */    public void register(String name, Registry registry)            throws PropertyException {        this.name = name;      
  registry.register(PROP_DICTIONARY, PropertyType.COMPONENT);        registry.register(PROP_SHOW_GRAMMAR, PropertyType.BOOLEAN);        registry.register(PROP_OPTIMIZE_GRAMMAR, PropertyType.BOOLEAN);        registry.register(PROP_ADD_SIL_WORDS, PropertyType.BOOLEAN);        registry.register(PROP_ADD_FILLER_WORDS, PropertyType.BOOLEAN);    }    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)     */    public void newProperties(PropertySheet ps) throws PropertyException {        logger = ps.getLogger();        showGrammar = ps.getBoolean(PROP_SHOW_GRAMMAR,                PROP_SHOW_GRAMMAR_DEFAULT);        optimizeGrammar = ps.getBoolean(PROP_OPTIMIZE_GRAMMAR,                PROP_OPTIMIZE_GRAMMAR_DEFAULT);        addSilenceWords = ps.getBoolean(PROP_ADD_SIL_WORDS,                PROP_ADD_SIL_WORDS_DEFAULT);        addFillerWords = ps.getBoolean(PROP_ADD_FILLER_WORDS,                PROP_ADD_FILLER_WORDS_DEFAULT);        dictionary = (Dictionary) ps.getComponent(PROP_DICTIONARY,                Dictionary.class);    }    /*     * (non-Javadoc)     *      * @see edu.cmu.sphinx.util.props.Configurable#getName()     */    public String getName() {        return name;    }    /**     * Create the grammar     */    public void allocate() throws IOException {        dictionary.allocate();        newGrammar();        Timer timer = Timer.getTimer("grammarLoad");        timer.start();        initialNode = createGrammar();        postProcessGrammar();        timer.stop();    }    /**     * Deallocate resources allocated to this grammar     */    public void deallocate() {        initialNode = null;        grammarNodes = null;        dictionary.deallocate();    }    /**     * Returns the initial node for the grammar     *      * @return the initial grammar node     */    public GrammarNode getInitialNode() {        return initialNode;    }    /**     * Perform the standard set of grammar post 
processing. This can     * include inserting silence nodes and optimizing out empty nodes     */    protected void postProcessGrammar() {        if (!postProcessed) {            if (addFillerWords) {                addFillerWords();            } else if (addSilenceWords) {                addSilenceWords();            }            if (optimizeGrammar) {                optimizeGrammar();            }            dumpStatistics();            if (showGrammar) {                dumpGrammar("grammar.gdl");                dumpRandomSentences("sentences.txt", 100);                logger.info("Total number of nodes " + grammarNodes.size());            }            postProcessed = true;        }    }    /**     * Dumps statistics for this grammar     *       */    public void dumpStatistics() {        if (logger.isLoggable(Level.INFO)) {            int successorCount = 0;            logger.info("Num nodes : " + getNumNodes());            for (Iterator i = grammarNodes.iterator(); i.hasNext();) {                GrammarNode node = (GrammarNode) i.next();                successorCount += node.getSuccessors().length;            }            logger.info("Num arcs  : " + successorCount);            logger.info("Avg arcs  : "                    + ((float) successorCount / getNumNodes()));        }    }    /**     * Dump a set of random sentences that fit this grammar     *      * @param path     *                the name of the file to dump the sentences to     * @param count     *                dumps no more than this. May dump less than this     *                depending upon the number of uniqe sentences in the     *                grammar.     
*/    public void dumpRandomSentences(String path, int count) {        try {            Set set = new HashSet();            PrintWriter out = new PrintWriter(new FileOutputStream(path));            for (int i = 0; i < count; i++) {                String s = getRandomSentence();                if (!set.contains(s)) {                    set.add(s);                    out.println(s);                }            }            out.close();        } catch (IOException ioe) {            logger.severe("Can't write random sentences to " + path + " "                    + ioe);        }    }    /**     * Dump a set of random sentences that fit this grammar     *      * @param count     *                dumps no more than this. May dump less than this     *                depending upon the number of uniqe sentences in the     *                grammar.     */    public void dumpRandomSentences(int count) {        Set set = new HashSet();        for (int i = 0; i < count; i++) {            String s = getRandomSentence();            if (!set.contains(s)) {                set.add(s);            }        }        List sampleList = new ArrayList(set);        Collections.sort(sampleList);        for (Iterator i = sampleList.iterator(); i.hasNext(); ) {            System.out.println(i.next());        }    }    /**     * Returns a random sentence that fits this grammar     *      * @return a random sentence that fits this grammar     */    public String getRandomSentence() {        StringBuffer sb = new StringBuffer();        GrammarNode node = getInitialNode();        while (!node.isFinalNode()) {            if (!node.isEmpty()) {                Word word = node.getWord();                if (!word.isFiller()) {                    if (sb.length() > 0) {                        sb.append(" ");                    }                    sb.append(word.getSpelling());                }            }            node = selectRandomSuccessor(node);        }        return sb.toString();    }    /** 
    * Given a node, select a random successor from the set of possible

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?