grammar.java
来自「It is the Speech recognition software. 」· Java 代码 · 共 612 行 · 第 1/2 页
JAVA
612 行
/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */package edu.cmu.sphinx.linguist.language.grammar;import java.io.FileOutputStream;import java.io.IOException;import java.io.PrintWriter;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.ArrayList;import java.util.Collections;import java.util.Random;import java.util.Set;import java.util.logging.Level;import java.util.logging.Logger;import edu.cmu.sphinx.linguist.dictionary.Dictionary;import edu.cmu.sphinx.linguist.dictionary.Word;import edu.cmu.sphinx.util.props.Configurable;import edu.cmu.sphinx.util.props.PropertyException;import edu.cmu.sphinx.util.props.PropertySheet;import edu.cmu.sphinx.util.props.PropertyType;import edu.cmu.sphinx.util.props.Registry;import edu.cmu.sphinx.util.Timer;/** * Classes that implement this interface create grammars. A grammar * is represented internally as a graph of {@link GrammarNode GrammarNodes} * linked together by {@link GrammarArc GrammarArcs}. Calling * {@link #getInitialNode() getInitialNode} will return the first node * of the grammar graph. To traverse the grammar graph, one should * call GrammarNode.getSuccessors, which will return an array of GrammarArcs, * from which you can reach the neighboring GrammarNodes. * <p> * Note that all grammar probabilities are maintained in LogMath log * domain. */public abstract class Grammar implements Configurable { /** * Property to control the the dumping of the grammar */ public final static String PROP_SHOW_GRAMMAR = "showGrammar"; /** * The default value for PROP_SHOW_GRAMMAR. 
*/ public final static boolean PROP_SHOW_GRAMMAR_DEFAULT = false; /** * Property to control whether grammars are optimized or not */ public final static String PROP_OPTIMIZE_GRAMMAR = "optimizeGrammar"; /** * The default value for PROP_OPTIMIZE_GRAMMAR */ public final static boolean PROP_OPTIMIZE_GRAMMAR_DEFAULT = true; /** * Property to control whether silence words are inserted into the graph */ public final static String PROP_ADD_SIL_WORDS = "addSilenceWords"; /** * The default value for PROP_ADD_SIL_WORDS */ public final static boolean PROP_ADD_SIL_WORDS_DEFAULT = false; /** * Property to control whether filler words are inserted into the graph */ public final static String PROP_ADD_FILLER_WORDS = "addFillerWords"; /** * The default value for PROP_ADD_FILLER_WORDS */ public final static boolean PROP_ADD_FILLER_WORDS_DEFAULT = false; /** * Property that defines the dictionary to use for this grammar */ public final static String PROP_DICTIONARY = "dictionary"; // ---------------------------- // Configuration data // ----------------------------- private String name; private Logger logger; private boolean showGrammar; private boolean optimizeGrammar = true; private boolean addSilenceWords = false; private boolean addFillerWords = false; private Dictionary dictionary; private GrammarNode initialNode; private Set grammarNodes; private final static Word[][] EMPTY_ALTERNATIVE = new Word[0][0]; private Random randomizer = new Random(); private int maxIdentity = 0; private boolean postProcessed = false; private boolean idCheck = false; /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#register(java.lang.String, * edu.cmu.sphinx.util.props.Registry) */ public void register(String name, Registry registry) throws PropertyException { this.name = name; registry.register(PROP_DICTIONARY, PropertyType.COMPONENT); registry.register(PROP_SHOW_GRAMMAR, PropertyType.BOOLEAN); registry.register(PROP_OPTIMIZE_GRAMMAR, PropertyType.BOOLEAN); 
registry.register(PROP_ADD_SIL_WORDS, PropertyType.BOOLEAN); registry.register(PROP_ADD_FILLER_WORDS, PropertyType.BOOLEAN); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet) */ public void newProperties(PropertySheet ps) throws PropertyException { logger = ps.getLogger(); showGrammar = ps.getBoolean(PROP_SHOW_GRAMMAR, PROP_SHOW_GRAMMAR_DEFAULT); optimizeGrammar = ps.getBoolean(PROP_OPTIMIZE_GRAMMAR, PROP_OPTIMIZE_GRAMMAR_DEFAULT); addSilenceWords = ps.getBoolean(PROP_ADD_SIL_WORDS, PROP_ADD_SIL_WORDS_DEFAULT); addFillerWords = ps.getBoolean(PROP_ADD_FILLER_WORDS, PROP_ADD_FILLER_WORDS_DEFAULT); dictionary = (Dictionary) ps.getComponent(PROP_DICTIONARY, Dictionary.class); } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#getName() */ public String getName() { return name; } /** * Create the grammar */ public void allocate() throws IOException { dictionary.allocate(); newGrammar(); Timer timer = Timer.getTimer("grammarLoad"); timer.start(); initialNode = createGrammar(); postProcessGrammar(); timer.stop(); } /** * Deallocate resources allocated to this grammar */ public void deallocate() { initialNode = null; grammarNodes = null; dictionary.deallocate(); } /** * Returns the initial node for the grammar * * @return the initial grammar node */ public GrammarNode getInitialNode() { return initialNode; } /** * Perform the standard set of grammar post processing. 
This can * include inserting silence nodes and optimizing out empty nodes */ protected void postProcessGrammar() { if (!postProcessed) { if (addFillerWords) { addFillerWords(); } else if (addSilenceWords) { addSilenceWords(); } if (optimizeGrammar) { optimizeGrammar(); } dumpStatistics(); if (showGrammar) { dumpGrammar("grammar.gdl"); dumpRandomSentences("sentences.txt", 100); logger.info("Total number of nodes " + grammarNodes.size()); } postProcessed = true; } } /** * Dumps statistics for this grammar * */ public void dumpStatistics() { if (logger.isLoggable(Level.INFO)) { int successorCount = 0; logger.info("Num nodes : " + getNumNodes()); for (Iterator i = grammarNodes.iterator(); i.hasNext();) { GrammarNode node = (GrammarNode) i.next(); successorCount += node.getSuccessors().length; } logger.info("Num arcs : " + successorCount); logger.info("Avg arcs : " + ((float) successorCount / getNumNodes())); } } /** * Dump a set of random sentences that fit this grammar * * @param path * the name of the file to dump the sentences to * @param count * dumps no more than this. May dump less than this * depending upon the number of uniqe sentences in the * grammar. */ public void dumpRandomSentences(String path, int count) { try { Set set = new HashSet(); PrintWriter out = new PrintWriter(new FileOutputStream(path)); for (int i = 0; i < count; i++) { String s = getRandomSentence(); if (!set.contains(s)) { set.add(s); out.println(s); } } out.close(); } catch (IOException ioe) { logger.severe("Can't write random sentences to " + path + " " + ioe); } } /** * Dump a set of random sentences that fit this grammar * * @param count * dumps no more than this. May dump less than this * depending upon the number of uniqe sentences in the * grammar. 
*/
    public void dumpRandomSentences(int count) {
        // Collect distinct sentences; the set silently drops repeats.
        Set unique = new HashSet();
        for (int remaining = count; remaining > 0; remaining--) {
            unique.add(getRandomSentence());
        }

        // Print them in natural (String) order.
        List sorted = new ArrayList(unique);
        Collections.sort(sorted);
        for (int k = 0; k < sorted.size(); k++) {
            System.out.println(sorted.get(k));
        }
    }

    /**
     * Builds one sentence by walking the grammar from the initial node,
     * choosing a random successor at every step until a final node is
     * reached. The spellings of the non-filler words met on the way are
     * joined with single spaces; empty nodes contribute nothing.
     *
     * @return a random sentence that fits this grammar
     */
    public String getRandomSentence() {
        StringBuffer sentence = new StringBuffer();
        for (GrammarNode current = getInitialNode();
                !current.isFinalNode();
                current = selectRandomSuccessor(current)) {
            if (current.isEmpty()) {
                continue;
            }
            Word word = current.getWord();
            if (word.isFiller()) {
                continue;
            }
            if (sentence.length() > 0) {
                sentence.append(" ");
            }
            sentence.append(word.getSpelling());
        }
        return sentence.toString();
    }

    /**
     * Given a node, select a random successor from the set of possible
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?