📄 speechmarker.java

📁 It is the Speech recognition software. It is platform independent. To execute the source code,
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * Copyright 1999-2002 Carnegie Mellon University.   * Portions Copyright 2002 Sun Microsystems, Inc.   * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved.  Use is subject to license terms. *  * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL  * WARRANTIES. * */package edu.cmu.sphinx.frontend.endpoint;import java.util.ArrayList;import java.util.List;import java.util.ListIterator;import edu.cmu.sphinx.frontend.BaseDataProcessor;import edu.cmu.sphinx.frontend.Data;import edu.cmu.sphinx.frontend.DataEndSignal;import edu.cmu.sphinx.frontend.DataProcessingException;import edu.cmu.sphinx.frontend.DataStartSignal;import edu.cmu.sphinx.frontend.Signal;import edu.cmu.sphinx.frontend.endpoint.SpeechEndSignal;import edu.cmu.sphinx.frontend.endpoint.SpeechStartSignal;import edu.cmu.sphinx.util.props.PropertyException;import edu.cmu.sphinx.util.props.PropertySheet;import edu.cmu.sphinx.util.props.PropertyType;import edu.cmu.sphinx.util.props.Registry;/** * Converts a stream of SpeechClassifiedData objects, marked as  * speech and non-speech, and mark out the regions that are considered speech. * This is done by inserting SPEECH_START and SPEECH_END signals  * into the stream. * * <p>The algorithm for inserting the two signals is as follows. * * <p>The algorithm is always in one of two states: 'in-speech' and  * 'out-of-speech'. If 'out-of-speech', it will read in audio until  * we hit audio that is speech. If we have read more than 'startSpeech' * amount of <i>continuous</i> speech, we consider that speech has started,  * and insert a SPEECH_START at 'speechLeader' time before speech  * first started. The state of the algorithm changes to 'in-speech'. * * <p>Now consider the case when the algorithm is in 'in-speech' state. * If it read an audio that is speech, it is outputted. 
If the audio is
 * non-speech, we read ahead until we have 'endSilence' amount of
 * <i>continuous</i> non-speech. At that point we consider that speech
 * has ended. A SPEECH_END signal is inserted at 'speechTrailer' time
 * after the first non-speech audio. The algorithm returns to
 * 'out-of-speech' state. If any speech audio is encountered in-between,
 * the accounting starts all over again.
 */
public class SpeechMarker extends BaseDataProcessor {

    /**
     * The SphinxProperty for the minimum amount of time in speech
     * (in milliseconds) to be considered as utterance start.
     */
    public static final String PROP_START_SPEECH =
        "startSpeech";

    /**
     * The default value of PROP_START_SPEECH.
     */
    public static final int PROP_START_SPEECH_DEFAULT = 200;

    /**
     * The SphinxProperty for the amount of time in silence
     * (in milliseconds) to be considered as utterance end.
     */
    public static final String PROP_END_SILENCE = "endSilence";

    /**
     * The default value of PROP_END_SILENCE.
     */
    public static final int PROP_END_SILENCE_DEFAULT = 500;

    /**
     * The SphinxProperty for the amount of time (in milliseconds)
     * before speech start to be included as speech data.
     */
    public static final String PROP_SPEECH_LEADER =
        "speechLeader";

    /**
     * The default value of PROP_SPEECH_LEADER.
     */
    public static final int PROP_SPEECH_LEADER_DEFAULT = 100;

    /**
     * The SphinxProperty for the amount of time (in milliseconds)
     * after speech ends to be included as speech data.
     */
    public static final String PROP_SPEECH_TRAILER =
        "speechTrailer";

    /**
     * The default value of PROP_SPEECH_TRAILER.
*/
    public static final int PROP_SPEECH_TRAILER_DEFAULT = 100;

    private List outputQueue;  // Audio objects are added to the end

    // True while the marker is in the 'in-speech' state; cleared by reset().
    private boolean inSpeech;

    // Configured values, loaded in newProperties() from the PROP_* keys
    // (PROP_START_SPEECH, PROP_END_SILENCE, PROP_SPEECH_LEADER and
    // PROP_SPEECH_TRAILER respectively).
    private int startSpeechTime;
    private int endSilenceTime;
    private int speechLeader;
    private int speechTrailer;

    /*
     * (non-Javadoc)
     *
     * Registers the four integer properties of this processor after
     * delegating to the superclass registration.
     *
     * @see edu.cmu.sphinx.util.props.Configurable#register(java.lang.String,
     *      edu.cmu.sphinx.util.props.Registry)
     */
    public void register(String name, Registry registry)
            throws PropertyException {
        super.register(name, registry);
        registry.register(PROP_START_SPEECH, PropertyType.INT);
        registry.register(PROP_END_SILENCE, PropertyType.INT);
        registry.register(PROP_SPEECH_LEADER, PropertyType.INT);
        registry.register(PROP_SPEECH_TRAILER, PropertyType.INT);
    }

    /*
     * (non-Javadoc)
     *
     * Loads the four timing properties from the property sheet, falling
     * back to the corresponding *_DEFAULT constants.
     *
     * NOTE(review): unlike register() and initialize(), this method does
     * not call super.newProperties(ps) -- confirm that is intentional.
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
        startSpeechTime = ps.getInt (PROP_START_SPEECH, PROP_START_SPEECH_DEFAULT);
        endSilenceTime = ps.getInt (PROP_END_SILENCE, PROP_END_SILENCE_DEFAULT);
        speechLeader = ps.getInt (PROP_SPEECH_LEADER, PROP_SPEECH_LEADER_DEFAULT);
        speechTrailer = ps.getInt (PROP_SPEECH_TRAILER, PROP_SPEECH_TRAILER_DEFAULT);
    }

    /**
     * Initializes this SpeechMarker: runs the superclass initialization,
     * creates an empty output queue and resets the speech state.
     */
    public void initialize() {
        super.initialize();
        this.outputQueue = new ArrayList();
        reset();
    }

    /**
     * Resets this SpeechMarker to a starting state, i.e. 'out-of-speech'.
     */
    private void reset() {
        inSpeech = false;
    }

    /**
     * Returns the next Data object.
*     * @return the next Data object, or null if none available     *     * @throws DataProcessingException if a data processing error occurs     */    public Data getData() throws DataProcessingException {        if (outputQueue.size() == 0) {            if (!inSpeech) {                readInitialFrames();            } else {                Data audio = readData();                if (audio != null) {                    if (audio instanceof SpeechClassifiedData) {                        SpeechClassifiedData data =                            (SpeechClassifiedData) audio;                        sendToQueue(data);                        if (!data.isSpeech()) {                            inSpeech = !(readEndFrames(data));                        }                    } else if (audio instanceof DataEndSignal) {                        sendToQueue(new SpeechStartSignal                                    (((Signal) audio).getTime()));                        sendToQueue(audio);                        inSpeech = false;                    } else if (audio instanceof DataStartSignal) {                        throw new Error("Got DataStartSignal while in speech");                    }                }            }        }        if (outputQueue.size() > 0) {            Data audio = (Data) outputQueue.remove(0);            if (audio instanceof SpeechClassifiedData) {                SpeechClassifiedData data = (SpeechClassifiedData) audio;                audio = data.getDoubleData();            }            return audio;        } else {            return null;        }    }    private Data readData() throws DataProcessingException {        Data audio = getPredecessor().getData();        return audio;    }    private int numUttStarts;    private int numUttEnds;    private void sendToQueue(Data audio) {        // now add the audio        outputQueue.add(audio);        if (audio instanceof DataStartSignal) {            numUttEnds = 0;            numUttStarts++;        } else if 
(audio instanceof DataEndSignal) {            numUttStarts = 0;            numUttEnds++;        }    }    /**     * Returns the amount of audio data in milliseconds in the      * given SpeechClassifiedData object.     *     * @param audio the SpeechClassifiedData object     *     * @return the amount of audio data in milliseconds     */    public int getAudioTime(SpeechClassifiedData audio) {
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -