📄 voice.java
字号:
/** * Portions Copyright 2001 Sun Microsystems, Inc. * Portions Copyright 1999-2001 Language Technologies Institute, * Carnegie Mellon University. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. */package com.sun.speech.freetts;import com.sun.speech.freetts.lexicon.Lexicon;import com.sun.speech.freetts.util.BulkTimer;import com.sun.speech.freetts.util.Utilities;import com.sun.speech.freetts.audio.AudioPlayer;import com.sun.speech.freetts.relp.LPCResult;import java.util.List;import java.util.Iterator;import java.util.HashMap;import java.util.Map;import java.util.ArrayList;import java.util.Collections;import java.io.PrintWriter;import java.io.IOException;import java.io.InputStream;import java.io.Reader;import java.io.BufferedReader;import java.io.InputStreamReader;import java.net.URL;import org.w3c.dom.Document;import org.w3c.dom.Node;import org.w3c.dom.Text;import java.util.Locale;import javax.sound.sampled.AudioFormat;/** * Performs text-to-speech using a series of * <code>UtteranceProcessors</code>. It is the main conduit to the FreeTTS * speech synthesizer. It can perform TTS on ASCII text, * a JSML document, an <code>InputStream</code>, or a * <code>FreeTTSSpeakable</code>, by invoking the method <code>speak</code>. * * <p>Before a Voice can perform TTS, it must have a * <code>Lexicon</code>, from which it gets the vocabulary, and * an <code>AudioPlayer</code>, to which it sends the synthesized output. 
*
 * <p><b>Example</b> (using the <code>CMUDiphoneVoice</code>,
 * <code>CMULexicon</code> and <code>JavaClipAudioPlayer</code>):
 *
 * <pre>
 * Voice voice = new CMUDiphoneVoice();
 *
 * // sets the Lexicon
 * voice.setLexicon(new CMULexicon());
 *
 * // sets the AudioPlayer
 * voice.setAudioPlayer(new JavaClipAudioPlayer());
 *
 * // loads the Voice
 * voice.allocate();
 *
 * // start talking
 * voice.speak("I can talk forever without getting tired!");
 * </pre>
 *
 *
 * <p>A user can override the AudioPlayer to use by defining the
 * "com.sun.speech.freetts.voice.defaultAudioPlayer" system property.
 * The value of this property must be the name of a class that
 * implements the AudioPlayer interface, and which also has a no-arg
 * constructor.
 *
 * @see VoiceManager
 * @see VoiceDirectory
 */
public abstract class Voice implements UtteranceProcessor, Dumpable {

    /**
     * Constant that describes the name of the unit database used by
     * this voice.
     */
    public final static String DATABASE_NAME = "databaseName";

    // Processors run in list order by processUtterance(); the
    // constructor creates this as a synchronized list.
    private List utteranceProcessors;
    private Map featureProcessors;
    private FeatureSetImpl features;
    private boolean verbose = false;
    private boolean metrics = false;
    private boolean detailedMetrics = false;
    // When set, processUtterance() dumps the utterance / its relations
    // once processing has finished.
    private boolean dumpUtterance = false;
    private boolean dumpRelations = false;
    private String runTitle = "unnamed run";
    // Loaded by allocate() if it reports that it is not loaded yet.
    private Lexicon lexicon = null;
    private AudioPlayer defaultAudioPlayer = null;
    private AudioPlayer audioPlayer = null;
    // Assigned from getAudioOutput() during allocate().
    private UtteranceProcessor audioOutput;
    // When null, processUtterance() renders output in the calling thread;
    // allocate() replaces a null queue with one from createOutputThread().
    private OutputQueue outputQueue = null;
    // When non-null, dumpASCII() dumps the "target_lpcres" result after
    // each utterance; the value "-" selects the no-argument dumpASCII()
    // (presumably standard output -- TODO confirm against LPCResult).
    private String waveDumpFile = null;
    // Timer used by speak() and processUtterance() in the calling thread.
    private BulkTimer runTimer = new BulkTimer();
    // Timer handed to outputUtterance() by the thread started in
    // createOutputThread().
    private BulkTimer threadTimer = new BulkTimer();
    private boolean externalOutputQueue = false;
    private boolean externalAudioPlayer = false;

    private float nominalRate = 150; // nominal speaking rate for this voice
    private float pitch = 100;       // pitch baseline (hertz)
    private float range = 10;        // pitch range (hertz)
    private float pitchShift = 1;    // F0 Shift
    // NOTE(review): the constructor reads the "volume" property with a
    // default of "1.0", which differs from this initializer; this value
    // presumably only survives when the property lookup throws a
    // SecurityException -- confirm which default is intended.
    private float volume = 0.8f;     // the volume
// (range 0 to 1)
    private float durationStretch = 1f; // the duration stretch

    // Set through setLoaded(); allocate() returns immediately once true.
    private boolean loaded = false;

    // Descriptive properties of the voice and their defaults.
    private String name = "default_name";
    private Age age = Age.DONT_CARE;
    private Gender gender = Gender.DONT_CARE;
    private String description = "default description";
    private Locale locale = Locale.getDefault();
    private String domain = "general";
    private String style = "standard";
    private String organization = "unknown";

    /**
     * Prefix for System property names.
     */
    public final static String PROP_PREFIX = "com.sun.speech.freetts.voice.";

    /**
     * Feature name for the silence phone string.
     */
    public final static String FEATURE_SILENCE = "silence";

    /**
     * Feature name for the join type string.
     */
    public final static String FEATURE_JOIN_TYPE = "join_type";

    /**
     * Name of the system property ({@link #PROP_PREFIX} +
     * "defaultAudioPlayer") that names the default AudioPlayer class
     * to use.
     */
    public final static String DEFAULT_AUDIO_PLAYER =
        PROP_PREFIX + "defaultAudioPlayer";

    /**
     * The default class to use for the DEFAULT_AUDIO_PLAYER.
     */
    public final static String DEFAULT_AUDIO_PLAYER_DEFAULT =
        "com.sun.speech.freetts.audio.JavaStreamingAudioPlayer";

    /**
     * Creates a new Voice. Utterances are sent to an
     * output queue to be rendered as audio. Utterances are posted
     * to the queue by <code>processUtterance</code>. This
     * queue is usually created via a call to 'createOutputThread,'
     * which creates a thread that waits on the queue and sends the
     * output to the audio player associated with this voice. If
     * the queue is null, the output is rendered in the calling
     * thread.
     *
     * <p>The speaking rate, pitch, range and volume are initialized
     * from system properties named with the {@link #PROP_PREFIX} prefix.
     *
     * @see #createOutputThread
     */
    public Voice() {
        /* Make the utteranceProcessors a synchronized list to avoid
         * some threading issues.
*/ utteranceProcessors = Collections.synchronizedList(new ArrayList()); features = new FeatureSetImpl(); featureProcessors = new HashMap(); try { nominalRate = Float.parseFloat( Utilities.getProperty(PROP_PREFIX + "speakingRate","150")); pitch = Float.parseFloat( Utilities.getProperty(PROP_PREFIX + "pitch","100")); range = Float.parseFloat( Utilities.getProperty(PROP_PREFIX + "range","10")); volume = Float.parseFloat( Utilities.getProperty(PROP_PREFIX + "volume","1.0")); } catch (SecurityException se) { // can't get properties, just use defaults } outputQueue = null; audioPlayer = null; defaultAudioPlayer = null; } /** * Creates a new Voice like above, except that it also * stores the properties of the voice. * @param name the name of the voice * @param gender the gender of the voice * @param age the age of the voice * @param description a human-readable string providing a * description that can be displayed for the users. * @param locale the locale of the voice * @param domain the domain of this voice. For example, * @param organization the organization which created the voice * "general", "time", or * "weather". * * @see #Voice() */ public Voice(String name, Gender gender, Age age, String description, Locale locale, String domain, String organization) { this(); setName(name); setGender(gender); setAge(age); setDescription(description); setLocale(locale); setDomain(domain); setOrganization(organization); } /** * Speaks the given text. * * @param text the text to speak * * @return <code>true</code> if the given text is spoken properly; * otherwise <code>false</code> */ public boolean speak(String text) { return speak(new FreeTTSSpeakableImpl(text)); } /** * Speaks the given document. * * @param doc the JSML document to speak * * @return <code>true</code> if the given document is spoken properly; * otherwise <code>false</code> */ public boolean speak(Document doc) { return speak(new FreeTTSSpeakableImpl(doc)); } /** * Speaks the input stream. 
* * @param inputStream the inputStream to speak * * @return <code>true</code> if the given input stream is spoken properly; * otherwise <code>false</code> */ public boolean speak(InputStream inputStream) { return speak(new FreeTTSSpeakableImpl(inputStream)); } /** * Speak the given queue item. This is a synchronous method that * does not return until the speakable is completely * spoken or has been cancelled. * * @param speakable the item to speak * * @return <code>true</code> if the utterance was spoken properly, * <code>false</code> otherwise */ public boolean speak(FreeTTSSpeakable speakable) { log("speak(FreeTTSSpeakable) called"); boolean ok = true; boolean posted = false; getAudioPlayer().startFirstSampleTimer(); for (Iterator i = tokenize(speakable); !speakable.isCompleted() && i.hasNext() ; ) { try { Utterance utterance = (Utterance) i.next(); if (utterance != null) { processUtterance(utterance); posted = true; } } catch (ProcessException pe) { ok = false; } } if (ok && posted) { runTimer.start("WaitAudio"); ok = speakable.waitCompleted(); runTimer.stop("WaitAudio"); } log("speak(FreeTTSSpeakable) completed"); return ok; } /** * @deprecated As of FreeTTS 1.2, replaced by {@link #allocate}. */ public void load() { allocate(); } /** * Allocate this Voice. It loads the lexicon and the * audio output handler, and creates an audio output thread by * invoking <code>createOutputThread()</code>, if * one is not already created. It then calls the <code>loader()</code> * method to load Voice-specific data, which include utterance processors. 
*/
    public void allocate() {
        if (!isLoaded()) {
            BulkTimer.LOAD.start();

            // step 1: the lexicon, unless it has been loaded already
            if (!lexicon.isLoaded()) {
                try {
                    lexicon.load();
                } catch (IOException lexiconFailure) {
                    error("Can't load voice " + lexiconFailure);
                }
            }

            // step 2: the audio output handler
            try {
                audioOutput = getAudioOutput();
            } catch (IOException outputFailure) {
                error("Can't load audio output handler for voice "
                        + outputFailure);
            }

            // step 3: an output queue, if nobody supplied one
            if (outputQueue == null) {
                outputQueue = createOutputThread();
            }

            // step 4: the voice-specific data
            try {
                loader();
            } catch (IOException loaderFailure) {
                error("Can't load voice " + loaderFailure);
            }

            BulkTimer.LOAD.stop();
            if (isMetrics()) {
                String banner =
                    "loading " + toString() + " for " + getRunTitle();
                BulkTimer.LOAD.show(banner);
            }
            setLoaded(true);
        }
    }

    /**
     * Tells whether this voice has been loaded.
     *
     * @return <code>true</code> if the voice is loaded;
     *   otherwise <code>false</code>
     */
    public boolean isLoaded() {
        return this.loaded;
    }

    /**
     * Records whether this voice is loaded.
     *
     * @param loaded the new loaded state
     */
    protected void setLoaded(boolean loaded) {
        this.loaded = loaded;
    }

    /**
     * Processes the given Utterance by passing it to each
     * UtteranceProcessor managed by this Voice.  The
     * UtteranceProcessors are called in the order they were added to
     * the Voice.
* * @param u the Utterance to process * * @throws ProcessException if an exception occurred while performing * operations on the Utterance */ public void processUtterance(Utterance u) throws ProcessException { UtteranceProcessor[] processors; if (utteranceProcessors == null) { return; } if (u == null) { throw new ProcessException("Utterance is null."); } runTimer.start("processing"); processors = new UtteranceProcessor[utteranceProcessors.size()]; processors = (UtteranceProcessor[]) utteranceProcessors.toArray(processors); log("Processing Utterance: " + u.getString("input_text")); try { for (int i = 0; i < processors.length && !u.getSpeakable().isCompleted(); i++) { runProcessor(processors[i], u, runTimer); } if (!u.getSpeakable().isCompleted()) { if (outputQueue == null) { log("To AudioOutput"); outputUtterance(u, runTimer); } else { runTimer.start("..post"); outputQueue.post(u); runTimer.stop("..post"); } } } catch (ProcessException pe) { System.err.println("Processing Utterance: " + pe); } catch (Exception e) { System.err.println("Trouble while processing utterance " + e); e.printStackTrace(); u.getSpeakable().cancelled(); } log("Done Processing Utterance: " + u.getString("input_text")); runTimer.stop("processing"); if (dumpUtterance) { u.dump("Utterance"); } if (dumpRelations) { u.dumpRelations("Utterance"); } dumpASCII(u); } /** * Dumps the wave for the given utterance. * * @param utterance the utterance of interest */ private void dumpASCII(Utterance utterance) { if (waveDumpFile != null) { LPCResult lpcResult = (LPCResult) utterance.getObject("target_lpcres"); try { if (waveDumpFile.equals("-")) { lpcResult.dumpASCII(); } else { lpcResult.dumpASCII(waveDumpFile); } } catch (IOException ioe) { error("Can't dump file to " + waveDumpFile + " " + ioe); } } } /** * Creates an output thread that will asynchronously * output utterances that are generated by this voice (and other * voices). * * @return the queue where utterances should be placed. 
*/
    public static OutputQueue createOutputThread() {
        final OutputQueue queue = new OutputQueue();

        Thread outputThread = new Thread() {
            public void run() {
                // keep draining the queue until it hands back null
                Utterance pending;
                while ((pending = queue.pend()) != null) {
                    Voice owner = pending.getVoice();
                    owner.log("OUT: " + pending.getString("input_text"));
                    owner.outputUtterance(pending, owner.threadTimer);
                }
            }
        };
        outputThread.setDaemon(true);
        outputThread.start();

        return queue;
    }

    /**
     * Sends the given utterance to the audio output processor
     * associated with this voice. If the queue item associated with
     * this utterance is completed, then this set of utterances has
     * been cancelled or otherwise aborted and the utterance should
     * not be output.
     *
     * @param utterance the utterance to be output
     * @param timer the timer for gathering performance metrics
     *
     * @return  true if the utterance was output properly; otherwise
     *    false
     */
    private boolean outputUtterance(Utterance utterance, BulkTimer timer) {
        boolean ok = true;
        FreeTTSSpeakable speakable = utterance.getSpeakable();

        if (!speakable.isCompleted()) {
            if (utterance.isFirst()) {
                getAudioPlayer().reset();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -