📄 lettertosoundimpl.java
字号:
/** * Portions Copyright 2001 Sun Microsystems, Inc. * Portions Copyright 1999-2001 Language Technologies Institute, * Carnegie Mellon University. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. */package com.sun.speech.freetts.lexicon;import java.io.BufferedInputStream;import java.io.BufferedOutputStream;import java.io.BufferedReader;import java.io.DataInputStream;import java.io.DataOutputStream;import java.io.FileOutputStream;import java.io.InputStream;import java.io.InputStreamReader;import java.io.IOException;import java.net.URL;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Set;import java.util.StringTokenizer;import com.sun.speech.freetts.util.BulkTimer;import com.sun.speech.freetts.util.Utilities;/** * Provides the phone list for words using the CMU6 letter-to-sound * (LTS) rules, which are based on the Black, Lenzo, and Pagel paper, * "Issues in Building General Letter-to-Sound Rules." Proceedings * of ESCA Workshop on Speech Synthesis, pages 77-80, Australia, 1998. * * <p>The LTS rules are a simple state machine, with one entry point * for each letter of the alphabet (lower case letters are always * assumed, and the rules keep an array with one entry per letter that * point into the state machine). * * <p>The state machine consists of a huge array, with most entries * containing a decision and the indices of two other entries. The * first of these two indices represents where to go if the decision * is true, and the second represents where to go if the decision is * false. All entries that do not contain a decision are final * entries, and these contain a phone. * * <p>The decision in this case is a simple character comparison, * but it is done in the context of a window around the character in * the word. 
The decision consists of an index into the context window * and a character value. If the character in the context window * matches the character value, then the decision is true. * * <p>The machine traversal for each letter starts at that letter's * entry in the state machine and ends only when it reaches a final * state. If there is no phone that can be mapped, the phone in the * final state is set to 'epsilon.' * * <p>The context window for a character is generated in the following * way: * * <ul> * <li>Pad the original word on either side with '#' and '0' * characters the size of the window for the LTS rules (in this case, * the window size is 4). The "#" is used to indicate the beginning * and end of the word. So, the word "monkey" would turn into * "000#monkey#000". * <li>For each character in the word, the context window consists of * the characters in the padded form that precede and follow the * character. The number of characters on each side is dependent upon * the window size. So, for this implementation, the context window * for the 'k' in monkey is "#money#0" (the four characters before the * 'k' and the four after it; the 'k' itself is not included). * </ul> * * <p>Here's how the phone for 'k' in 'monkey' might be determined: * * <ul> * <li>Create the context window "#money#0". * <li>Start at the state machine entry for 'k' in the state machine. * <li>Grab the 'index' from the current state. This represents an * index into the context window. * <li>Compare the value of the character at the index in the context * window to the character from the current state. If there is a * match, the next state is the qtrue value. If there isn't a match, * the next state is the qfalse state. * <li>Keep on working through the machine until you reach a final * state. * <li>When you get to the final state, the phone is the character in * that state. * </ul> * * <p>This implementation will either read from a straight ASCII file * or a binary file. When reading from an ASCII file, you can specify * when the input line is tokenized: load, lookup, or never. 
If you * specify 'load', the entire file will be parsed when it is loaded. * If you specify 'lookup', the file will be loaded, but the parsing * for each line will be delayed until it is referenced and the parsed * form will be saved away. If you specify 'never', the lines will * parsed each time they are referenced. The default is 'load'. To * specify the load type, set the system property as follows: * * <pre> * -Dcom.sun.speech.freetts.lexicon.LTSTokenize=load * </pre> * * <p>[[[TODO: This implementation uses ASCII 'a'-'z', which is not * internationalized.]]] */public class LetterToSoundImpl implements LetterToSound { /** * Entry in file represents the total number of states in the * file. This should be at the top of the file. The format * should be "TOTAL n" where n is an integer value. */ final static String TOTAL = "TOTAL"; /** * Entry in file represents the beginning of a new letter index. * This should appear before the list of a new set of states for * a particular letter. The format should be "INDEX n c" where * n is the index into the state machine array and c is the * character. */ final static String INDEX = "INDEX"; /** * Entry in file represents a state. The format should be * "STATE i c t f" where 'i' represents an index to look at in the * decision string, c is the character that should match, t is the * index of the state to go to if there is a match, and f is the * of the state to go to if there isn't a match. */ final static String STATE = "STATE"; /** * Entry in file represents a final state. The format should be * "PHONE p" where p represents a phone string that comes from the * phone table. */ final static String PHONE = "PHONE"; /** * If true, the state string is tokenized when it is first read. * The side effects of this are quicker lookups, but more memory * usage and a longer startup time. */ protected boolean tokenizeOnLoad = false; /** * If true, the state string is tokenized the first time it is * referenced. 
The side effects of this are quicker lookups, but * more memory usage. */ protected boolean tokenizeOnLookup = false; /** * Magic number for binary LTS files. */ private final static int MAGIC = 0xdeadbeef; /** * Current binary file version. */ private final static int VERSION = 1; /** * The LTS state machine. Entries can be String or State. An * ArrayList could be used here -- I chose not to because I * thought it might be quicker to avoid dealing with the dynamic * resizing. */ private Object[] stateMachine = null; /** * The number of states in the state machine. */ private int numStates = 0; /** * The 'window size' of the LTS rules. */ private final static int WINDOW_SIZE = 4; /** * An array of characters to hold a string for checking against a * rule. This will be reused over and over again, so the goal * was just to have a single area instead of new'ing up a new one * for every word. The name choice is to match that in Flite's * <code>cst_lts.c</code>. */ private char[] fval_buff = new char[WINDOW_SIZE * 2]; /** * The indexes of the starting points for letters in the state machine. */ protected HashMap letterIndex; /** * The list of phones that can be returned by the LTS rules. */ static private List phonemeTable; /** * Class constructor. * * @param ltsRules a URL pointing to the text * containing the letter to sound rules * @param binary if true, the URL is a binary source * * @throws NullPointerException if the ltsRules are null * @throws IOException if errors are encountered while reading the * compiled form or the addenda */ public LetterToSoundImpl(URL ltsRules, boolean binary) throws IOException { BulkTimer.LOAD.start("LTS"); InputStream is = ltsRules.openStream(); if (binary) { loadBinary(is); } else { loadText(is); } is.close(); BulkTimer.LOAD.stop("LTS"); } /** * Loads the LTS rules from the given text input stream. The * stream is not closed after the rules are read. * * @param is the input stream * * @throws IOException if an error occurs on input. 
*/ private void loadText(InputStream is) throws IOException { BufferedReader reader; String line; // Find out when to convert the phone string into an array. // String tokenize = Utilities.getProperty("com.sun.speech.freetts.lexicon.LTSTokenize", "load"); tokenizeOnLoad = tokenize.equals("load"); tokenizeOnLookup = tokenize.equals("lookup"); letterIndex = new HashMap(); reader = new BufferedReader(new InputStreamReader(is)); line = reader.readLine(); while (line != null) { if (!line.startsWith("***")) { parseAndAdd(line); } line = reader.readLine(); } } /** * Loads the LTS rules from the given binary input stream. The * input stream is not closed after the rules are read. * * @param is the input stream * * @throws IOException if an error occurs on input. */ private void loadBinary(InputStream is) throws IOException { DataInputStream dis = new DataInputStream(is); if (dis.readInt() != MAGIC) { throw new Error("Bad LTS binary file format"); } if (dis.readInt() != VERSION) { throw new Error("Bad LTS binary file version"); } // read the phoneme table // int phonemeTableSize = dis.readInt(); phonemeTable = new ArrayList(phonemeTableSize); for (int i = 0; i < phonemeTableSize; i++) { String phoneme = dis.readUTF(); phonemeTable.add(phoneme); } // letter index // int letterIndexSize = dis.readInt(); letterIndex = new HashMap(); for (int i = 0; i < letterIndexSize; i++) { char c = dis.readChar(); int index = dis.readInt(); letterIndex.put(Character.toString(c), new Integer(index)); } // statemachine states // int stateMachineSize = dis.readInt(); stateMachine = new Object[stateMachineSize]; for (int i = 0; i < stateMachineSize; i++) { int type = dis.readInt(); if (type == FinalState.TYPE) { stateMachine[i] = FinalState.loadBinary(dis); } else if (type == DecisionState.TYPE) { stateMachine[i] = DecisionState.loadBinary(dis); } else { throw new Error("Unknown state type in LTS load"); } } } /** * Creates a word from the given input line and add it to the state * machine. 
It expects the TOTAL line to come before any of the * states. * * @param line the line of text from the input file */ protected void parseAndAdd(String line) { StringTokenizer tokenizer = new StringTokenizer(line," "); String type = tokenizer.nextToken(); if (type.equals(STATE) || type.equals(PHONE)) { if (tokenizeOnLoad) { stateMachine[numStates] = getState(type, tokenizer); } else { stateMachine[numStates] = line; } numStates++; } else if (type.equals(INDEX)) { Integer index = new Integer(tokenizer.nextToken()); if (index.intValue() != numStates) { throw new Error("Bad INDEX in file."); } else { String c = tokenizer.nextToken(); letterIndex.put(c,index); } } else if (type.equals(TOTAL)) { stateMachine = new Object[Integer.parseInt(tokenizer.nextToken())]; } } /** * Dumps a binary form of the letter to sound rules. * This method is not thread-safe. * * <p>Binary format is: * <pre> * MAGIC * VERSION * NUM STATES * for each state ... * </pre> * * @param path the path to dump the file to * * @throws IOException if a problem occurs during the dump */ public void dumpBinary(String path) throws IOException { FileOutputStream fos = new FileOutputStream(path); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(fos)); dos.writeInt(MAGIC); dos.writeInt(VERSION); // Phoneme table // phonemeTable = findPhonemes(); dos.writeInt(phonemeTable.size()); for (Iterator i = phonemeTable.iterator(); i.hasNext(); ) { String phoneme = (String) i.next(); dos.writeUTF(phoneme); } // letter index // dos.writeInt(letterIndex.size()); for (Iterator i = letterIndex.keySet().iterator(); i.hasNext(); ) { String letter = (String) i.next(); int index = ((Integer) letterIndex.get(letter)).intValue(); dos.writeChar(letter.charAt(0)); dos.writeInt(index); } // statemachine states // dos.writeInt(stateMachine.length); for (int i = 0; i < stateMachine.length; i++) { getState(i).writeBinary(dos); } dos.close(); } /** * Returns a list of all the phonemes used by the LTS rules. 
* * @return a list of all the phonemes */ private List findPhonemes() { Set set = new HashSet(); for (int i = 0; i < stateMachine.length; i++) { if (stateMachine[i] instanceof FinalState) { FinalState fstate = (FinalState) stateMachine[i]; if (fstate.phoneList != null) { for (int j = 0; j < fstate.phoneList.length; j++) { set.add(fstate.phoneList[j]); } } } } return new ArrayList(set); } /** * Gets the <code>State</code> at the given index. This may * replace a <code>String</code> at * the current spot with an actual <code>State</code> instance. * * @param i the index into the state machine * * @return the <code>State</code> at the given index. */ protected State getState(int i) { State state = null; if (stateMachine[i] instanceof String) { state = getState((String) stateMachine[i]); if (tokenizeOnLookup) { stateMachine[i] = state; } } else { state = (State) stateMachine[i]; } return state; } /** * Gets the <code>State</code> based upon the <code>String</code>. * * @param s the string to parse * * @return the parsed <code>State</code> */ protected State getState(String s) { StringTokenizer tokenizer = new StringTokenizer(s, " "); return getState(tokenizer.nextToken(), tokenizer); } /** * Gets the <code>State</code> based upon the <code>type</code> * and <code>tokenizer<code>. * * @param type one of <code>STATE</code> or <code>PHONE</code> * @param tokenizer a <code>StringTokenizer</code> containing the * <code>State</code> * * @return the parsed <code>State</code> */ protected State getState(String type, StringTokenizer tokenizer) { if (type.equals(STATE)) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -