lexiconimpl.java

来自「这是java 开发的的免费语音播放插件,很值得学习参考!!!!!!!!!!!!1」· Java 代码 · 共 876 行 · 第 1/2 页
JAVA
876 行
/**
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute, 
 * Carnegie Mellon University.
 * All Rights Reserved.  Use is subject to license terms.
 * 
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL 
 * WARRANTIES.
 */
package com.sun.speech.freetts.lexicon;

import com.sun.speech.freetts.util.Utilities;
import com.sun.speech.freetts.util.BulkTimer;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;

/**
 * Provides an implementation of a Lexicon.
 *
 * <p>This implementation will either read from a straight ASCII file
 * or a binary file.  When reading from an ASCII file, you can specify
 * when the input line is tokenized:  load, lookup, or never.  If you
 * specify 'load', the entire file will be parsed when it is loaded.
 * If you specify 'lookup', the file will be loaded, but the parsing
 * for each line will be delayed until it is referenced and the parsed
 * form will be saved away.  If you specify 'never', the lines will
 * be parsed each time they are referenced.  The default is 'never'.
 * To specify the load type, set the system property as follows:
 *
 * <pre>
 *   -Dcom.sun.speech.freetts.lexicon.LexTokenize=load
 * </pre>
 *
 * <p>If a binary file is used, you can also specify whether the new
 * IO package is used.  The new IO package is new for JDK1.4, and can
 * greatly improve the speed of loading files.  To enable new IO, use
 * the following system property (it is enabled by default):
 *
 * <pre>
 *   -Dcom.sun.speech.freetts.useNewIO=true
 * </pre>
 *
 * <p>The implementation also allows users to define their own addenda
 * that will be used in addition to the system addenda.  If the user
 * defines their own addenda, its values will be added to the system
 * addenda, overriding any existing elements in the system addenda.
 * To define a user addenda, the user needs to set the following
 * property:
 *
 * <pre>
 *   -Dcom.sun.speech.freetts.lexicon.userAddenda=&lt;URLToUserAddenda>
 * </pre>
 *
 * Where &lt;URLToUserAddenda> is a URL pointing to an ASCII file
 * containing addenda entries.
 *
 * <p>[[[TODO: support multiple homographs with the same part of speech.]]] 
 */
abstract public class LexiconImpl implements Lexicon {
    /**
     * If true, the phone string is replaced with the phone array in
     * the hashmap when the lexicon is loaded.  The side effects
     * of this are quicker lookups, but more memory usage and a longer
     * startup time.  Set when the system property
     * <code>com.sun.speech.freetts.lexicon.LexTokenize</code> is
     * <code>load</code>.
     */
    protected boolean tokenizeOnLoad = false;

    /**
     * If true, the phone string is replaced with the phone array in
     * the hashmap when the phone array is first looked up.  Set when
     * the system property
     * <code>com.sun.speech.freetts.lexicon.LexTokenize</code> is
     * <code>lookup</code>.
     */
    protected boolean tokenizeOnLookup = false;

    /**
     * Magic number for binary Lexicon files.
     */
    private final static int MAGIC = 0xBABB1E;

    /**
     * Current binary file version.
     */
    private final static int VERSION = 1;

    /**
     * URL for the compiled form.
     */
    private URL compiledURL;

    /**
     * URL for the addenda.
     */
    private URL addendaURL;

    /**
     * URL for the letter to sound rules.
     */
    private URL letterToSoundURL;

    /**
     * The addenda.
     */
    private Map addenda;

    /**
     * The compiled lexicon.
     */
    private Map compiled;

    /**
     * The LetterToSound rules.
     */
    private LetterToSound letterToSound = null;

    /**
     * Parts of Speech.
     */
    private ArrayList partsOfSpeech = new ArrayList();

    /**
     * A static directory of compiledURL URL objects and associated
     * already-loaded compiled Map objects. This is used to share
     * the immutable compiled lexicons between lexicon instances.
     * As the addenda can be changed using <code>addAddendum()</code>
     * and <code>removeAddendum</code>, each lexicon instance has its
     * own addenda.
     */
    private static Map loadedCompiledLexicons;

    /**
     * Loaded state of the lexicon.
     */
    private boolean loaded = false;

    /**
     * Type of lexicon to load: <code>true</code> if the input streams
     * are binary; otherwise, they are text.
     */
    private boolean binary = false;

    /**
     * No phones for this word.
     */
    final static private String[] NO_PHONES = new String[0];

    /**
     * Temporary place holder.
     */
    private char charBuffer[] = new char[128];

    /**
     * Use the new IO package?  Read from the system property
     * <code>com.sun.speech.freetts.useNewIO</code>, which defaults to
     * <code>true</code>.
     */
    private boolean useNewIO =
        Utilities.getProperty("com.sun.speech.freetts.useNewIO",
                "true").equals("true");

    /**
     * Create a new LexiconImpl by reading from the given URLS.
     *
     * @param compiledURL a URL pointing to the compiled lexicon
     * @param addendaURL a URL pointing to lexicon addenda
     * @param letterToSoundURL a LetterToSound to use if a word cannot
     *   be found in the compiled form or the addenda
     * @param binary if <code>true</code>, the input streams are binary;
     *   otherwise, they are text.
*/     public LexiconImpl(URL compiledURL, URL addendaURL,                       URL letterToSoundURL,		       boolean binary) {	this();	setLexiconParameters(compiledURL, addendaURL, letterToSoundURL, binary);    }    /**     * Class constructor for an empty Lexicon.     */    public LexiconImpl() {        // Find out when to convert the phone string into an array.        //        String tokenize =	    Utilities.getProperty("com.sun.speech.freetts.lexicon.LexTokenize",                               "never");	tokenizeOnLoad = tokenize.equals("load");	tokenizeOnLookup = tokenize.equals("lookup");    }    /**     * Sets the lexicon parameters     * @param compiledURL a URL pointing to the compiled lexicon     * @param addendaURL a URL pointing to lexicon addenda     * @param letterToSoundURL a URL pointing to the LetterToSound to use     * @param binary if <code>true</code>, the input streams are binary;     *   otherwise, they are text.     */     protected void setLexiconParameters(URL compiledURL,                                        URL addendaURL,                                        URL letterToSoundURL,                                        boolean binary) {	this.compiledURL = compiledURL;	this.addendaURL = addendaURL;        this.letterToSoundURL = letterToSoundURL;	this.binary = binary;    }    /**     * Determines if this lexicon is loaded.     *     * @return <code>true</code> if the lexicon is loaded     */    public boolean isLoaded() {	return loaded;    }    /**     * Loads the data for this lexicon.  
If the      *     * @throws IOException if errors occur during loading     */    public void load() throws IOException {	BulkTimer.LOAD.start("Lexicon");	if (compiledURL == null) {	    throw new IOException("Can't load lexicon");	}	if (addendaURL == null) {	    throw new IOException("Can't load lexicon addenda " );	}	if (loadedCompiledLexicons == null) {	    loadedCompiledLexicons = new HashMap();	}	if (!loadedCompiledLexicons.containsKey(compiledURL)) {		InputStream compiledIS = Utilities.getInputStream(compiledURL);		if (compiledIS == null) {		    throw new IOException("Can't load lexicon from " + compiledURL);		}		Map newCompiled = createLexicon(compiledIS, binary, 65000);        loadedCompiledLexicons.put(compiledURL, newCompiled);    	compiledIS.close();	}	assert loadedCompiledLexicons.containsKey(compiledURL);	compiled = Collections.unmodifiableMap((Map)loadedCompiledLexicons.get(compiledURL));	InputStream addendaIS = Utilities.getInputStream(addendaURL);	if (addendaIS == null) {	    throw new IOException("Can't load lexicon addenda from " 		    + addendaURL);	}	// [[[TODO: what is the best way to derive the estimated sizes?]]]        //        addenda = createLexicon(addendaIS, binary, 50);	addendaIS.close();        /* Load the user-defined addenda and override any existing         * entries in the system addenda.         
*/        String userAddenda = Utilities.getProperty(            "com.sun.speech.freetts.lexicon.userAddenda", null);        if (userAddenda != null) {            try {                URL userAddendaURL = new URL(userAddenda);                InputStream userAddendaIS = Utilities.getInputStream(                    userAddendaURL);                if (userAddendaIS == null) {                    throw new IOException("Can't load user addenda from "                                          + userAddenda);                }                Map tmpAddenda = createLexicon(userAddendaIS, false, 50);                userAddendaIS.close();                for (Iterator keys = tmpAddenda.keySet().iterator();                     keys.hasNext();) {                    Object key = keys.next();                    addenda.put(key, tmpAddenda.get(key));                }            } catch (MalformedURLException e) {                throw new IOException("User addenda URL is malformed: " +                                      userAddenda);            }        }        	loaded = true;	BulkTimer.LOAD.stop("Lexicon");	letterToSound = new LetterToSoundImpl(letterToSoundURL, binary);    }    /**     * Reads the given input stream as lexicon data and returns the     * results in a <code>Map</code>.     
*     * @param is the input stream     * @param binary if <code>true</code>, the data is binary     * @param estimatedSize the estimated size of the lexicon     *     * @throws IOException if errors are encountered while reading the data     */    protected Map createLexicon(InputStream is,                                boolean binary,                                 int estimatedSize)         throws IOException {	if (binary) {	    if (useNewIO && is instanceof FileInputStream) {		FileInputStream fis = (FileInputStream) is;		return loadMappedBinaryLexicon(fis, estimatedSize);	    } else {		DataInputStream dis = new DataInputStream(			new BufferedInputStream(is));		return loadBinaryLexicon(dis, estimatedSize);	    }	}  else {	    return loadTextLexicon(is, estimatedSize);	}    }    /**     * Reads the given input stream as text lexicon data and returns the     * results in a <code>Map</code>.     *     * @param is the input stream     * @param estimatedSize the estimated number of entries of the lexicon     *     * @throws IOException if errors are encountered while reading the data     */    protected Map loadTextLexicon(InputStream is, int estimatedSize) 	throws IOException {        Map lexicon = new LinkedHashMap(estimatedSize * 4 / 3);        BufferedReader reader = new BufferedReader(new InputStreamReader(is));        String line;                line = reader.readLine();        while (line != null) {            if (!line.startsWith("***")) {                parseAndAdd(lexicon, line);            }            line = reader.readLine();        }        return lexicon;    }        /**     * Creates a word from the given input line and add it to the lexicon.     
*     * @param lexicon the lexicon     * @param line the input text     */    protected void parseAndAdd(Map lexicon, String line) {        StringTokenizer tokenizer = new StringTokenizer(line,"\t");        String phones = null;                String wordAndPos = tokenizer.nextToken();        String pos = wordAndPos.substring(wordAndPos.length() - 1);        if (!partsOfSpeech.contains(pos)) {            partsOfSpeech.add(pos);        }        if (tokenizer.hasMoreTokens()) {            phones = tokenizer.nextToken();        }	if ((phones != null) && (tokenizeOnLoad)) {	    lexicon.put(wordAndPos, getPhones(phones));        } else if (phones == null) {            lexicon.put(wordAndPos, NO_PHONES);        } else {            lexicon.put(wordAndPos, phones);        }    }        /**     * Gets the phone list for a given word.  If a phone list cannot     * be found, returns <code>null</code>.  The format is lexicon     * dependent.  If the part of speech does not matter, pass in     * <code>null</code>.     *     * @param word the word to find     * @param partOfSpeech the part of speech     *     * @return the list of phones for word or <code>null</code>     */    public String[] getPhones(String word, String partOfSpeech) {    	return getPhones(word, partOfSpeech, true);    }    /**     * Gets the phone list for a given word.  If a phone list cannot     * be found, <code>null</code> is returned.  The     * <code>partOfSpeech</code> is implementation dependent, but     * <code>null</code> always matches.     *     * @param word the word to find     * @param partOfSpeech the part of speech or <code>null</code>     * @param useLTS whether to use the letter-to-sound rules when     *        the word is not in the lexicon.     
*     * @return the list of phones for word or null     */        public String[] getPhones			(String word, String partOfSpeech, boolean useLTS){    	String[] phones = null; 	phones = getPhones(addenda, word, partOfSpeech);    	if (phones == null) {    	    phones = getPhones(compiled, word, partOfSpeech);    	}    	if(useLTS){            if (phones == null && letterToSound != null) {                phones = letterToSound.getPhones(word, partOfSpeech);            }    	}    	if(phones != null){    	String[] copy = new String[phones.length];
lexiconimpl.java - 源码说明

本页面展示了「这是java 开发的的免费语音播放插件,很值得学习参考!!!!!!!!!!!!111」中的 lexiconimpl.java 源码文件，采用 Java 编程语言编写，共 876 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?