tokentowords.java

来自「这是java 开发的的免费语音播放插件,很值得学习参考!!!!!!!!!!!!1」· Java 代码 · 共 1,126 行 · 第 1/3 页
JAVA
1,126 行
/** * Portions Copyright 2001-2003 Sun Microsystems, Inc. * Portions Copyright 1999-2001 Language Technologies Institute,  * Carnegie Mellon University. * All Rights Reserved.  Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL  * WARRANTIES. */package com.sun.speech.freetts.en.us;import java.io.*;import java.util.Hashtable;import java.util.Iterator;import java.util.List;import java.util.LinkedList;import java.util.regex.Pattern;import java.util.regex.Matcher;import com.sun.speech.freetts.FeatureSet;import com.sun.speech.freetts.FeatureSetImpl;import com.sun.speech.freetts.Item;import com.sun.speech.freetts.PathExtractor;import com.sun.speech.freetts.PathExtractorImpl;import com.sun.speech.freetts.ProcessException;import com.sun.speech.freetts.Relation;import com.sun.speech.freetts.Utterance;import com.sun.speech.freetts.UtteranceProcessor;import com.sun.speech.freetts.cart.CART;import com.sun.speech.freetts.util.Utilities;/** * Converts the Tokens (in US English words) in an  * Utterance into a list of words. It puts the produced list back * into the Utterance. Usually, the tokens that gets expanded are numbers * like "23" (to "twenty" "three"). * <p> * It translates the following code from flite: * <br> * <code> * lang/usenglish/us_text.c * </code> */public class TokenToWords implements UtteranceProcessor {    /** Regular expression for something that has a vowel */    private static final String RX_HAS_VOWEL = ".*[aeiouAEIOU].*";                                    // Patterns for regular expression matching    private static final Pattern alphabetPattern;    private static final Pattern commaIntPattern;    private static final Pattern digits2DashPattern;    private static final Pattern digitsPattern;    private static final Pattern digitsSlashDigitsPattern;    private static final Pattern dottedAbbrevPattern;    private static final Pattern doublePattern;    private static final Pattern drStPattern;    private static final Pattern fourDigitsPattern;    private static final Pattern hasVowelPattern;    private static final Pattern illionPattern;    private static final Pattern numberTimePattern;    private static final Pattern numessPattern;    private static final Pattern ordinalPattern;    private static final Pattern romanNumbersPattern;    private static final Pattern sevenPhoneNumberPattern;    private static final Pattern threeDigitsPattern;    private static final Pattern usMoneyPattern;        static {	alphabetPattern = Pattern.compile(USEnglish.RX_ALPHABET);	commaIntPattern = Pattern.compile(USEnglish.RX_COMMAINT);	digits2DashPattern = Pattern.compile(USEnglish.RX_DIGITS2DASH);	digitsPattern = Pattern.compile(USEnglish.RX_DIGITS);	digitsSlashDigitsPattern = Pattern.compile(USEnglish.RX_DIGITSSLASHDIGITS);	dottedAbbrevPattern = Pattern.compile(USEnglish.RX_DOTTED_ABBREV);	doublePattern = Pattern.compile(USEnglish.RX_DOUBLE);	drStPattern = Pattern.compile(USEnglish.RX_DRST);	fourDigitsPattern = Pattern.compile(USEnglish.RX_FOUR_DIGIT);	hasVowelPattern = Pattern.compile(USEnglish.RX_HAS_VOWEL);	illionPattern = Pattern.compile(USEnglish.RX_ILLION);	numberTimePattern = Pattern.compile(USEnglish.RX_NUMBER_TIME);	numessPattern = Pattern.compile(USEnglish.RX_NUMESS);	ordinalPattern = Pattern.compile(USEnglish.RX_ORDINAL_NUMBER);	romanNumbersPattern = Pattern.compile(USEnglish.RX_ROMAN_NUMBER);	sevenPhoneNumberPattern = Pattern.compile(USEnglish.RX_SEVEN_DIGIT_PHONE_NUMBER);	threeDigitsPattern = Pattern.compile(USEnglish.RX_THREE_DIGIT);	usMoneyPattern = Pattern.compile(USEnglish.RX_US_MONEY);    }    // King-like words     private static final String[] kingNames = {	"louis", "henry", "charles", "philip", "george",	"edward", "pius", "william", "richard", "ptolemy",	"john", "paul", "peter", "nicholas", "frederick",	"james", "alfonso", "ivan", "napoleon", "leo",	"gregory", "catherine", "alexandria", "pierre", "elizabeth",	"mary" };        private static final String[] kingTitles = {	"king", "queen", "pope", "duke", "tsar",	"emperor", "shah", "caesar", "duchess", "tsarina",	"empress", "baron", "baroness", "sultan", "count",	"countess" };    // Section-like words    private static final String[] sectionTypes = {	"section", "chapter", "part", "phrase", "verse",	"scene", "act", "book", "volume", "chap",	"war", "apollo", "trek", "fortran" };        /**     * Here we use a hashtable for constant time matching, instead of using     * if (A.equals(B) || A.equals(C) || ...) to match Strings     */    private static Hashtable kingSectionLikeHash = new Hashtable();    private static final String KING_NAMES = "kingNames";    private static final String KING_TITLES = "kingTitles";    private static final String SECTION_TYPES = "sectionTypes";    // Hashtable initialization    static {	for (int i = 0; i < kingNames.length; i++) {	    kingSectionLikeHash.put(kingNames[i], KING_NAMES);	}	for (int i = 0; i < kingTitles.length; i++) {	    kingSectionLikeHash.put(kingTitles[i], KING_TITLES);	}	for (int i = 0; i < sectionTypes.length; i++) {	    kingSectionLikeHash.put(sectionTypes[i], SECTION_TYPES);	}    }    private static final String[] postrophes = {	"'s", "'ll", "'ve", "'d" };    // Finite state machines to check if a Token is pronounceable    private PronounceableFSM prefixFSM = null;    private PronounceableFSM suffixFSM = null;    // List of US states abbreviations and their full names    private static final String[][] usStates =    {	{ "AL", "ambiguous", "alabama"  },	{ "Al", "ambiguous", "alabama"  },	{ "Ala", "", "alabama"  },	{ "AK", "", "alaska"  },	{ "Ak", "", "alaska"  },	{ "AZ", "", "arizona"  },	{ "Az", "", "arizona"  },	{ "CA", "", "california"  },	{ "Ca", "", "california"  },	{ "Cal", "ambiguous", "california"  },	{ "Calif", "", "california"  },	{ "CO", "ambiguous", "colorado"  },	{ "Co", "ambiguous", "colorado"  },	{ "Colo", "", "colorado"  },	{ "DC", "", "d" , "c" },	{ "DE", "", "delaware"  },	{ "De", "ambiguous", "delaware"  },	{ "Del", "ambiguous", "delaware"  },	{ "FL", "", "florida"  },	{ "Fl", "ambiguous", "florida"  },	{ "Fla", "", "florida"  },	{ "GA", "", "georgia"  },	{ "Ga", "", "georgia"  },	{ "HI", "ambiguous", "hawaii"  },	{ "Hi", "ambiguous", "hawaii"  },	{ "IA", "", "iowa"  },	{ "Ia", "ambiguous", "iowa"  },	{ "IN", "ambiguous", "indiana"  },	{ "In", "ambiguous", "indiana"  },	{ "Ind", "ambiguous", "indiana"  },	{ "ID", "ambiguous", "idaho"  },	{ "IL", "ambiguous", "illinois"  },	{ "Il", "ambiguous", "illinois"  },	{ "ILL", "ambiguous", "illinois"  },	{ "KS", "", "kansas"  },	{ "Ks", "", "kansas"  },	{ "Kans", "", "kansas"  },	{ "KY", "ambiguous", "kentucky"  },	{ "Ky", "ambiguous", "kentucky"  },	{ "LA", "ambiguous", "louisiana"  },	{ "La", "ambiguous", "louisiana"  },	{ "Lou", "ambiguous", "louisiana"  },	{ "Lous", "ambiguous", "louisiana"  },	{ "MA", "ambiguous", "massachusetts"  },	{ "Mass", "ambiguous", "massachusetts"  },	{ "Ma", "ambiguous", "massachusetts"  },	{ "MD", "ambiguous", "maryland"  },	{ "Md", "ambiguous", "maryland"  },	{ "ME", "ambiguous", "maine"  },	{ "Me", "ambiguous", "maine"  },	{ "MI", "", "michigan"  },	{ "Mi", "ambiguous", "michigan"  },	{ "Mich", "ambiguous", "michigan"  },	{ "MN", "ambiguous", "minnestota"  },	{ "Minn", "ambiguous", "minnestota"  },	{ "MS", "ambiguous", "mississippi"  },	{ "Miss", "ambiguous", "mississippi"  },	{ "MT", "ambiguous", "montanna"  },	{ "Mt", "ambiguous", "montanna"  },	{ "MO", "ambiguous", "missouri"  },	{ "Mo", "ambiguous", "missouri"  },	{ "NC", "ambiguous", "north" , "carolina" },	{ "ND", "ambiguous", "north" , "dakota" },	{ "NE", "ambiguous", "nebraska"  },	{ "Ne", "ambiguous", "nebraska"  },	{ "Neb", "ambiguous", "nebraska"  },	{ "NH", "ambiguous", "new" , "hampshire" },	{ "NV", "", "nevada"  },	{ "Nev", "", "nevada"  },	{ "NY", "", "new" , "york" },	{ "OH", "ambiguous", "ohio"  },	{ "OK", "ambiguous", "oklahoma"  },	{ "Okla", "", "oklahoma"  },	{ "OR", "ambiguous", "oregon"  },	{ "Or", "ambiguous", "oregon"  },	{ "Ore", "ambiguous", "oregon"  },	{ "PA", "ambiguous", "pennsylvania"  },	{ "Pa", "ambiguous", "pennsylvania"  },	{ "Penn", "ambiguous", "pennsylvania"  },	{ "RI", "ambiguous", "rhode" , "island" },	{ "SC", "ambiguous", "south" , "carlolina" },	{ "SD", "ambiguous", "south" , "dakota" },	{ "TN", "ambiguous", "tennesee"  },	{ "Tn", "ambiguous", "tennesee"  },	{ "Tenn", "ambiguous", "tennesee"  },	{ "TX", "ambiguous", "texas"  },	{ "Tx", "ambiguous", "texas"  },	{ "Tex", "ambiguous", "texas"  },	{ "UT", "ambiguous", "utah"  },	{ "VA", "ambiguous", "virginia"  },	{ "WA", "ambiguous", "washington"  },	{ "Wa", "ambiguous", "washington"  },	{ "Wash", "ambiguous", "washington"  },	{ "WI", "ambiguous", "wisconsin"  },	{ "Wi", "ambiguous", "wisconsin"  },	{ "WV", "ambiguous", "west" , "virginia" },	{ "WY", "ambiguous", "wyoming"  },	{ "Wy", "ambiguous", "wyoming"  },	{ "Wyo", "", "wyoming"  },	{ "PR", "ambiguous", "puerto" , "rico" }    };    // Again hashtable for constant time searching    private static Hashtable usStatesHash = new Hashtable();        // initialize the Hashtable for usStates    static {	for (int i = 0; i < usStates.length; i++) {	    usStatesHash.put(usStates[i][0], usStates[i]);	}    };    // class variables    // the word relation that we are building    private WordRelation wordRelation;    // the current token Item    private Item tokenItem;    // a CART for classifying numbers    private CART cart;    /**     * Constructs a default USTokenWordProcessor. It uses the USEnglish     * regular expression set (USEngRegExp) by default.     *     * @param usNumbersCART the cart to use to classify numbers     */    public TokenToWords(CART usNumbersCART,			PronounceableFSM prefixFSM,			PronounceableFSM suffixFSM) {	this.cart = usNumbersCART;	this.prefixFSM = prefixFSM;	this.suffixFSM = suffixFSM;    }    /**     * Returns the currently processing token Item.     *     * @return the current token Item; null if no item     */    public Item getTokenItem() {	return tokenItem;    }    /**     *  process the utterance     *     * @param  utterance  the utterance contain the tokens     *     * @throws ProcessException if an IOException is thrown during the     *         processing of the utterance     */    public void processUtterance(Utterance utterance) throws ProcessException {	Relation tokenRelation;	if ((tokenRelation = utterance.getRelation(Relation.TOKEN)) == null) {	    throw new IllegalStateException		("TokenToWords: Token relation does not exist");	}		Item wordItem;	wordRelation = WordRelation.createWordRelation(utterance, this);		for (tokenItem = tokenRelation.getHead();	     tokenItem != null;	     tokenItem = tokenItem.getNext()) {	    FeatureSet featureSet = tokenItem.getFeatures();	    String tokenVal = featureSet.getString("name");	    	    // convert the token into a list of words	    tokenToWords(tokenVal);	}    }    /**     * Returns true if the given token matches part of a phone number     *     * @param tokenItem the token     * @param tokenVal the string value of the token     *     * @return true or false     */    private boolean matchesPartPhoneNumber(String tokenVal) {	String n_name = (String) tokenItem.findFeature("n.name");	String n_n_name = (String) tokenItem.findFeature("n.n.name");	String p_name = (String) tokenItem.findFeature("p.name");	String p_p_name = (String) tokenItem.findFeature("p.p.name");	boolean matches3DigitsP_name = matches(threeDigitsPattern, p_name);	return ((matches(threeDigitsPattern, tokenVal) &&		 ((!matches(digitsPattern, p_name)		   && matches(threeDigitsPattern, n_name)		   && matches(fourDigitsPattern, n_n_name)) ||		  (matches(sevenPhoneNumberPattern, n_name)) ||		  (!matches(digitsPattern, p_p_name)		   && matches3DigitsP_name		   && matches(fourDigitsPattern, n_name)))) ||		(matches(fourDigitsPattern, tokenVal) &&		 (!matches(digitsPattern, n_name)		  && matches3DigitsP_name		  && matches(threeDigitsPattern, p_p_name))));    }        /**     * Returns true if the given string is in the given string array.     *     * @param value the string to check     * @param stringArray the array to check     *     * @return true if the string is in the array, false otherwise     */    private static boolean inStringArray(String value, String[] stringArray) {	for (int i = 0; i < stringArray.length; i++) {	    if (stringArray[i].equals(value)) {		return true;	    }	}	return false;    }    /**
tokentowords.java - 源码说明

本页面展示了「这是java 开发的的免费语音播放插件,很值得学习参考!!!!!!!!!!!!111」中的 tokentowords.java 源码文件，采用 Java 编程语言编写，共 1,126 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?