📄 tokentowords.java
字号:
* Converts the given Token into (word) Items in the WordRelation.
     *
     * @param tokenVal the String value of the token, which may or may not be
     * same as the one in called "name" in flite
     *
     */
    private void tokenToWords(String tokenVal) {
        /*
         * Dispatches on the shape of tokenVal. Only the first branch of the
         * if/else chain whose condition holds is executed, so the order of
         * the branches is significant (e.g. a token containing both an
         * apostrophe and a dash is handled by the apostrophe branch).
         * Two branches (the '%' and the numessPattern cases) call this
         * method recursively on a shortened token.
         */
        FeatureSet tokenFeatures = tokenItem.getFeatures();
        String itemName = tokenFeatures.getString("name");
        int tokenLength = tokenVal.length();

        if (tokenFeatures.isPresent("phones")) {
            /* the token carries a "phones" feature: add it unchanged */
            wordRelation.addWord(tokenVal);

        } else if ((tokenVal.equals("a") || tokenVal.equals("A")) &&
                ((tokenItem.getNext() == null) ||
                 !(tokenVal.equals(itemName)) ||
                 !(((String) tokenItem.findFeature("punc")).equals("")))) {
            /*
             * "a"/"A" that is the last item, or differs from the item's
             * "name" feature (i.e. is only a sub part of the token), or has
             * a non-empty "punc" feature: it is pronounced "ey", not "ah",
             * which is signalled by the special word "_a".
             */
            wordRelation.addWord("_a");

        } else if (matches(alphabetPattern, tokenVal)) {

            if (matches(romanNumbersPattern, tokenVal)) {
                /* XVIII */
                romanToWords(tokenVal);

            } else if (matches(illionPattern, tokenVal) &&
                       matches(usMoneyPattern,
                               (String) tokenItem.findFeature("p.name"))) {
                /*
                 * $ X -illion: the "p.name" feature (presumably the previous
                 * token's name -- confirm) looks like US money, so the
                 * "-illion" word is followed by "dollars".
                 */
                wordRelation.addWord(tokenVal);
                wordRelation.addWord("dollars");

            } else if (matches(drStPattern, tokenVal)) {
                /* St Andrew's St, Dr King Dr */
                drStToWords(tokenVal);

            } else if (tokenVal.equals("Mr")) {
                /* reset the "punc" feature to empty and spell the title out */
                tokenItem.getFeatures().setString("punc", "");
                wordRelation.addWord("mister");

            } else if (tokenVal.equals("Mrs")) {
                /* reset the "punc" feature to empty and spell the title out */
                tokenItem.getFeatures().setString("punc", "");
                wordRelation.addWord("missus");

            } else if (tokenLength == 1
                       && isUppercaseLetter(tokenVal.charAt(0))
                       && ((String)tokenItem.findFeature("n.whitespace")).equals(" ")
                       && isUppercaseLetter
                       (((String) tokenItem.findFeature("n.name")).charAt(0))) {
                /*
                 * A single uppercase letter whose "n.whitespace" feature is
                 * exactly one space and whose "n.name" feature starts with an
                 * uppercase letter (presumably the following token, e.g. an
                 * initial in front of a name -- confirm). The "punc" feature
                 * is reset and the lower-cased letter is added, with "a"
                 * mapped to the special word "_a".
                 */
                tokenFeatures.setString("punc", "");
                String aaa = tokenVal.toLowerCase();
                if (aaa.equals("a")) {
                    wordRelation.addWord("_a");
                } else {
                    wordRelation.addWord(aaa);
                }
            } else if (isStateName(tokenVal)) {
                /*
                 * The name of a US state.
                 * isStateName() has already added the full name of the
                 * state, so we're all set.
                 */
            } else if (tokenLength > 1 && !isPronounceable(tokenVal)) {
                /* Need common exception list */
                /* unpronounceable list of alphas: spell it letter by letter */
                NumberExpander.expandLetters
                    (tokenVal, wordRelation);

            } else {
                /* just a word */
                wordRelation.addWord(tokenVal.toLowerCase());
            }

        } else if (matches(dottedAbbrevPattern, tokenVal)) {

            /* U.S.A. */
            // remove all dots, then expand what is left as letters
            String aaa = Utilities.deleteChar(tokenVal, '.');
            NumberExpander.expandLetters(aaa, wordRelation);

        } else if (matches(commaIntPattern, tokenVal)) {

            /* 99,999,999 */
            // remove the commas, then expand via expandReal
            String aaa = Utilities.deleteChar(tokenVal, ',');
            NumberExpander.expandReal(aaa, wordRelation);

        } else if (matches(sevenPhoneNumberPattern, tokenVal)) {

            /* 234-3434 telephone numbers */
            // digits before the first dash, a break, then the digits after it
            int dashIndex = tokenVal.indexOf('-');
            String aaa = tokenVal.substring(0, dashIndex);
            String bbb = tokenVal.substring(dashIndex+1);

            NumberExpander.expandDigits(aaa, wordRelation);
            wordRelation.addBreak();
            NumberExpander.expandDigits(bbb, wordRelation);

        } else if (matchesPartPhoneNumber(tokenVal)) {

            /* part of a telephone number */
            // an empty "punc" feature is replaced by "," before expanding
            String punctuation = (String) tokenItem.findFeature("punc");
            if (punctuation.equals("")) {
                tokenItem.getFeatures().setString("punc", ",");
            }
            NumberExpander.expandDigits(tokenVal, wordRelation);
            wordRelation.addBreak();

        } else if (matches(numberTimePattern, tokenVal)) {

            /* 12:35 */
            // the part before the colon is expanded as a number; the part
            // after it is expanded via expandID unless it is exactly "00"
            int colonIndex = tokenVal.indexOf(':');
            String aaa = tokenVal.substring(0, colonIndex);
            String bbb = tokenVal.substring(colonIndex+1);

            NumberExpander.expandNumber(aaa, wordRelation);
            if (!(bbb.equals("00"))) {
                NumberExpander.expandID(bbb, wordRelation);
            }

        } else if (matches(digits2DashPattern, tokenVal)) {

            /* 999-999-999 */
            digitsDashToWords(tokenVal);

        } else if (matches(digitsPattern, tokenVal)) {

            digitsToWords(tokenVal);

        } else if (tokenLength == 1
                   && isUppercaseLetter(tokenVal.charAt(0))
                   && ((String)tokenItem.findFeature("n.whitespace")).equals
                   (" ")
                   && isUppercaseLetter
                   (((String) tokenItem.findFeature("n.name")).charAt(0))) {

            /*
             * NOTE(review): same condition and body as the single
             * uppercase letter branch inside the alphabetPattern case above.
             * Whether this branch can still be reached depends on what
             * alphabetPattern accepts -- confirm before removing.
             */
            tokenFeatures.setString("punc", "");
            String aaa = tokenVal.toLowerCase();
            if (aaa.equals("a")) {
                wordRelation.addWord("_a");
            } else {
                wordRelation.addWord(aaa);
            }
        } else if (matches(doublePattern, tokenVal)) {

            NumberExpander.expandReal(tokenVal, wordRelation);

        } else if (matches(ordinalPattern, tokenVal)) {

            /* explicit ordinals */
            // drop the last two characters (presumably the ordinal suffix
            // -- confirm against ordinalPattern) and expand the rest
            String aaa = tokenVal.substring(0, tokenLength - 2);
            NumberExpander.expandOrdinal(aaa, wordRelation);

        } else if (matches(usMoneyPattern, tokenVal)) {

            /* US money */
            usMoneyToWords(tokenVal);

        } else if (tokenLength > 0
                   && tokenVal.charAt(tokenLength - 1) == '%') {

            /* Y% */
            // recurse on the token without the trailing '%', then "per cent"
            tokenToWords(tokenVal.substring(0, tokenLength - 1));
            wordRelation.addWord("per");
            wordRelation.addWord("cent");

        } else if (matches(numessPattern, tokenVal)) {

            /* 60s and 7s and 9s */
            // recurse on the token without its last character, then add "'s"
            tokenToWords(tokenVal.substring(0, tokenLength - 1));
            wordRelation.addWord("'s");

        } else if (tokenVal.indexOf('\'') != -1) {

            /* any remaining token that contains an apostrophe */
            postropheToWords(tokenVal);

        } else if (matches(digitsSlashDigitsPattern, tokenVal) &&
                   tokenVal.equals(itemName)) {

            /* digits/digits, only when tokenVal equals the "name" feature */
            digitsSlashDigitsToWords(tokenVal);

        } else if (tokenVal.indexOf('-') != -1) {

            /* any remaining token that contains a dash */
            dashToWords(tokenVal);

        } else if (tokenLength > 1 &&
                   !matches(alphabetPattern, tokenVal)) {

            /* longer than one character and not matched by alphabetPattern */
            notJustAlphasToWords(tokenVal);

        } else {
            /* just a word */
            wordRelation.addWord(tokenVal.toLowerCase());
        }
    }


    /**
     * Convert the given digit token with dashes (e.g. 999-999-999)
     * into (word) Items in the WordRelation.
     *
     * <p>Each dash-separated group is passed to
     * <code>NumberExpander.expandDigits</code> and is followed by a call to
     * <code>wordRelation.addBreak()</code>; this includes the last group.
     *
     * @param tokenVal the digit string
     */
    private void digitsDashToWords(String tokenVal) {
        int tokenLength = tokenVal.length();
        int a = 0;   // start index of the group currently being scanned
        for (int p = 0; p <= tokenLength; p++) {
            // p == tokenLength is treated like a dash so that the final
            // group is expanded as well
            if (p == tokenLength || tokenVal.charAt(p) == '-') {
                String aaa = tokenVal.substring(a, p);
                NumberExpander.expandDigits(aaa, wordRelation);
                wordRelation.addBreak();
                a = p+1;   // next group starts right after the dash
            }
        }
    }


    /**
     * Convert the given digit token into (word) Items in the WordRelation.
* * @param tokenVal the digit string */ private void digitsToWords(String tokenVal) { FeatureSet featureSet = tokenItem.getFeatures(); String nsw = ""; if (featureSet.isPresent("nsw")) { nsw = featureSet.getString("nsw"); } if (nsw.equals("nide")) { NumberExpander.expandID(tokenVal, wordRelation); } else { String rName = featureSet.getString("name"); String digitsType = null; if (tokenVal.equals(rName)) { digitsType = (String) cart.interpret(tokenItem); } else { featureSet.setString("name", tokenVal); digitsType = (String) cart.interpret(tokenItem); featureSet.setString("name", rName); } if (digitsType.equals("ordinal")) { NumberExpander.expandOrdinal(tokenVal, wordRelation); } else if (digitsType.equals("digits")) { NumberExpander.expandDigits(tokenVal, wordRelation); } else if (digitsType.equals("year")) { NumberExpander.expandID(tokenVal, wordRelation); } else { NumberExpander.expandNumber(tokenVal, wordRelation); } } } /** * Converts the given Roman numeral string into (word) Items in the * WordRelation. * * @param romanString the roman numeral string */ private void romanToWords(String romanString) { String punctuation = (String) tokenItem.findFeature("p.punc"); if (punctuation.equals("")) { /* no preceeding punctuation */ String n = String.valueOf(NumberExpander.expandRoman(romanString)); if (kingLike(tokenItem)) { wordRelation.addWord("the"); NumberExpander.expandOrdinal(n, wordRelation); } else if (sectionLike(tokenItem)) { NumberExpander.expandNumber(n, wordRelation); } else { NumberExpander.expandLetters(romanString, wordRelation); } } else { NumberExpander.expandLetters(romanString, wordRelation); } } /** * Returns true if the given key is in the kingSectionLikeHash * Hashtable, and the value is the same as the given value. * * @param key key to look for in the hashtable * @param value the value to match * * @return true if it matches, or false if it does not or if * the key is not mapped to any value in the hashtable. 
*/ private static boolean inKingSectionLikeHash(String key, String value) { String hashValue = (String) kingSectionLikeHash.get(key); if (hashValue != null) { return (hashValue.equals(value)); } else { return false; } } /** * Returns true if the given token item contains a token that is * in a king-like context, e.g., "King" or "Louis". * * @param tokenItem the token item to check * * @return true or false */ public static boolean kingLike(Item tokenItem) { String kingName = ((String) tokenItem.findFeature("p.name")).toLowerCase(); if (inKingSectionLikeHash(kingName, KING_NAMES)) { return true; } else { String kingTitle = ((String) tokenItem.findFeature("p.p.name")).toLowerCase(); return inKingSectionLikeHash(kingTitle, KING_TITLES); } } /** * Returns true if the given token item contains a token that is * in a section-like context, e.g., "chapter" or "act". * * @param tokenItem the token item to check * * @return true or false */ public static boolean sectionLike(Item tokenItem) { String sectionType = ((String) tokenItem.findFeature("p.name")).toLowerCase(); return inKingSectionLikeHash(sectionType, SECTION_TYPES); } /** * Converts the given string containing "St" and "Dr" to (word) Items * in the WordRelation. * * @param drStString the string with "St" and "Dr" */ private void drStToWords(String drStString) { String street = null; String saint = null; char c0 = drStString.charAt(0); if (c0 == 's' || c0 == 'S') { street = "street"; saint = "saint"; } else { street = "drive"; saint = "doctor"; } FeatureSet featureSet = tokenItem.getFeatures(); String punctuation = featureSet.getString("punc"); String featPunctuation = (String) tokenItem.findFeature("punc"); if (tokenItem.getNext() == null || punctuation.indexOf(',') != -1) { wordRelation.addWord(street); } else if (featPunctuation.equals(",")) { wordRelation.addWord(saint);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -