📄 lexiconimpl.java
字号:
System.arraycopy(phones, 0, copy, 0, phones.length); return copy; } else return null; } /** * Gets a phone list for a word from a given lexicon. If a phone * list cannot be found, returns <code>null</code>. The format is * lexicon dependent. If the part of speech does not matter, pass * in <code>null</code>. * * @param lexicon the lexicon * @param word the word to find * @param partOfSpeech the part of speech * * @return the list of phones for word or <code>null</code> */ protected String[] getPhones(Map lexicon, String word, String partOfSpeech) { String[] phones; partOfSpeech = fixPartOfSpeech(partOfSpeech); phones = getPhones(lexicon, word+partOfSpeech); for (int i = 0; (i < partsOfSpeech.size()) && (phones == null); i++) { if (!partOfSpeech.equals((String) partsOfSpeech.get(i))) { phones = getPhones(lexicon, word + (String) partsOfSpeech.get(i)); } } return phones; } /** * Gets a phone list for a word from a given lexicon. If a phone * list cannot be found, returns <code>null</code>. * * @param lexicon the lexicon * @param wordAndPartOfSpeech word and part of speech concatenated * together * * @return the list of phones for word or <code>null</code> */ protected String[] getPhones(Map lexicon, String wordAndPartOfSpeech) { Object value = lexicon.get(wordAndPartOfSpeech); if (value instanceof String[]) { return (String[]) value; } else if (value instanceof String) { String[] phoneArray; phoneArray = getPhones((String) value); if (tokenizeOnLookup) { lexicon.put(wordAndPartOfSpeech, phoneArray); } return phoneArray; } else { return null; } } /** * Turns the phone <code>String</code> into a <code>String[]</code>, * using " " as the delimiter. * * @param phones the phones * * @return the phones split into an array */ protected String[] getPhones(String phones) { ArrayList phoneList = new ArrayList(); StringTokenizer tokenizer = new StringTokenizer(phones, " "); while (tokenizer.hasMoreTokens()) { phoneList.add(tokenizer.nextToken()); } return (String[]) phoneList.toArray(new String[0]); } /** * Adds a word to the addenda. * * @param word the word to find * @param partOfSpeech the part of speech * @param phones the phones for the word * */ public void addAddendum(String word, String partOfSpeech, String[] phones) { String pos = fixPartOfSpeech(partOfSpeech); if (!partsOfSpeech.contains(pos)) { partsOfSpeech.add(pos); } addenda.put(word + pos, phones); } /** * Removes a word from the addenda. * * @param word the word to remove * @param partOfSpeech the part of speech */ public void removeAddendum(String word, String partOfSpeech) { addenda.remove(word + fixPartOfSpeech(partOfSpeech)); } /** * Outputs a string to a data output stream. * * @param dos the data output stream * @param s the string to output * * @throws IOException if errors occur during writing */ private void outString(DataOutputStream dos, String s) throws IOException { dos.writeByte((byte) s.length()); for (int i = 0; i < s.length(); i++) { dos.writeChar(s.charAt(i)); } } /** * Inputs a string from a DataInputStream. This method is not re-entrant. * * @param dis the data input stream * * @return the string * * @throws IOException if errors occur during reading */ private String getString(DataInputStream dis) throws IOException { int size = dis.readByte(); for (int i = 0; i < size; i++) { charBuffer[i] = dis.readChar(); } return new String(charBuffer, 0, size); } /** * Inputs a string from a DataInputStream. This method is not re-entrant. * * @param bb the input byte buffer * * @return the string * * @throws IOException if errors occur during reading */ private String getString(ByteBuffer bb) throws IOException { int size = bb.get(); for (int i = 0; i < size; i++) { charBuffer[i] = bb.getChar(); } return new String(charBuffer, 0, size); } /** * Dumps a binary form of the database. This method is not thread-safe. * * <p>Binary format is: * <pre> * MAGIC * VERSION * (int) numPhonemes * (String) phoneme0 * (String) phoneme1 * (String) phonemeN * (int) numEntries * (String) nameWithPOS * (byte) numPhonemes * phoneme index 1 * phoneme index 2 * phoneme index n * </pre> * * <p>Strings are formatted as: <code>(byte) len char0 char1 charN</code> * * <p>Limits: Strings: 128 chars * <p>Limits: Strings: 128 phonemes per word * * @param lexicon the lexicon to dump * @param path the path to dump the file to */ private void dumpBinaryLexicon(Map lexicon, String path) { try { FileOutputStream fos = new FileOutputStream(path); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(fos)); List phonemeList = findPhonemes(lexicon); dos.writeInt(MAGIC); dos.writeInt(VERSION); dos.writeInt(phonemeList.size()); for (int i = 0; i < phonemeList.size(); i++) { outString(dos, (String) phonemeList.get(i)); } dos.writeInt(lexicon.keySet().size()); for (Iterator i = lexicon.keySet().iterator(); i.hasNext(); ) { String key = (String) i.next(); outString(dos, key); String[] phonemes = getPhones(lexicon, key); dos.writeByte((byte) phonemes.length); for (int index = 0; index < phonemes.length; index++) { int phonemeIndex = phonemeList.indexOf(phonemes[index]); if (phonemeIndex == -1) { throw new Error("Can't find phoneme index"); } dos.writeByte((byte) phonemeIndex); } } dos.close(); } catch (FileNotFoundException fe) { throw new Error("Can't dump binary database " + fe.getMessage()); } catch (IOException ioe) { throw new Error("Can't write binary database " + ioe.getMessage()); } } /** * Loads the binary lexicon from the given InputStream. * This method is not thread safe. * * @param is the InputStream to load the database from * @param estimatedSize estimate of how large the database is * * @return a <code>Map</code> containing the lexicon * * @throws IOException if an IO error occurs */ private Map loadMappedBinaryLexicon(FileInputStream is, int estimatedSize) throws IOException { FileChannel fc = is.getChannel(); MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, (int) fc.size()); bb.load(); int size = 0; int numEntries = 0; List phonemeList = new ArrayList(); // we get better performance for some reason if we // just ignore estimated size // // Map lexicon = new HashMap(); Map lexicon = new LinkedHashMap(estimatedSize * 4 / 3); if (bb.getInt() != MAGIC) { throw new Error("bad magic number in lexicon"); } if (bb.getInt() != VERSION) { throw new Error("bad version number in lexicon"); } size = bb.getInt(); for (int i = 0; i < size; i++) { String phoneme = getString(bb); phonemeList.add(phoneme); } numEntries = bb.getInt(); for (int i = 0; i < numEntries; i++) { String wordAndPos = getString(bb); String pos = Character.toString( wordAndPos.charAt(wordAndPos.length() - 1)); if (!partsOfSpeech.contains(pos)) { partsOfSpeech.add(pos); } int numPhonemes = bb.get(); String[] phonemes = new String[numPhonemes]; for (int j = 0; j < numPhonemes; j++) { phonemes[j] = (String) phonemeList.get(bb.get()); } lexicon.put(wordAndPos, phonemes); } fc.close(); return lexicon; } /** * Loads the binary lexicon from the given InputStream. * This method is not thread safe. * * @param is the InputStream to load the database from * @param estimatedSize estimate of how large the database is * * @return a <code>Map</code> containing the lexicon * * @throws IOException if an IO error occurs */ private Map loadBinaryLexicon(InputStream is, int estimatedSize) throws IOException { DataInputStream dis = new DataInputStream(new BufferedInputStream(is)); int size = 0; int numEntries = 0; List phonemeList = new ArrayList(); // we get better performance for some reason if we // just ignore estimated size // Map lexicon = new LinkedHashMap(); if (dis.readInt() != MAGIC) { throw new Error("bad magic number in lexicon"); } if (dis.readInt() != VERSION) { throw new Error("bad version number in lexicon"); } size = dis.readInt(); for (int i = 0; i < size; i++) { String phoneme = getString(dis); phonemeList.add(phoneme); } numEntries = dis.readInt(); for (int i = 0; i < numEntries; i++) { String wordAndPos = getString(dis); String pos = Character.toString( wordAndPos.charAt(wordAndPos.length() - 1)); if (!partsOfSpeech.contains(pos)) { partsOfSpeech.add(pos); } int numPhonemes = dis.readByte(); String[] phonemes = new String[numPhonemes]; for (int j = 0; j < numPhonemes; j++) { phonemes[j] = (String) phonemeList.get(dis.readByte()); } lexicon.put(wordAndPos, phonemes); } dis.close(); return lexicon; } /** * Dumps this lexicon (just the compiled form). Lexicon will be * dumped to two binary files PATH_compiled.bin and * PATH_addenda.bin * * @param path the root path to dump it to */ public void dumpBinary(String path) { String compiledPath = path + "_compiled.bin"; String addendaPath = path + "_addenda.bin"; dumpBinaryLexicon(compiled, compiledPath); dumpBinaryLexicon(addenda, addendaPath); } /** * Returns a list of the unique phonemes in the lexicon. * * @param lexicon the lexicon of interest * * @return list the unique set of phonemes */ private List findPhonemes(Map lexicon) { List phonemeList = new ArrayList(); for (Iterator i = lexicon.keySet().iterator(); i.hasNext(); ) { String key = (String) i.next(); String[] phonemes = getPhones(lexicon, key); for (int index = 0; index < phonemes.length; index++) { if (!phonemeList.contains(phonemes[index])) { phonemeList.add(phonemes[index]); } } } return phonemeList; } /** * Tests to see if this lexicon is identical to the other for * debugging purposes. * * @param other the other lexicon to compare to * * @return true if lexicons are identical */ public boolean compare(LexiconImpl other) { return compare(addenda, other.addenda) && compare(compiled, other.compiled); } /** * Determines if the two lexicons are identical for debugging purposes. * * @param lex this lex * @param other the other lexicon to chd * * @return true if they are identical */ private boolean compare(Map lex, Map other) { for (Iterator i = lex.keySet().iterator(); i.hasNext(); ) { String key = (String) i.next(); String[] thisPhonemes = getPhones(lex, key); String[] otherPhonemes = getPhones(other, key); if (thisPhonemes == null) { System.out.println(key + " not found in this."); return false; } else if (otherPhonemes == null) { System.out.println(key + " not found in other."); return false; } else if (thisPhonemes.length == otherPhonemes.length) { for (int j = 0; j < thisPhonemes.length; j++) { if (!thisPhonemes[j].equals(otherPhonemes[j])) { return false; } } } else { return false; } } return true; } /** * Fixes the part of speech if it is <code>null</code>. The * default representation of a <code>null</code> part of speech * is the number "0". */ static protected String fixPartOfSpeech(String partOfSpeech) { return (partOfSpeech == null) ? "0" : partOfSpeech; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -