binaryloader.java
来自「It is the Speech recognition software. 」· Java 代码 · 共 696 行 · 第 1/2 页
JAVA
696 行
int fileNameLength = readInt(stream, bigEndian); bytesRead += stream.skipBytes(fileNameLength); numberUnigrams = 0; logBigramSegmentSize = LOG2_BIGRAM_SEGMENT_SIZE_DEFAULT; // read version number, if present. it must be <= 0. int version = readInt(stream, bigEndian); // System.out.println("Version: " + version); if (version <= 0) { // yes, its the version number readInt(stream, bigEndian); // read and skip timestamp // read and skip format description int formatLength; for (;;) { if ((formatLength = readInt(stream, bigEndian)) == 0) { break; } bytesRead += stream.skipBytes(formatLength); } // read log bigram segment size if present if (version <= -2) { logBigramSegmentSize = readInt(stream, bigEndian); if (logBigramSegmentSize < 1 || logBigramSegmentSize > 15) { throw new Error("log2(bg_seg_sz) outside range 1..15"); } } numberUnigrams = readInt(stream, bigEndian); } else { numberUnigrams = version; } if (numberUnigrams <= 0) { throw new Error("Bad number of unigrams: " + numberUnigrams + ", must be > 0."); } else { maxNGram = 1; } if ((numberBigrams = readInt(stream, bigEndian)) < 0) { throw new Error("Bad number of bigrams: " + numberBigrams); } else { maxNGram = 2; } if ((numberTrigrams = readInt(stream, bigEndian)) < 0) { throw new Error("Bad number of trigrams: " + numberTrigrams); } else { maxNGram = 3; } } /** * Skips the bigrams and trigrams of the LM. 
* * @param stream * the source of data */ private void skipBigramsTrigrams(DataInputStream stream) throws IOException { // skip all the bigram entries, the +1 is the sentinel at the end if (numberBigrams > 0) { bigramOffset = bytesRead; int bytesToSkip = (numberBigrams + 1) * LargeTrigramModel.BYTES_PER_BIGRAM; stream.skipBytes(bytesToSkip); bytesRead += bytesToSkip; } // skip all the trigram entries if (numberTrigrams > 0) { trigramOffset = bytesRead; int bytesToSkip = numberTrigrams * LargeTrigramModel.BYTES_PER_TRIGRAM; stream.skipBytes(bytesToSkip); bytesRead += bytesToSkip; } } /** * Apply the unigram weight to the set of unigrams */ private void applyUnigramWeight() { float logUnigramWeight = logMath.linearToLog(unigramWeight); float logNotUnigramWeight = logMath.linearToLog(1.0f - unigramWeight); float logUniform = logMath.linearToLog(1.0f / (numberUnigrams)); float logWip = logMath.linearToLog(wip); float p2 = logUniform + logNotUnigramWeight; for (int i = 0; i < numberUnigrams; i++) { UnigramProbability unigram = unigrams[i]; float p1 = unigram.getLogProbability(); if (i != startWordID) { p1 += logUnigramWeight; p1 = logMath.addAsLinear(p1, p2); } if (applyLanguageWeightAndWip) { p1 = p1 * languageWeight + logWip; unigram.setLogBackoff(unigram.getLogBackoff() * languageWeight); } unigram.setLogProbability(p1); } } /** * Apply the language weight to the given array of probabilities. */ private void applyLanguageWeight(float[] logProbabilities, float languageWeight) { for (int i = 0; i < logProbabilities.length; i++) { logProbabilities[i] = logProbabilities[i] * languageWeight; } } /** * Apply the WIP to the given array of probabilities. */ private void applyWip(float[] logProbabilities, double wip) { float logWip = logMath.linearToLog(wip); for (int i = 0; i < logProbabilities.length; i++) { logProbabilities[i] = logProbabilities[i] + logWip; } } /** * Reads the probability table from the given DataInputStream. 
* * @param stream * the DataInputStream from which to read the table * @param bigEndian * true if the given stream is bigEndian, false otherwise */ private float[] readFloatTable(DataInputStream stream, boolean bigEndian) throws IOException { int numProbs = readInt(stream, bigEndian); if (numProbs <= 0 || numProbs > MAX_PROB_TABLE_SIZE) { throw new Error("Bad probabilities table size: " + numProbs); } float[] probTable = new float[numProbs]; for (int i = 0; i < numProbs; i++) { probTable[i] = logMath.log10ToLog(readFloat(stream, bigEndian)); } return probTable; } /** * Reads a table of integers from the given DataInputStream. * * @param stream * the DataInputStream from which to read the table * @param bigEndian * true if the given stream is bigEndian, false otherwise * @param tableSize * the size of the trigram segment table * * @return the trigram segment table, which is an array of integers */ private int[] readIntTable(DataInputStream stream, boolean bigEndian, int tableSize) throws IOException { int numSegments = readInt(stream, bigEndian); if (numSegments != tableSize) { throw new Error("Bad trigram seg table size: " + numSegments); } int[] segmentTable = new int[numSegments]; for (int i = 0; i < numSegments; i++) { segmentTable[i] = readInt(stream, bigEndian); } return segmentTable; } /** * Read in the unigrams in the given DataInputStream. 
* * @param stream * the DataInputStream to read from * @param numberUnigrams * the number of unigrams to read * @param bigEndian * true if the DataInputStream is big-endian, false * otherwise * * @return an array of UnigramProbability index by the unigram ID */ private UnigramProbability[] readUnigrams(DataInputStream stream, int numberUnigrams, boolean bigEndian) throws IOException { UnigramProbability[] unigrams = new UnigramProbability[numberUnigrams]; for (int i = 0; i < numberUnigrams; i++) { // read unigram ID, unigram probability, unigram backoff weight int unigramID = readInt(stream, bigEndian); // if we're not reading the sentinel unigram at the end, // make sure that the unigram IDs are consecutive if (i != (numberUnigrams - 1)) { assert(unigramID == i); } float unigramProbability = readFloat(stream, bigEndian); float unigramBackoff = readFloat(stream, bigEndian); int firstBigramEntry = readInt(stream, bigEndian); float logProbability = logMath.log10ToLog(unigramProbability); float logBackoff = logMath.log10ToLog(unigramBackoff); unigrams[i] = new UnigramProbability(unigramID, logProbability, logBackoff, firstBigramEntry); } return unigrams; } /** * Reads a byte from the given DataInputStream. * * @param stream * the DataInputStream to read from * * @return the byte read */ private final byte readByte(DataInputStream stream) throws IOException { bytesRead++; return stream.readByte(); } /** * Reads an integer from the given DataInputStream. * * @param stream * the DataInputStream to read from * @param bigEndian * true if the DataInputStream is in bigEndian, false * otherwise * * @return the integer read */ private final int readInt(DataInputStream stream, boolean bigEndian) throws IOException { bytesRead += 4; if (bigEndian) { return stream.readInt(); } else { return Utilities.readLittleEndianInt(stream); } } /** * Reads a float from the given DataInputStream. 
* * @param stream * the DataInputStream to read from * @param bigEndian * true if the DataInputStream is in bigEndian, false * otherwise * * @return the float read */ private final float readFloat(DataInputStream stream, boolean bigEndian) throws IOException { bytesRead += 4; if (bigEndian) { return stream.readFloat(); } else { return Utilities.readLittleEndianFloat(stream); } } /** * Reads a string of the given length from the given DataInputStream. It is * assumed that the DataInputStream contains 8-bit chars. * * @param stream * the DataInputStream to read from * @param length * the number of characters in the returned string * * @return a string of the given length from the given DataInputStream */ private final String readString(DataInputStream stream, int length) throws IOException { StringBuffer buffer = new StringBuffer(); byte[] bytes = new byte[length]; bytesRead += stream.read(bytes); for (int i = 0; i < length; i++) { buffer.append((char) bytes[i]); } return buffer.toString(); } /** * Reads a series of consecutive Strings from the given stream. 
*
     * The data is a block of NUL-terminated 8-bit strings. Each string is
     * lower-cased and stored at the next index of the result; bytes after the
     * last NUL terminator are ignored. As a side effect, startWordID and
     * endWordID are set to the index of the word that equals
     * Dictionary.SENTENCE_START_SPELLING and
     * Dictionary.SENTENCE_END_SPELLING respectively, when such a word is
     * found.
     *
     * @param stream
     *            the DataInputStream to read from
     * @param length
     *            the total length in bytes of all the Strings
     * @param numberUnigrams
     *            the number of String to read
     *
     * @return an array of the Strings read
     *
     * @throws IOException
     *             if the stream ends or fails before length bytes were read
     */
    private final String[] readWords(DataInputStream stream, int length,
            int numberUnigrams) throws IOException {

        String[] words = new String[numberUnigrams];
        byte[] bytes = new byte[length];

        // read() may legally return fewer bytes than requested, or -1 at end
        // of stream; readFully() either fills the buffer or throws.
        stream.readFully(bytes);

        // Count the block exactly once. (Counting here and again per byte in
        // the loop below would register every byte twice.)
        bytesRead += length;

        StringBuffer buffer = new StringBuffer();
        int s = 0;
        for (int i = 0; i < length; i++) {
            // mask so that bytes >= 0x80 are not sign-extended to 0xFFxx
            char c = (char) (bytes[i] & 0xFF);
            if (c == '\0') {
                // if its the end of a string, add it to the 'words' array
                // NOTE(review): toLowerCase() uses the default locale;
                // confirm this is intended for the word list.
                words[s] = buffer.toString().toLowerCase();
                buffer = new StringBuffer();
                if (words[s].equals(Dictionary.SENTENCE_START_SPELLING)) {
                    startWordID = s;
                } else if (words[s].equals(Dictionary.SENTENCE_END_SPELLING)) {
                    endWordID = s;
                }
                s++;
            } else {
                buffer.append(c);
            }
        }
        assert (s == numberUnigrams);
        return words;
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?