binaryloader.java

来自「It is the Speech recognition software. 」· Java 代码 · 共 696 行 · 第 1/2 页

JAVA
696
字号
        int fileNameLength = readInt(stream, bigEndian);        bytesRead += stream.skipBytes(fileNameLength);        numberUnigrams = 0;        logBigramSegmentSize = LOG2_BIGRAM_SEGMENT_SIZE_DEFAULT;        // read version number, if present. it must be <= 0.        int version = readInt(stream, bigEndian);        // System.out.println("Version: " + version);        if (version <= 0) { // yes, its the version number            readInt(stream, bigEndian); // read and skip timestamp            // read and skip format description            int formatLength;            for (;;) {                if ((formatLength = readInt(stream, bigEndian)) == 0) {                    break;                }                bytesRead += stream.skipBytes(formatLength);            }            // read log bigram segment size if present            if (version <= -2) {                logBigramSegmentSize = readInt(stream, bigEndian);                if (logBigramSegmentSize < 1 || logBigramSegmentSize > 15) {                    throw new Error("log2(bg_seg_sz) outside range 1..15");                }            }            numberUnigrams = readInt(stream, bigEndian);        } else {            numberUnigrams = version;        }        if (numberUnigrams <= 0) {            throw new Error("Bad number of unigrams: " + numberUnigrams                    + ", must be > 0.");        } else {            maxNGram = 1;        }        if ((numberBigrams = readInt(stream, bigEndian)) < 0) {            throw new Error("Bad number of bigrams: " + numberBigrams);        } else {            maxNGram = 2;        }        if ((numberTrigrams = readInt(stream, bigEndian)) < 0) {            throw new Error("Bad number of trigrams: " + numberTrigrams);        } else {            maxNGram = 3;        }    }    /**     * Skips the bigrams and trigrams of the LM.     *      * @param stream     *                the source of data     */    private void skipBigramsTrigrams(DataInputStream stream) throws IOException {        // skip all the bigram entries, the +1 is the sentinel at the end        if (numberBigrams > 0) {            bigramOffset = bytesRead;            int bytesToSkip = (numberBigrams + 1)                    * LargeTrigramModel.BYTES_PER_BIGRAM;            stream.skipBytes(bytesToSkip);            bytesRead += bytesToSkip;        }        // skip all the trigram entries        if (numberTrigrams > 0) {            trigramOffset = bytesRead;            int bytesToSkip = numberTrigrams                    * LargeTrigramModel.BYTES_PER_TRIGRAM;            stream.skipBytes(bytesToSkip);            bytesRead += bytesToSkip;        }    }    /**     * Apply the unigram weight to the set of unigrams     */    private void applyUnigramWeight() {        float logUnigramWeight = logMath.linearToLog(unigramWeight);        float logNotUnigramWeight = logMath.linearToLog(1.0f - unigramWeight);        float logUniform = logMath.linearToLog(1.0f / (numberUnigrams));        float logWip = logMath.linearToLog(wip);        float p2 = logUniform + logNotUnigramWeight;        for (int i = 0; i < numberUnigrams; i++) {            UnigramProbability unigram = unigrams[i];            float p1 = unigram.getLogProbability();            if (i != startWordID) {                p1 += logUnigramWeight;                p1 = logMath.addAsLinear(p1, p2);            }            if (applyLanguageWeightAndWip) {                p1 = p1 * languageWeight + logWip;                unigram.setLogBackoff(unigram.getLogBackoff() * languageWeight);            }            unigram.setLogProbability(p1);        }    }    /**     * Apply the language weight to the given array of probabilities.     */    private void applyLanguageWeight(float[] logProbabilities,            float languageWeight) {        for (int i = 0; i < logProbabilities.length; i++) {            logProbabilities[i] = logProbabilities[i] * languageWeight;        }    }    /**     * Apply the WIP to the given array of probabilities.     */    private void applyWip(float[] logProbabilities, double wip) {        float logWip = logMath.linearToLog(wip);        for (int i = 0; i < logProbabilities.length; i++) {            logProbabilities[i] = logProbabilities[i] + logWip;        }    }    /**     * Reads the probability table from the given DataInputStream.     *      * @param stream     *                the DataInputStream from which to read the table     * @param bigEndian     *                true if the given stream is bigEndian, false otherwise     */    private float[] readFloatTable(DataInputStream stream, boolean bigEndian)            throws IOException {        int numProbs = readInt(stream, bigEndian);        if (numProbs <= 0 || numProbs > MAX_PROB_TABLE_SIZE) {            throw new Error("Bad probabilities table size: " + numProbs);        }        float[] probTable = new float[numProbs];        for (int i = 0; i < numProbs; i++) {            probTable[i] = logMath.log10ToLog(readFloat(stream, bigEndian));        }        return probTable;    }    /**     * Reads a table of integers from the given DataInputStream.     *      * @param stream     *                the DataInputStream from which to read the table     * @param bigEndian     *                true if the given stream is bigEndian, false otherwise     * @param tableSize     *                the size of the trigram segment table     *      * @return the trigram segment table, which is an array of integers     */    private int[] readIntTable(DataInputStream stream, boolean bigEndian,            int tableSize) throws IOException {        int numSegments = readInt(stream, bigEndian);        if (numSegments != tableSize) {            throw new Error("Bad trigram seg table size: " + numSegments);        }        int[] segmentTable = new int[numSegments];        for (int i = 0; i < numSegments; i++) {            segmentTable[i] = readInt(stream, bigEndian);        }        return segmentTable;    }    /**     * Read in the unigrams in the given DataInputStream.     *      * @param stream     *                the DataInputStream to read from     * @param numberUnigrams     *                the number of unigrams to read     * @param bigEndian     *                true if the DataInputStream is big-endian, false     *                otherwise     *      * @return an array of UnigramProbability index by the unigram ID     */    private UnigramProbability[] readUnigrams(DataInputStream stream,            int numberUnigrams, boolean bigEndian) throws IOException {        UnigramProbability[] unigrams = new UnigramProbability[numberUnigrams];        for (int i = 0; i < numberUnigrams; i++) {            // read unigram ID, unigram probability, unigram backoff weight            int unigramID = readInt(stream, bigEndian);            // if we're not reading the sentinel unigram at the end,            // make sure that the unigram IDs are consecutive            if (i != (numberUnigrams - 1)) {                assert(unigramID == i);            }            float unigramProbability = readFloat(stream, bigEndian);            float unigramBackoff = readFloat(stream, bigEndian);            int firstBigramEntry = readInt(stream, bigEndian);            float logProbability = logMath.log10ToLog(unigramProbability);            float logBackoff = logMath.log10ToLog(unigramBackoff);            unigrams[i] = new UnigramProbability(unigramID, logProbability,                    logBackoff, firstBigramEntry);        }        return unigrams;    }    /**     * Reads a byte from the given DataInputStream.     *      * @param stream     *                the DataInputStream to read from     *      * @return the byte read     */    private final byte readByte(DataInputStream stream) throws IOException {        bytesRead++;        return stream.readByte();    }    /**     * Reads an integer from the given DataInputStream.     *      * @param stream     *                the DataInputStream to read from     * @param bigEndian     *                true if the DataInputStream is in bigEndian, false     *                otherwise     *      * @return the integer read     */    private final int readInt(DataInputStream stream, boolean bigEndian)            throws IOException {        bytesRead += 4;        if (bigEndian) {            return stream.readInt();        } else {            return Utilities.readLittleEndianInt(stream);        }    }    /**     * Reads a float from the given DataInputStream.     *      * @param stream     *                the DataInputStream to read from     * @param bigEndian     *                true if the DataInputStream is in bigEndian, false     *                otherwise     *      * @return the float read     */    private final float readFloat(DataInputStream stream, boolean bigEndian)            throws IOException {        bytesRead += 4;        if (bigEndian) {            return stream.readFloat();        } else {            return Utilities.readLittleEndianFloat(stream);        }    }    /**     * Reads a string of the given length from the given DataInputStream. It is     * assumed that the DataInputStream contains 8-bit chars.     *      * @param stream     *                the DataInputStream to read from     * @param length     *                the number of characters in the returned string     *      * @return a string of the given length from the given DataInputStream     */    private final String readString(DataInputStream stream, int length)            throws IOException {        StringBuffer buffer = new StringBuffer();        byte[] bytes = new byte[length];        bytesRead += stream.read(bytes);        for (int i = 0; i < length; i++) {            buffer.append((char) bytes[i]);        }        return buffer.toString();    }    /**     * Reads a series of consecutive Strings from the given stream.     *      * @param stream     *                the DataInputStream to read from     * @param length     *                the total length in bytes of all the Strings     * @param numberUnigrams     *                the number of String to read     *      * @return an array of the Strings read     */    private final String[] readWords(DataInputStream stream, int length,            int numberUnigrams) throws IOException {        String[] words = new String[numberUnigrams];        StringBuffer buffer = new StringBuffer();        byte[] bytes = new byte[length];        bytesRead += stream.read(bytes);        int s = 0;        for (int i = 0; i < length; i++) {            char c = (char) bytes[i];            bytesRead++;            if (c == '\0') {                // if its the end of a string, add it to the 'words' array                words[s] = buffer.toString().toLowerCase();                buffer = new StringBuffer();                if (words[s].equals(Dictionary.SENTENCE_START_SPELLING)) {                    startWordID = s;                } else if (words[s].equals(Dictionary.SENTENCE_END_SPELLING)) {                    endWordID = s;                }                s++;            } else {                buffer.append(c);            }        }        assert(s == numberUnigrams);        return words;    }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?