simplengrammodel.java

来自「It is the Speech recognition software. 」· Java 代码 · 共 524 行 · 第 1/2 页

JAVA
524
字号
    }

    /**
     * Dumps the language model: prints every word sequence in the map
     * together with its probability entry to standard output.
     */
    public void dump() {
        for (Iterator i = map.keySet().iterator(); i.hasNext();) {
            WordSequence ws = (WordSequence) i.next();
            Probability prob = (Probability) map.get(ws);
            System.out.println(ws.toString() + " " + prob);
        }
    }

    /**
     * Retrieves a string representation of the wordlist, suitable for map
     * access. The words are joined with a "+" separator.
     * 
     * @param wordList
     *                the list of words (each element must be a String)
     * 
     * @return a string representation of the word list
     */
    private String getRepresentation(List wordList) {
        StringBuffer sb = new StringBuffer();
        for (Iterator i = wordList.iterator(); i.hasNext();) {
            String s = (String) i.next();
            sb.append(s);
            if (i.hasNext()) {
                sb.append("+");
            }
        }
        return sb.toString();
    }

    /**
     * Loads the language model from the given location.
     * 
     * The input is read as: a "\data\" marker, a run of "ngram N=count"
     * header lines terminated by "\1-grams:", then for each order N the
     * announced number of entry lines ("prob word1 .. wordN [backoff]"),
     * and finally an "\end\" marker.
     * 
     * @param format
     *                the format of the model; only "arpa" is accepted
     * @param location
     *                the URL location of the model
     * @param unigramWeight
     *                the unigram weight
     * @param dictionary
     *                the dictionary used to look up each word; words it does
     *                not know are replaced by Word.UNKNOWN
     * 
     * @throws IOException
     *                 if the format is unsupported, the model is corrupt, or
     *                 an error occurs while loading
     */
    private void load(String format, URL location, float unigramWeight,
            Dictionary dictionary) throws FileNotFoundException, IOException {
        String line;
        float logUnigramWeight = logMath.linearToLog(unigramWeight);
        float inverseLogUnigramWeight = logMath
                .linearToLog(1.0 - unigramWeight);
        if (!format.equals("arpa")) {
            throw new IOException("Loading of " + format
                    + " language models not supported");
        }
        open(location);
        try {
            // look for beginning of data
            readUntil("\\data\\");
            // look for ngram statements
            List ngramList = new ArrayList();
            // readLine() throws at EOF instead of returning null, so this
            // loop only ends through the break below or through an exception
            while ((line = readLine()) != null) {
                if (line.startsWith("ngram")) {
                    StringTokenizer st = new StringTokenizer(line,
                            " \t\n\r\f=");
                    if (st.countTokens() != 3) {
                        corrupt("corrupt ngram field " + line + " "
                                + st.countTokens());
                    }
                    st.nextToken();
                    int index = Integer.parseInt(st.nextToken());
                    int count = Integer.parseInt(st.nextToken());
                    // List.add(int, Object) rejects positions outside
                    // [0, size]; report that as a corrupt model (with file
                    // and line number) instead of an unchecked exception
                    if (index < 1 || index - 1 > ngramList.size()) {
                        corrupt("ngram order " + index + " out of sequence");
                    }
                    ngramList.add(index - 1, new Integer(count));
                    if (index > maxNGram) {
                        maxNGram = index;
                    }
                } else if (line.equals("\\1-grams:")) {
                    break;
                }
            }
            if (ngramList.isEmpty()) {
                corrupt("no ngram counts found before \\1-grams:");
            }
            // NOTE(review): one entry is excluded from the unigram count
            // here; the reason is not visible in this part of the file
            int numUnigrams = ((Integer) ngramList.get(0)).intValue() - 1;
            // -log(x) = log(1/x)
            float logUniformProbability = -logMath.linearToLog(numUnigrams);
            for (int index = 0; index < ngramList.size(); index++) {
                int ngram = index + 1;
                int ngramCount = ((Integer) ngramList.get(index)).intValue();
                for (int i = 0; i < ngramCount; i++) {
                    StringTokenizer tok = new StringTokenizer(readLine());
                    int tokenCount = tok.countTokens();
                    // an entry is "prob w1 .. wN" with an optional backoff
                    if (tokenCount != ngram + 1 && tokenCount != ngram + 2) {
                        corrupt("Bad format");
                    }
                    float log10Prob = Float.parseFloat(tok.nextToken());
                    float log10Backoff = 0.0f;
                    // construct the WordSequence for this N-Gram
                    List wordList = new ArrayList(maxNGram);
                    for (int j = 0; j < ngram; j++) {
                        String word = tok.nextToken().toLowerCase();
                        vocabulary.add(word);
                        Word wordObject = dictionary.getWord(word);
                        if (wordObject == null) {
                            wordObject = Word.UNKNOWN;
                        }
                        wordList.add(wordObject);
                    }
                    WordSequence wordSequence = WordSequence
                            .getWordSequence(wordList);
                    if (tok.hasMoreTokens()) {
                        log10Backoff = Float.parseFloat(tok.nextToken());
                    }
                    float logProb = logMath.log10ToLog(log10Prob);
                    float logBackoff = logMath.log10ToLog(log10Backoff);
                    // Apply unigram weights if this is a unigram probability
                    if (ngram == 1) {
                        float p1 = logProb + logUnigramWeight;
                        float p2 = logUniformProbability
                                + inverseLogUnigramWeight;
                        logProb = logMath.addAsLinear(p1, p2);
                    }
                    put(wordSequence, logProb, logBackoff);
                }
                if (index < ngramList.size() - 1) {
                    String next = "\\" + (ngram + 1) + "-grams:";
                    readUntil(next);
                }
            }
            readUntil("\\end\\");
            close();
        } finally {
            // close() above clears the reader on success; a non-null reader
            // here means loading failed part-way, so release the stream
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // the original failure is the one worth reporting
                }
                reader = null;
            }
        }
    }

    /**
     * Puts the probability into the map
     * 
     * @param wordSequence
     *                the tag for the prob.
     * @param logProb
     *                the probability in log math base
     * @param logBackoff
     *                the backoff probability in log math base
     */
    private void put(WordSequence wordSequence, float logProb, float logBackoff) {
        map.put(wordSequence, new Probability(logProb, logBackoff));
    }

    /**
     * Reads the next line from the LM file. Keeps track of line number.
     * 
     * @return the line that was read; never null, because an EOF is
     *         reported as an exception
     * 
     * @throws IOException
     *                 if an error occurs while reading the input or an EOF is
     *                 encountered.
     */
    private String readLine() throws IOException {
        String line;
        lineNumber++;
        line = reader.readLine();
        if (line == null) {
            corrupt("Premature EOF");
        }
        return line;
    }

    /**
     * Opens the language model at the given location and resets the line
     * counter.
     * 
     * @param location
     *                the path to the language model
     * 
     * @throws IOException
     *                 if an error occurs while opening the file
     */
    private void open(URL location) throws FileNotFoundException,
            IOException {
        lineNumber = 0;
        fileName = location.toString();
        reader = new BufferedReader(
                new InputStreamReader(location.openStream()));
    }

    /**
     * Reads from the input stream until the input matches the given string
     * 
     * @param match
     *                the string to match on
     * 
     * @throws IOException
     *                 if an error occurs while reading the input or an EOF is
     *                 encountered before finding the match
     */
    private void readUntil(String match) throws IOException {
        try {
            while (!readLine().equals(match)) {
            }
        } catch (IOException ioe) {
            // any read failure, not only EOF, is reported with this message
            corrupt("Premature EOF while waiting for " + match);
        }
    }

    /**
     * Closes the language model file and clears the reader reference.
     * 
     * @throws IOException
     *                 if an error occurs
     */
    private void close() throws IOException {
        reader.close();
        reader = null;
    }

    /**
     * Generates a 'corrupt' IO exception. This method never returns
     * normally.
     * 
     * @param why
     *                the description of the problem, appended to the file
     *                name and current line number
     * 
     * @throws IOException
     *                 always, with a message built from the given string
     */
    private void corrupt(String why) throws IOException {
        throw new IOException("Corrupt Language Model " + fileName
                + " at line " + lineNumber + ":" + why);
    }
}

/**
 * Represents a probability and a backoff probability
 */
class Probability {
    float logProbability;
    float logBackoff;

    /**
     * Constructs a probability
     * 
     * @param logProbability
     *                the probability
     * @param logBackoff
     *                the backoff probability
     */
    Probability(float logProbability, float logBackoff) {
        this.logProbability = logProbability;
        this.logBackoff = logBackoff;
    }

    /**
     * Returns a string representation of this object
     * 
     * @return the string form of this object
     */
    public String toString() {
        return "Prob: " + logProbability + " " + logBackoff;
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?