simplengrammodel.java
来自「It is the Speech recognition software. 」· Java 代码 · 共 524 行 · 第 1/2 页
JAVA
524 行
}

    /**
     * Dumps the language model: prints every word sequence in the map,
     * followed by its probability entry, to standard output.
     */
    public void dump() {
        for (Iterator i = map.keySet().iterator(); i.hasNext();) {
            WordSequence ws = (WordSequence) i.next();
            Probability prob = (Probability) map.get(ws);
            System.out.println(ws.toString() + " " + prob);
        }
    }

    /**
     * Retrieves a string representation of the wordlist, suitable for map
     * access. The words are concatenated with "+" as separator.
     *
     * @param wordList
     *                the list of words (each element must be a String)
     *
     * @return a string representation of the word list
     */
    private String getRepresentation(List wordList) {
        StringBuffer sb = new StringBuffer();
        for (Iterator i = wordList.iterator(); i.hasNext();) {
            String s = (String) i.next();
            sb.append(s);
            if (i.hasNext()) {
                sb.append("+");
            }
        }
        return sb.toString();
    }

    /**
     * Loads the language model from the given location.
     *
     * The file is expected to contain a "\data\" marker, a list of
     * "ngram N=count" lines, one "\N-grams:" section per declared order and
     * a final "\end\" marker. Unigram probabilities are combined with a
     * uniform distribution according to the unigram weight.
     *
     * @param format
     *                the format of the model; only "arpa" is accepted
     * @param location
     *                the URL location of the model
     * @param unigramWeight
     *                the unigram weight (linear domain)
     * @param dictionary
     *                the dictionary used to look up each word; words it does
     *                not know are stored as Word.UNKNOWN
     *
     * @throws IOException
     *                 if the format is unsupported, the file is corrupt, or
     *                 an error occurs while loading
     */
    private void load(String format, URL location, float unigramWeight,
            Dictionary dictionary) throws FileNotFoundException, IOException {
        String line;
        float logUnigramWeight = logMath.linearToLog(unigramWeight);
        float inverseLogUnigramWeight = logMath
                .linearToLog(1.0 - unigramWeight);

        if (!format.equals("arpa")) {
            throw new IOException("Loading of " + format
                    + " language models not supported");
        }

        open(location);
        try {
            // look for beginning of data
            readUntil("\\data\\");

            // look for ngram statements
            List ngramList = new ArrayList();
            // readLine() throws on EOF, so the loop only ends via break
            while ((line = readLine()) != null) {
                if (line.startsWith("ngram")) {
                    StringTokenizer st = new StringTokenizer(line,
                            " \t\n\r\f=");
                    if (st.countTokens() != 3) {
                        corrupt("corrupt ngram field " + line + " "
                                + st.countTokens());
                    }
                    st.nextToken();
                    int index = Integer.parseInt(st.nextToken());
                    int count = Integer.parseInt(st.nextToken());
                    // List.add(int, Object) would otherwise fail with an
                    // unchecked IndexOutOfBoundsException
                    if (index < 1 || index - 1 > ngramList.size()) {
                        corrupt("ngram order " + index + " out of sequence");
                    }
                    ngramList.add(index - 1, new Integer(count));
                    if (index > maxNGram) {
                        maxNGram = index;
                    }
                } else if (line.equals("\\1-grams:")) {
                    break;
                }
            }

            if (ngramList.isEmpty()) {
                corrupt("no ngram counts found before \\1-grams:");
            }

            // NOTE(review): one entry is subtracted from the declared
            // unigram count; the reason is not visible here - confirm.
            int numUnigrams = ((Integer) ngramList.get(0)).intValue() - 1;
            // -log(x) = log(1/x)
            float logUniformProbability = -logMath.linearToLog(numUnigrams);

            for (int index = 0; index < ngramList.size(); index++) {
                int ngram = index + 1;
                int ngramCount = ((Integer) ngramList.get(index)).intValue();
                for (int i = 0; i < ngramCount; i++) {
                    StringTokenizer tok = new StringTokenizer(readLine());
                    int tokenCount = tok.countTokens();
                    // a line is: probability, N words, optional backoff
                    if (tokenCount != ngram + 1 && tokenCount != ngram + 2) {
                        corrupt("Bad format");
                    }
                    float log10Prob = Float.parseFloat(tok.nextToken());
                    float log10Backoff = 0.0f;

                    // construct the WordSequence for this N-Gram
                    List wordList = new ArrayList(maxNGram);
                    for (int j = 0; j < ngram; j++) {
                        String word = tok.nextToken().toLowerCase();
                        vocabulary.add(word);
                        Word wordObject = dictionary.getWord(word);
                        if (wordObject == null) {
                            wordObject = Word.UNKNOWN;
                        }
                        wordList.add(wordObject);
                    }
                    WordSequence wordSequence = WordSequence
                            .getWordSequence(wordList);

                    if (tok.hasMoreTokens()) {
                        log10Backoff = Float.parseFloat(tok.nextToken());
                    }
                    float logProb = logMath.log10ToLog(log10Prob);
                    float logBackoff = logMath.log10ToLog(log10Backoff);

                    // Apply unigram weights if this is a unigram probability
                    if (ngram == 1) {
                        float p1 = logProb + logUnigramWeight;
                        float p2 = logUniformProbability
                                + inverseLogUnigramWeight;
                        logProb = logMath.addAsLinear(p1, p2);
                    }
                    put(wordSequence, logProb, logBackoff);
                }
                if (index < ngramList.size() - 1) {
                    String next = "\\" + (ngram + 1) + "-grams:";
                    readUntil(next);
                }
            }
            readUntil("\\end\\");
        } finally {
            // release the stream even when parsing fails part-way through
            close();
        }
    }

    /**
     * Puts the probability into the map
     *
     * @param wordSequence
     *                the tag for the prob.
     * @param logProb
     *                the probability in log math base
     * @param logBackoff
     *                the backoff probability in log math base
     */
    private void put(WordSequence wordSequence, float logProb,
            float logBackoff) {
        map.put(wordSequence, new Probability(logProb, logBackoff));
    }

    /**
     * Reads the next line from the LM file. Keeps track of line number.
     *
     * @return the line that was read; never null
     *
     * @throws IOException
     *                 if an error occurs while reading the input or an EOF is
     *                 encountered.
     */
    private String readLine() throws IOException {
        String line;
        lineNumber++;
        line = reader.readLine();
        if (line == null) {
            corrupt("Premature EOF");
        }
        return line;
    }

    /**
     * Opens the language model at the given location and resets the line
     * counter.
     *
     * @param location
     *                the path to the language model
     *
     * @throws IOException
     *                 if an error occurs while opening the file
     */
    private void open(URL location) throws FileNotFoundException,
            IOException {
        lineNumber = 0;
        fileName = location.toString();
        reader = new BufferedReader(new InputStreamReader(location
                .openStream()));
    }

    /**
     * Reads from the input stream until the input matches the given string
     *
     * @param match
     *                the string to match on
     *
     * @throws IOException
     *                 if an error occurs while reading the input or an EOF is
     *                 encountered before finding the match; the underlying
     *                 exception is attached as the cause
     */
    private void readUntil(String match) throws IOException {
        try {
            while (!readLine().equals(match)) {
                // skip lines until the marker is found
            }
        } catch (IOException ioe) {
            // same message corrupt() would produce, but keep the cause
            IOException wrapped = new IOException("Corrupt Language Model "
                    + fileName + " at line " + lineNumber + ":"
                    + "Premature EOF while waiting for " + match);
            wrapped.initCause(ioe);
            throw wrapped;
        }
    }

    /**
     * Closes the language model file and clears the reader reference.
     *
     * @throws IOException
     *                 if an error occurs
     */
    private void close() throws IOException {
        reader.close();
        reader = null;
    }

    /**
     * Generates a 'corrupt' IO exception whose message includes the file
     * name and the current line number.
     *
     * @param why
     *                the description of the problem
     *
     * @throws IOException
     *                 always, with the given string in its message
     */
    private void corrupt(String why) throws IOException {
        throw new IOException("Corrupt Language Model " + fileName
                + " at line " + lineNumber + ":" + why);
    }
}

/**
 * Represents a probability and a backoff probability
 */
class Probability {

    float logProbability;
    float logBackoff;

    /**
     * Constructs a probability
     *
     * @param logProbability
     *                the probability
     * @param logBackoff
     *                the backoff probability
     */
    Probability(float logProbability, float logBackoff) {
        this.logProbability = logProbability;
        this.logBackoff = logBackoff;
    }

    /**
     * Returns a string representation of this object
     *
     * @return the string form of this object
     */
    public String toString() {
        return "Prob: " + logProbability + " " + logBackoff;
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?