📄 sphinx3loader.java
字号:
// System.out.println("resource: " + path + ", " + getClass()); InputStream inputStream = getClass().getResourceAsStream(path); if (inputStream == null) { throw new IOException("Can't open " + path); } DataInputStream dis = new DataInputStream(new BufferedInputStream( inputStream)); String id = readWord(dis); if (!id.equals("s3")) { throw new IOException("Not proper s3 binary file " + location + path); } String name; while ((name = readWord(dis)) != null) { if (!name.equals("endhdr")) { String value = readWord(dis); props.setProperty(name, value); } else { break; } } int byteOrderMagic = dis.readInt(); if (byteOrderMagic == BYTE_ORDER_MAGIC) { // System.out.println("Not swapping " + path); swap = false; } else if (byteSwap(byteOrderMagic) == BYTE_ORDER_MAGIC) { // System.out.println("SWAPPING " + path); swap = true; } else { throw new IOException("Corrupt S3 file " + location + path); } return dis; } /** * Reads the next word (text separated by whitespace) from the given stream * * @param dis * the input stream * * @return the next word * * @throws IOException * on error */ String readWord(DataInputStream dis) throws IOException { StringBuffer sb = new StringBuffer(); char c; // skip leading whitespace do { c = readChar(dis); } while (Character.isWhitespace(c)); // read the word do { sb.append(c); c = readChar(dis); } while (!Character.isWhitespace(c)); return sb.toString(); } /** * Reads a single char from the stream * * @param dis * the stream to read * @return the next character on the stream * * @throws IOException * if an error occurs */ private char readChar(DataInputStream dis) throws IOException { return (char) dis.readByte(); } /** * swap a 32 bit word * * @param val * the value to swap * * @return the swapped value */ private int byteSwap(int val) { return ((0xff & (val >> 24)) | (0xff00 & (val >> 8)) | (0xff0000 & (val << 8)) | (0xff000000 & (val << 24))); } /** * Read an integer from the input stream, byte-swapping as necessary * * @param dis * the 
inputstream * * @return an integer value * * @throws IOException * on error */ protected int readInt(DataInputStream dis) throws IOException { if (swap) { return Utilities.readLittleEndianInt(dis); } else { return dis.readInt(); } } /** * Read a float from the input stream, byte-swapping as necessary * * @param dis * the inputstream * * @return a floating pint value * * @throws IOException * on error */ protected float readFloat(DataInputStream dis) throws IOException { float val; if (swap) { val = Utilities.readLittleEndianFloat(dis); } else { val = dis.readFloat(); } return val; } // Do we need the method nonZeroFloor?? /** * If a data point is non-zero and below 'floor' make it equal to floor * (don't floor zero values though). * * @param data * the data to floor * @param floor * the floored value */ protected void nonZeroFloor(float[] data, float floor) { for (int i = 0; i < data.length; i++) { if (data[i] != 0.0 && data[i] < floor) { data[i] = floor; } } } /** * If a data point is below 'floor' make it equal to floor. * * @param data * the data to floor * @param floor * the floored value */ private void floorData(float[] data, float floor) { for (int i = 0; i < data.length; i++) { if (data[i] < floor) { data[i] = floor; } } } /** * Normalize the given data * * @param data * the data to normalize */ protected void normalize(float[] data) { float sum = 0; for (int i = 0; i < data.length; i++) { sum += data[i]; } if (sum != 0.0f) { for (int i = 0; i < data.length; i++) { data[i] = data[i] / sum; } } } /** * Dump the data * * @param name * the name of the data * @param data * the data itself * */ private void dumpData(String name, float[] data) { System.out.println(" ----- " + name + " -----------"); for (int i = 0; i < data.length; i++) { System.out.println(name + " " + i + ": " + data[i]); } } /** * Convert to log math * * @param data * the data to normalize */ // linearToLog returns a float, so zero values in linear scale // should return -Float.MAX_VALUE. 
protected void convertToLogMath(float[] data) {
    // Each entry is replaced by logMath.linearToLog of its current value.
    for (int i = 0; i < data.length; i++) {
        data[i] = logMath.linearToLog(data[i]);
    }
}

/**
 * Reads the given number of floats from the stream and returns them in an
 * array of floats. Each value is read with readFloat.
 *
 * @param dis
 *            the stream to read data from
 * @param size
 *            the number of floats to read
 *
 * @return an array of size float elements
 *
 * @throws IOException
 *             if an exception occurs
 */
protected float[] readFloatArray(DataInputStream dis, int size)
        throws IOException {
    float[] data = new float[size];
    for (int i = 0; i < size; i++) {
        data[i] = readFloat(dis);
    }
    return data;
}

/**
 * Loads the HMM definitions from the given stream. The stream is tokenized
 * with '#' as the comment character and is expected to contain, in order:
 * the MODEL_VERSION string; six counts, each followed by its label
 * (n_base, n_tri, n_state_map, n_tied_state, n_tied_ci_state, n_tied_tmat);
 * then one entry per base phone and one entry per context dependent phone.
 * Each entry consists of name, left, right, position, attribute, a
 * transition matrix index, the state ids and a closing "N".
 *
 * A SenoneHMM is created for every base phone, and for every context
 * dependent phone when useCDUnits is true, and is registered with
 * hmmManager. Base units are also recorded in contextIndependentUnits.
 * The tokenizer is closed when this method exits, whether it returns
 * normally or throws.
 *
 * @param useCDUnits
 *            if true, loads also the context dependent units; if false
 *            their entries are still read from the stream but no HMMs are
 *            created for them
 * @param inputStream
 *            the open input stream to use
 * @param path
 *            the path the stream was opened from; used for logging, for
 *            error messages and to construct the returned pool
 *
 * @return a new Pool constructed with path; this method does not add
 *         anything to it
 *
 * @throws FileNotFoundException
 *             if a file cannot be found
 * @throws IOException
 *             if an error occurs while loading the data, or if the file
 *             declares no phones at all
 */
protected Pool loadHMMPool(boolean useCDUnits, InputStream inputStream,
        String path) throws FileNotFoundException, IOException {
    ExtendedStreamTokenizer est = new ExtendedStreamTokenizer(inputStream,
            '#', false);
    try {
        Pool pool = new Pool(path);

        logger.fine("Loading HMM file from: " + path);

        est.expectString(MODEL_VERSION);

        int numBase = est.getInt("numBase");
        est.expectString("n_base");

        int numTri = est.getInt("numTri");
        est.expectString("n_tri");

        int numStateMap = est.getInt("numStateMap");
        est.expectString("n_state_map");

        int numTiedState = est.getInt("numTiedState");
        est.expectString("n_tied_state");

        int numContextIndependentTiedState =
                est.getInt("numContextIndependentTiedState");
        est.expectString("n_tied_ci_state");

        int numTiedTransitionMatrices =
                est.getInt("numTiedTransitionMatrices");
        est.expectString("n_tied_tmat");

        // Guard the division below: a file that declares no phones would
        // otherwise fail with an ArithmeticException.
        int numPhones = numTri + numBase;
        if (numPhones == 0) {
            throw new IOException("No phones declared in HMM file " + path);
        }
        int numStatePerHMM = numStateMap / numPhones;

        assert numTiedState == mixtureWeightsPool.getFeature(NUM_SENONES, 0);
        assert numTiedTransitionMatrices == matrixPool.size();

        // Load the base phones
        for (int i = 0; i < numBase; i++) {
            String name = est.getString();
            String left = est.getString();
            String right = est.getString();
            String position = est.getString();
            String attribute = est.getString();
            int tmat = est.getInt("tmat");

            // One state id fewer than numStatePerHMM is read per entry
            int[] stid = new int[numStatePerHMM - 1];

            for (int j = 0; j < numStatePerHMM - 1; j++) {
                stid[j] = est.getInt("j");
                assert stid[j] >= 0
                        && stid[j] < numContextIndependentTiedState;
            }
            est.expectString("N");

            assert left.equals("-");
            assert right.equals("-");
            assert position.equals("-");
            assert tmat < numTiedTransitionMatrices;

            Unit unit = unitManager.getUnit(name, attribute.equals(FILLER));
            contextIndependentUnits.put(unit.getName(), unit);

            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Loaded " + unit);
            }

            // The first filler
            if (unit.isFiller() && unit.getName().equals(SILENCE_CIPHONE)) {
                unit = UnitManager.SILENCE;
            }

            float[][] transitionMatrix = (float[][]) matrixPool.get(tmat);
            SenoneSequence ss = getSenoneSequence(stid);

            HMM hmm = new SenoneHMM(unit, ss, transitionMatrix,
                    HMMPosition.lookup(position));
            hmmManager.put(hmm);
        }

        // Load the context dependent phones. If the useCDUnits
        // property is false, the CD phones will not be created, but
        // the values still need to be read in from the file.

        // The previous entry's unit and senone sequence are remembered so
        // that consecutive entries with the same values can share them.
        String lastUnitName = "";
        Unit lastUnit = null;
        int[] lastStid = null;
        SenoneSequence lastSenoneSequence = null;

        for (int i = 0; i < numTri; i++) {
            String name = est.getString();
            String left = est.getString();
            String right = est.getString();
            String position = est.getString();
            String attribute = est.getString();
            int tmat = est.getInt("tmat");

            int[] stid = new int[numStatePerHMM - 1];

            for (int j = 0; j < numStatePerHMM - 1; j++) {
                stid[j] = est.getInt("j");
                assert stid[j] >= numContextIndependentTiedState
                        && stid[j] < numTiedState;
            }
            est.expectString("N");

            assert !left.equals("-");
            assert !right.equals("-");
            assert !position.equals("-");
            assert attribute.equals("n/a");
            assert tmat < numTiedTransitionMatrices;

            if (useCDUnits) {
                Unit unit = null;
                String unitName = (name + " " + left + " " + right);

                if (unitName.equals(lastUnitName)) {
                    unit = lastUnit;
                } else {
                    Unit[] leftContext = new Unit[1];
                    leftContext[0] = (Unit) contextIndependentUnits.get(left);

                    Unit[] rightContext = new Unit[1];
                    rightContext[0] =
                            (Unit) contextIndependentUnits.get(right);

                    Context context = LeftRightContext.get(leftContext,
                            rightContext);
                    unit = unitManager.getUnit(name, false, context);
                }
                lastUnitName = unitName;
                lastUnit = unit;

                if (logger.isLoggable(Level.FINE)) {
                    logger.fine("Loaded " + unit);
                }

                float[][] transitionMatrix =
                        (float[][]) matrixPool.get(tmat);

                // lastStid is only consulted once a previous sequence
                // exists, so it is never null when compared.
                SenoneSequence ss = lastSenoneSequence;
                if (ss == null || !sameSenoneSequence(stid, lastStid)) {
                    ss = getSenoneSequence(stid);
                }
                lastSenoneSequence = ss;
                lastStid = stid;

                HMM hmm = new SenoneHMM(unit, ss, transitionMatrix,
                        HMMPosition.lookup(position));
                hmmManager.put(hmm);
            }
        }

        return pool;
    } finally {
        // Release the tokenizer on failure too, not only on success.
        est.close();
    }
}

/**
 * Returns true if the given senone sequence IDs are the same.
 *
 * @return true if the given senone sequence IDs are the same, false
 *         otherwise
 */
protected boolean sameSenoneSequence(int[] ssid1, int[] ssid2) {
    if (ssid1.length == ssid2.length) {
        for (int i = 0; i < ssid1.length; i++) {
            if (ssid1[i] != ssid2[i]) {
                return false;
            }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -