📄 nistalign.java

📁 This is speech recognition software. It is platform independent. To execute the source code,
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.util;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.StringTokenizer;

/**
 * Implements a portion of the NIST align/scoring algorithm to compare
 * a reference string to a hypothesis string.  It only keeps track of
 * substitutions, insertions, and deletions.
 *
 * <p>Each call to 'align' overwrites the per-sentence fields below;
 * the "total" fields accumulate until resetTotals is called.
 */
public class NISTAlign {

    /* Edit-operation codes.  The following are used in the
     * backtrace table and backtrace list.
     */
    final static int OK = 0;
    final static int SUBSTITUTION = 1;
    final static int INSERTION = 2;
    final static int DELETION = 3;

    /* Alignment costs.  The following are used to create the
     * penalty table.
     */
    final static int MAX_PENALTY = 1000000;
    final static int SUBSTITUTION_PENALTY = 100;
    final static int INSERTION_PENALTY = 75;
    final static int DELETION_PENALTY = 75;

    /* Used for padding out aligned strings.
     * NOTE(review): HRULE looks like a horizontal rule for printed
     * reports -- confirm against the printing code.
     */
    final static String STARS  =
        "********************************************";
    final static String SPACES =
        "                                            ";
    final static String HRULE =
        "============================================"
        + "================================";

    /**
     * Totals over the life of this class.  These can be reset to 0
     * with a call to resetTotals.
     *
     * NOTE: 'totalSentencesWithSubtitutions' is misspelled
     * ("Subtitutions"); the spelling is kept because resetTotals
     * refers to the field by this exact name.
     */
    private int totalSentences;
    private int totalSentencesWithErrors;
    private int totalSentencesWithSubtitutions;
    private int totalSentencesWithInsertions;
    private int totalSentencesWithDeletions;
    private int totalReferenceWords;
    private int totalHypothesisWords;
    private int totalAlignedWords;
    private int totalWordsCorrect;
    private int totalSubstitutions;
    private int totalInsertions;
    private int totalDeletions;

    /**
     * Error values for one call to 'align'.  The three error counts
     * are zeroed at the start of each call; 'correct' is then set to
     * the size of alignedReferenceWords minus the sum of the three
     * error counts.
     */
    private int substitutions;
    private int insertions;
    private int deletions;
    private int correct;

    /**
     * The raw reference string, exactly as passed in.  Updated with
     * each call to 'align'.
     */
    private String rawReference;

    /**
     * The reference annotation; typically the name of the
     * audio file for the reference string.  This is an optional
     * part of the rawReference string.  If it is included, it
     * is appended to the end of the string in parentheses.
     * Updated with each call to 'align': set to the part of
     * rawReference starting at its first '(' (parenthesis included),
     * or to null when rawReference contains no '('.
     */
    private String referenceAnnotation;

    /**
     * Ordered list of words from rawReference after the annotation
     * has been removed.  Updated with each call to 'align'.
     * Raw (untyped) list; elements are presumably Strings --
     * confirm against toList.
     */
    private LinkedList referenceWords;

    /**
     * Aligned list of words from rawReference.  Created in
     * alignWords.  Updated with each call to 'align'.
     */
    private LinkedList alignedReferenceWords;

    /**
     * The raw hypothesis string, exactly as passed in.  Updated with
     * each call to 'align'.
     */
    private String rawHypothesis;

    /**
     * Ordered list of words from rawHypothesis after the annotation
     * has been removed.  Updated with each call to 'align'.
     */
    private LinkedList hypothesisWords;

    /**
     * Aligned list of words from rawHypothesis.  Created in
     * alignWords.  Updated with each call to 'align'.
     */
    private LinkedList alignedHypothesisWords;

    /**
     * Helpers to create percentage strings.
*/    static DecimalFormat percentageFormat = new DecimalFormat("##0.0%");            private boolean showResults;    private boolean showAlignedResults;    /**     * Creates a new NISTAlign object.     */    public NISTAlign(boolean showResults, boolean showAlignedResults) {        this.showResults = showResults;        this.showAlignedResults = showAlignedResults;        resetTotals();    }    /**     * Sets whether results are displayed     *     * @param showResults true if the results should be     * displayed     */    public void setShowResults(boolean showResults) {        this.showResults = showResults;    }    /**     * Sets whether aligned results are displayed     *     * @param showAlignedResults true if the aligned results should be     * displayed     */    public void setShowAlignedResults(boolean showAlignedResults) {        this.showAlignedResults = showAlignedResults;    }    /**     * Reset the total insertions, deletions, and substitutions     * counts for this class.     */     public void resetTotals() {        totalSentences = 0;        totalSentencesWithErrors = 0;        totalSentencesWithSubtitutions = 0;        totalSentencesWithInsertions = 0;        totalSentencesWithDeletions = 0;        totalReferenceWords = 0;        totalHypothesisWords = 0;        totalAlignedWords = 0;        totalWordsCorrect = 0;        totalSubstitutions = 0;        totalInsertions = 0;        totalDeletions = 0;    }        /**     * Performs the NIST alignment on the reference and hypothesis     * strings.  This has the side effect of updating nearly all     * the fields of this class.     *     * @param reference the reference string     * @param hypothesis the hypothesis string     *     * @return true if the reference and hypothesis match     */    public boolean align(String reference, String hypothesis) {        int annotationIndex;        // Save the original strings for future reference.        
//        rawReference = reference;        rawHypothesis = hypothesis;        // Strip the annotation off the reference string and        // save it.        //        annotationIndex = rawReference.indexOf('(');        if (annotationIndex != -1) {            referenceAnnotation = rawReference.substring(annotationIndex);            referenceWords = toList(rawReference.substring(0,annotationIndex));        } else {            referenceAnnotation = null;            referenceWords = toList(rawReference);        }        // Strip the annotation off the hypothesis string.        // If one wanted to be anal retentive, they might compare        // the hypothesis annotation to the reference annotation,        // but I'm not quite that obsessive.        //        annotationIndex = rawHypothesis.indexOf('(');        if (annotationIndex != -1) {            hypothesisWords = toList(                rawHypothesis.substring(0, annotationIndex));        } else {            hypothesisWords = toList(rawHypothesis);        }        // Reset the counts for this sentence.        //        substitutions = 0;        insertions = 0;        deletions = 0;        // Turn the list of reference and hypothesis words into two        // aligned lists of strings.  This has the side effect of        // creating alignedReferenceWords and alignedHypothesisWords.        //	alignWords(backtrace(createBacktraceTable(referenceWords,                                                  hypothesisWords)));        // Compute the number of correct words in the hypothesis.        //        correct = alignedReferenceWords.size()            - (insertions + deletions + substitutions);        // Update the totals that are kept over the lifetime of this        // class.        //        updateTotals();        return (insertions + deletions + substitutions) == 0;    }        /**     * Returns the reference string.  
This string will be filtered
     * (all spurious whitespace removed and annotation removed) and
     * set to all lower case.
     *
     * @return the reference string
     */
    public String getReference() {
        return toString(referenceWords);
    }

    /**
     * Returns the hypothesis string, filtered the same way as the
     * reference string (all spurious whitespace removed and
     * annotation removed) and set to all lower case.
     *
     * @return the hypothesis string
     */
    public String getHypothesis() {
        return toString(hypothesisWords);
    }

    /**
     * Returns the reference words of the last alignment as one string.
     *
     * @return the aligned reference string
     */
    public String getAlignedReference() {
        return toString(alignedReferenceWords);
    }

    /**
     * Returns the hypothesis words of the last alignment as one string.
     *
     * @return the aligned hypothesis string
     */
    public String getAlignedHypothesis() {
        return toString(alignedHypothesisWords);
    }

    /**
     * Gets the number of word errors accumulated over all calls to
     * align: substitutions plus insertions plus deletions.
     *
     * @return the total number of word errors for all calls to align
     */
    public int getTotalWordErrors() {
        final int wordErrors =
            totalSubstitutions + totalInsertions + totalDeletions;
        return wordErrors;
    }

    /**
     * Returns the total word accuracy: words correct divided by
     * reference words.
     *
     * @return the accuracy between 0.0 and 1.0; 0 when there are no
     * reference words
     */
    public float getTotalWordAccuracy() {
        return (totalReferenceWords != 0)
            ? (float) totalWordsCorrect / (float) totalReferenceWords
            : 0.0f;
    }

    /**
     * Returns the total word error rate.
     *
     * @return the word error rate; 0 when there are no reference words
     */
    public float getTotalWordErrorRate() {
        if (totalReferenceWords == 0) {
            return 0;
        } else {
            return ((float) getTotalWordErrors())
12 3 下一页
💿 文件大小 33845 K
👤 上传用户 wy50094777
📂 所属分类 Java编程
🏷️ 相关标签

#independent #recognition #the #platform
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -