📄 nistalign.java
字号:
/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.util;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.StringTokenizer;

/**
 * Implements a portion of the NIST align/scoring algorithm to compare
 * a reference string to a hypothesis string.  It only keeps track of
 * substitutions, insertions, and deletions.
 */
public class NISTAlign {

    /* Constants that help with the align.  The following are
     * used in the backtrace table and backtrace list.
     */
    final static int OK = 0;
    final static int SUBSTITUTION = 1;
    final static int INSERTION = 2;
    final static int DELETION = 3;

    /* Constants that help with the align.  The following are
     * used to create the penalty table.
     */
    final static int MAX_PENALTY = 1000000;
    final static int SUBSTITUTION_PENALTY = 100;
    final static int INSERTION_PENALTY = 75;
    final static int DELETION_PENALTY = 75;

    /* Used for padding out aligned strings.
     *
     * NOTE(review): SPACES is a single space in this copy of the file
     * while STARS is much longer.  If both are meant to pad to the same
     * width, the SPACES literal may have lost whitespace somewhere --
     * verify against the upstream source before relying on it.
     */
    final static String STARS = "********************************************";
    final static String SPACES = " ";
    final static String HRULE = "============================================"
            + "================================";

    /**
     * Totals over the life of this class.  These can be reset to 0
     * with a call to resetTotals.
     *
     * Note: "Subtitutions" in totalSentencesWithSubtitutions is a
     * misspelling of "Substitutions".  The name is left unchanged here
     * because other methods of this class refer to the field by this name.
     */
    private int totalSentences;
    private int totalSentencesWithErrors;
    private int totalSentencesWithSubtitutions;
    private int totalSentencesWithInsertions;
    private int totalSentencesWithDeletions;
    private int totalReferenceWords;
    private int totalHypothesisWords;
    private int totalAlignedWords;
    private int totalWordsCorrect;
    private int totalSubstitutions;
    private int totalInsertions;
    private int totalDeletions;

    /**
     * Error values for one call to 'align'.
     */
    private int substitutions;
    private int insertions;
    private int deletions;
    private int correct;

    /**
     * The raw reference string.  Updated with each call to 'align'.
     */
    private String rawReference;

    /**
     * The reference annotation; typically the name of the
     * audio file for the reference string.  This is an optional
     * part of the rawReference string.  If it is included, it
     * is appended to the end of the string in parentheses.
     * Updated with each call to 'align'.
     */
    private String referenceAnnotation;

    /**
     * Ordered list of words from rawReference after the annotation
     * has been removed.  Updated with each call to 'align'.
     */
    private LinkedList referenceWords;

    /**
     * Aligned list of words from rawReference.  Created in
     * alignWords.  Updated with each call to 'align'.
     */
    private LinkedList alignedReferenceWords;

    /**
     * The raw hypothesis string.  Updated with each call to
     * 'align'.
     */
    private String rawHypothesis;

    /**
     * Ordered list of words from rawHypothesis after the annotation
     * has been removed.  Updated with each call to 'align'.
     */
    private LinkedList hypothesisWords;

    /**
     * Aligned list of words from rawHypothesis.  Created in
     * alignWords.  Updated with each call to 'align'.
     */
    private LinkedList alignedHypothesisWords;

    /**
     * Helpers to create percentage strings.
     */
    static DecimalFormat percentageFormat = new DecimalFormat("##0.0%");

    /* Display flags; set by the constructor and by the
     * setShowResults / setShowAlignedResults setters.
     */
    private boolean showResults;
    private boolean showAlignedResults;

    /**
     * Creates a new NISTAlign object.
     *
     * @param showResults true if the results should be displayed
     * @param showAlignedResults true if the aligned results should be
     * displayed
*/ public NISTAlign(boolean showResults, boolean showAlignedResults) { this.showResults = showResults; this.showAlignedResults = showAlignedResults; resetTotals(); } /** * Sets whether results are displayed * * @param showResults true if the results should be * displayed */ public void setShowResults(boolean showResults) { this.showResults = showResults; } /** * Sets whether aligned results are displayed * * @param showAlignedResults true if the aligned results should be * displayed */ public void setShowAlignedResults(boolean showAlignedResults) { this.showAlignedResults = showAlignedResults; } /** * Reset the total insertions, deletions, and substitutions * counts for this class. */ public void resetTotals() { totalSentences = 0; totalSentencesWithErrors = 0; totalSentencesWithSubtitutions = 0; totalSentencesWithInsertions = 0; totalSentencesWithDeletions = 0; totalReferenceWords = 0; totalHypothesisWords = 0; totalAlignedWords = 0; totalWordsCorrect = 0; totalSubstitutions = 0; totalInsertions = 0; totalDeletions = 0; } /** * Performs the NIST alignment on the reference and hypothesis * strings. This has the side effect of updating nearly all * the fields of this class. * * @param reference the reference string * @param hypothesis the hypothesis string * * @return true if the reference and hypothesis match */ public boolean align(String reference, String hypothesis) { int annotationIndex; // Save the original strings for future reference. // rawReference = reference; rawHypothesis = hypothesis; // Strip the annotation off the reference string and // save it. // annotationIndex = rawReference.indexOf('('); if (annotationIndex != -1) { referenceAnnotation = rawReference.substring(annotationIndex); referenceWords = toList(rawReference.substring(0,annotationIndex)); } else { referenceAnnotation = null; referenceWords = toList(rawReference); } // Strip the annotation off the hypothesis string. 
// If one wanted to be anal retentive, they might compare // the hypothesis annotation to the reference annotation, // but I'm not quite that obsessive. // annotationIndex = rawHypothesis.indexOf('('); if (annotationIndex != -1) { hypothesisWords = toList( rawHypothesis.substring(0, annotationIndex)); } else { hypothesisWords = toList(rawHypothesis); } // Reset the counts for this sentence. // substitutions = 0; insertions = 0; deletions = 0; // Turn the list of reference and hypothesis words into two // aligned lists of strings. This has the side effect of // creating alignedReferenceWords and alignedHypothesisWords. // alignWords(backtrace(createBacktraceTable(referenceWords, hypothesisWords))); // Compute the number of correct words in the hypothesis. // correct = alignedReferenceWords.size() - (insertions + deletions + substitutions); // Update the totals that are kept over the lifetime of this // class. // updateTotals(); return (insertions + deletions + substitutions) == 0; } /** * Returns the reference string. This string will be filtered * (all spurious whitespace removed and annotation removed) and * set to all lower case. * * @return the reference string */ public String getReference() { return toString(referenceWords); } /** * Returns the hypothesis string. This string will be filtered * (all spurious whitespace removed and annotation removed) and * set to all lower case. * * @return the hypothesis string */ public String getHypothesis() { return toString(hypothesisWords); } /** * Returns the aligned reference string. * * @return the aligned reference string */ public String getAlignedReference() { return toString(alignedReferenceWords); } /** * Returns the aligned hypothesis string. * * @return the aligned hypothesis string */ public String getAlignedHypothesis() { return toString(alignedHypothesisWords); } /** * Gets the total number of word errors for all calls to align. 
*
     * @return the total number of word errors for all calls to align
     */
    public int getTotalWordErrors() {
        return totalSubstitutions + totalInsertions + totalDeletions;
    }

    /**
     * Returns the total word accuracy: the number of correct words
     * divided by the number of reference words, accumulated over all
     * calls to align.
     *
     * @return the accuracy as a fraction, or 0 if no reference words
     * have been counted yet
     */
    public float getTotalWordAccuracy() {
        if (totalReferenceWords == 0) {
            return 0;
        } else {
            return ((float) totalWordsCorrect) / ((float) totalReferenceWords);
        }
    }

    /**
     * Returns the total word error rate, computed from
     * getTotalWordErrors().
     *
     * NOTE(review): the remainder of this method lies beyond the
     * visible part of the file; presumably the error count is divided
     * by totalReferenceWords -- confirm against the full source.
     *
     * @return the error rate, or 0 if no reference words have been
     * counted yet
     */
    public float getTotalWordErrorRate() {
        if (totalReferenceWords == 0) {
            return 0;
        } else {
            return ((float) getTotalWordErrors())
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -