📄 nistalign.java
字号:
// for (int i = 1; i <= referenceWords.size(); i++) { for (int j = 1; j <= hypothesisWords.size(); j++) { minPenalty = MAX_PENALTY; // First assume that this represents a deletion. // penalty = penaltyTable[i-1][j] + DELETION_PENALTY; if (penalty < minPenalty) { minPenalty = penalty; penaltyTable[i][j] = penalty; backtraceTable[i][j] = DELETION; } // If the words match, we'll assume it's OK. // Otherwise, we assume we have a substitution. // if (referenceWords.get(i-1).equals(hypothesisWords.get(j-1))) { penalty = penaltyTable[i-1][j-1]; if (penalty < minPenalty) { minPenalty = penalty; penaltyTable[i][j] = penalty; backtraceTable[i][j] = OK; } } else { penalty = penaltyTable[i-1][j-1] + SUBSTITUTION_PENALTY; if (penalty < minPenalty) { minPenalty = penalty; penaltyTable[i][j] = penalty; backtraceTable[i][j] = SUBSTITUTION; } } // If you've made it this far, it should be obvious I // have no idea what the heck this code is doing. I'm // just doing a transliteration. // penalty = penaltyTable[i][j-1] + DELETION_PENALTY; if (penalty < minPenalty) { minPenalty = penalty; penaltyTable[i][j] = penalty; backtraceTable[i][j] = INSERTION; } } } return backtraceTable; } /** * Backtraces through the penalty table. This starts at the * "lower right" corner (i.e., the last word of the longer of * the reference vs. hypothesis strings) and works its way * backwards. 
* * @param backtraceTable created from call to createBacktraceTable * * @return a linked list of Integers representing the backtrace */ LinkedList backtrace(int[][] backtraceTable) { LinkedList list = new LinkedList(); int i = referenceWords.size(); int j = hypothesisWords.size(); while ((i >= 0) && (j >= 0)) { list.add(new Integer(backtraceTable[i][j])); switch (backtraceTable[i][j]) { case OK : i--; j--; break; case SUBSTITUTION : i--; j--; substitutions++; break; case INSERTION : j--; insertions++; break; case DELETION : i--; deletions++; break; } } return list; } /** * Based on the backtrace information, words are aligned as * appropriate with insertions and deletions causing asterisks * to be placed in the word lists. This generates the * alignedReferenceWords and alignedHypothesisWords lists. * * @param backtrace the backtrace list created in backtrace */ void alignWords(LinkedList backtrace) { ListIterator referenceWordsIterator = referenceWords.listIterator(); ListIterator hypothesisWordsIterator = hypothesisWords.listIterator(); String referenceWord; String hypothesisWord; alignedReferenceWords = new LinkedList(); alignedHypothesisWords = new LinkedList(); for (int m = backtrace.size() - 2; m >= 0; m--) { int backtraceEntry = ((Integer) backtrace.get(m)).intValue(); if (backtraceEntry != INSERTION) { referenceWord = (String) referenceWordsIterator.next(); } else { referenceWord = null; } if (backtraceEntry != DELETION) { hypothesisWord = (String) hypothesisWordsIterator.next(); } else { hypothesisWord = null; } switch (backtraceEntry) { case SUBSTITUTION: { referenceWord = referenceWord.toUpperCase(); hypothesisWord = hypothesisWord.toUpperCase(); break; } case INSERTION: { hypothesisWord = hypothesisWord.toUpperCase(); break; } case DELETION: { referenceWord = referenceWord.toUpperCase(); break; } case OK: break; } // Expand the missing words out to be all *'s. 
// if (referenceWord == null) { referenceWord = STARS.substring(0, hypothesisWord.length()); } if (hypothesisWord == null) { hypothesisWord = STARS.substring(0, referenceWord.length()); } // Fill the words up with spaces so they are the same // length. // if (referenceWord.length() > hypothesisWord.length()) { hypothesisWord = hypothesisWord.concat( SPACES.substring(0, referenceWord.length() - hypothesisWord.length())); } else if (referenceWord.length() < hypothesisWord.length()) { referenceWord = referenceWord.concat( SPACES.substring(0, hypothesisWord.length() - referenceWord.length())); } alignedReferenceWords.add(referenceWord); alignedHypothesisWords.add(hypothesisWord); } } /** * Updates the total counts based on the current alignment. */ void updateTotals() { totalSentences++; if ((substitutions + insertions + deletions) != 0) { totalSentencesWithErrors++; } if (substitutions != 0) { totalSentencesWithSubtitutions++; } if (insertions != 0) { totalSentencesWithInsertions++; } if (deletions != 0) { totalSentencesWithDeletions++; } totalReferenceWords += referenceWords.size(); totalHypothesisWords += hypothesisWords.size(); totalAlignedWords += alignedReferenceWords.size(); totalWordsCorrect += correct; totalSubstitutions += substitutions; totalInsertions += insertions; totalDeletions += deletions; } /** * Turns the numerator/denominator into a percentage. * * @param pattern percentage pattern (ala DecimalFormat) * @param numerator the numerator * @param denominator the denominator * * @return a String that represents the percentage value. */ String toPercentage(String pattern, int numerator, int denominator) { percentageFormat.applyPattern(pattern); return padLeft( 6, percentageFormat.format((double) numerator / (double) denominator)); } /** * Turns the float into a percentage. * * @param pattern percentage pattern (ala DecimalFormat) * @param value the floating point value * * @return a String that represents the percentage value. 
*/ String toPercentage(String pattern, float value) { percentageFormat.applyPattern(pattern); return percentageFormat.format(value); } /** * Turns the integer into a left-padded string. * * @param width the total width of String, including spaces * @param i the integer * * @return a String padded left with spaces */ String padLeft(int width, int i) { return padLeft(width, Integer.toString(i)); } /** * Pads a string to the left with spaces (i.e., prepends spaces to * the string so it fills out the given width). * * @param width the total width of String, including spaces * @param string the String to pad * * @return a String padded left with spaces */ String padLeft(int width, String string) { int len = string.length(); if (len < width) { return SPACES.substring(0,width-len).concat(string); } else { return string; } } /** * Converts the given String or words to a LinkedList. * * @param s the String of words to parse to a LinkedList * * @return a list, one word per item */ LinkedList toList(String s) { LinkedList list = new LinkedList(); StringTokenizer st = new StringTokenizer(s.trim()); while (st.hasMoreTokens()) { String token = st.nextToken().toLowerCase(); list.add(token); } return list; } /** * convert the list of words back to a space separated string * * @param list the list of words * @return a space separated string */ private String toString(LinkedList list) { if (list != null) { StringBuffer sb = new StringBuffer(); ListIterator iterator = list.listIterator(); while (iterator.hasNext()) { sb.append(iterator.next()); if (iterator.hasNext()) { sb.append(" "); } } return sb.toString(); } else { return ""; } } /** * Take two filenames -- the first contains a list of reference * sentences, the second contains a list of hypothesis sentences. * Aligns each pair of sentences and outputs the individual and * total results. 
*
     * Reading stops at the end of the shorter file. An I/O error while
     * reading is reported on standard error and the totals for the
     * sentences read so far are still printed. Both files are closed
     * before the method returns.
     *
     * @param args args[0] is the reference file name, args[1] the
     *             hypothesis file name
     */
    public static void main(String args[]) {
        if (args.length < 2) {
            // Say what is wrong directly instead of relying on an
            // ArrayIndexOutOfBoundsException and its stack trace.
            System.out.println();
            System.out.println("Usage: align <reference file> <hypothesis file>");
            System.out.println();
            return;
        }

        NISTAlign align = new NISTAlign(true, true);

        BufferedReader referenceFile = null;
        BufferedReader hypothesisFile = null;

        try {
            referenceFile = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[0])));
            hypothesisFile = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[1])));
            try {
                while (true) {
                    String reference = referenceFile.readLine();
                    String hypothesis = hypothesisFile.readLine();
                    if ((reference == null) || (hypothesis == null)) {
                        break;
                    }
                    align.align(reference, hypothesis);
                    align.printNISTSentenceSummary();
                }
            } catch (java.io.IOException e) {
                // Keep going to the total summary as before, but do not
                // hide the fact that the input was cut short.
                System.err.println(
                    "Error reading input; totals cover only the sentences"
                    + " read so far: " + e);
            }
            align.printNISTTotalSummary();
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.out.println();
            System.out.println("Usage: align <reference file> <hypothesis file>");
            System.out.println();
        } finally {
            closeReader(referenceFile);
            closeReader(hypothesisFile);
        }
    }

    /**
     * Closes a reader that may never have been opened, ignoring a
     * failure to close because nothing useful can be done about it at
     * this point.
     *
     * @param reader the reader to close, or null
     */
    private static void closeReader(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (java.io.IOException ignored) {
            // Best effort: the data has already been read.
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -