📄 compute-sclite
字号:
#!/bin/sh
#
# compute-sclite --
#    compute word error rate from a sentid hyp file and a sentid reference
#    file, using the NIST 'sclite' program
#
# usage: compute-sclite [-v] -h hyps -r refs [-S id-subset] [-M|-multiwords]
#                       [-noperiods] [-g glm-file] [sclite-options ...]
#    or  compute-sclite hyps refs
#
# Options handled by this script (anything else is passed on to sclite):
#    -v              trace the final sclite command (set -x)
#    -h hyps         hypothesis file; may be repeated, the basename of the
#                    last one is used as the sclite system name
#    -r refs         reference file; a name ending in .stm or .stm.filt
#                    selects STM/CTM scoring, anything else sentid/trn scoring
#    -S id-subset    file joined against the hyp sentids to restrict scoring;
#                    may be repeated
#    -M|-multiwords  replace "_" by a space inside words (trn scoring only)
#    -noperiods      delete "." from hypothesis words
#    -H              drop hyp lines containing %HESITATION (only applied
#                    after GLM filtering, i.e. together with -g)
#    -R              use "<>" instead of "@reject@" as the reject token
#                    that is deleted from hyps (trn scoring only)
#    -g glm-file     filter refs/hyps through csrfilt.sh with this GLM file
#    -s              case-sensitive: adds -s for both csrfilt.sh and sclite
#
# External programs used: gawk, join, sort, sed, sclite, csrfilt.sh,
# sentid-to-sclite, sentid-to-ctm.
#
# $Header: /home/srilm/devel/utils/src/RCS/compute-sclite,v 1.29 2005/05/08 21:18:50 stolcke Exp $
#

# enforce proper sorting order (sort and join must agree on collation)
LC_COLLATE=C
export LC_COLLATE

reject="@reject@"

# Initialise every option variable so that values inherited from the
# caller's environment cannot silently change behaviour.
subsets=
remove_periods=
remove_hesitations=
multiwords=
case_sensitive=
verbose=
glmfile=
hyps=
refs=
name=
options=
filter_options=

# Print the usage message on stderr and exit with status 2.
usage () {
    echo "usage: $0 [-v] -h hyps -r refs [-S id-subset] [-M|-multiwords] [-noperiods] [-g glm-file] [sclite-options ...]" >&2
    echo " or $0 hyps refs" >&2
    exit 2
}

if [ $# -lt 2 ]; then
    usage
elif [ $# -eq 2 ]; then
    # old syntax
    hyps=${1}
    refs=${2}
    # the old syntax has no -h option to derive the system name from,
    # so take it from the hyps file here
    name=$(basename "$hyps")
else
    # parse arguments
    while [ $# -gt 0 ]; do
        case "$1" in
        -v) verbose=1
            ;;
        -r) [ $# -ge 2 ] || usage
            refs=$2
            shift
            ;;
        -h) [ $# -ge 2 ] || usage
            hyps="$hyps $2"
            name=$(basename "$2")
            shift
            ;;
        -S) [ $# -ge 2 ] || usage
            subsets="$subsets $2"
            shift
            ;;
        -M|-multiwords)
            multiwords=1
            ;;
        -noperiods)
            remove_periods=1
            ;;
        -H) remove_hesitations=1
            ;;
        -R) reject="<>"
            ;;
        -g) [ $# -ge 2 ] || usage
            glmfile=$2
            shift
            ;;
        -s) case_sensitive=1
            ;;
        *)  options="$options $1"
            ;;
        esac
        shift
    done
fi

# Without hyps the pipelines below would read from the terminal, and
# without refs there is nothing to score against.
if [ -z "$hyps" ] || [ -z "$refs" ]; then
    usage
fi

if [ -n "$case_sensitive" ]; then
    filter_options="-s"
    options="$options -s"
fi

# Keep all intermediate files in a private directory instead of
# predictable /tmp/ce.*$$ names.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/ce.XXXXXX" 2>/dev/null) || {
    # no usable mktemp: mkdir fails if the name already exists, and the
    # umask keeps the directory private
    tmpdir=${TMPDIR:-/tmp}/ce.$$
    (umask 077 && mkdir "$tmpdir") || {
        echo "$0: cannot create temporary directory" >&2
        exit 1
    }
}
# Clean up on exit; on a signal, exit (which runs the exit trap) rather
# than continuing with the temporary files already removed.
trap '/bin/rm -rf "$tmpdir"' 0
trap 'exit 1' 1 2 13 15

sentids=$tmpdir/sentids
speakers=$tmpdir/speakers
sortedrefs=$tmpdir/refs
sortedhyps=$tmpdir/hyps
ignorehyps=$tmpdir/ign

# multijoin file1 [file2 ...] --
#    join(1) an arbitrary number of files on their first field; "-" names
#    stdin.  With a single argument the file is simply copied to stdout.
multijoin () {
    if [ $# -eq 1 ]; then
        cat "$1"
    else
        join "$1" "$2" | { shift; shift; multijoin - "$@"; }
    fi
}

# extract_speakers --
#    reduce the sentids to lowercased speaker keys (everything up to and
#    including the A/B channel letter) and store them in $speakers
extract_speakers () {
    sed 's,\([-_][ABab]\)[-_].*,\1,' "$sentids" | uniq |
    tr '[A-Z]' '[a-z]' > "$speakers"
}

# select_stm_refs --
#    print the non-comment STM segments of $refs whose lowercased
#    "field1_field2" key occurs in $speakers, sorted by that key and then
#    numerically by field 5 of the keyed line; a seventh field of the
#    form <...> is replaced by "<>"
select_stm_refs () {
    gawk '!/^;;/ { print tolower($1 "_" $2), $0 }' "$refs" |
    sort -k 1,1 -k 5,5n |
    join - "$speakers" |
    gawk '{ $1 = ""; if ($7 ~ /^<.*>$/) $7 = "<>"; print }'
}

#
# extract and sort sentids from hyps
#
# $hyps and $subsets are deliberately unquoted: each may hold several
# space-separated file names.
case "$hyps" in
*.ctm)
    cat $hyps |
    gawk '!/^;;/ && $7 != "non-lex" && $7 != "fp" { print $1 "_" $2 }'
    ;;
*)
    cat $hyps | gawk '{ print $1 }'
    ;;
esac |
sort |
multijoin - $subsets > "$sentids"

#
# extract and sort refs for these sentids
#
case "$refs" in
*.stm)
    # NIST scoring
    extract_speakers

    # filter out speakers not occurring in hyp file
    select_stm_refs |
    if [ -n "$glmfile" ]; then
        gawk '{ gsub("-","_",$1); gsub("-","_",$3); print }' |
        csrfilt.sh $filter_options -i stm -dh "$glmfile"
    else
        cat
    fi > "$sortedrefs"
    ;;
*.stm.filt)
    # NIST scoring with pre-filtered references
    extract_speakers

    # filter out speakers not occurring in hyp file
    select_stm_refs |
    if [ -n "$glmfile" ]; then
        # references are already filtered: only map "-" to "_"
        gawk '{ gsub("-","_",$1); gsub("-","_",$3); print }'
    else
        cat
    fi > "$sortedrefs"
    ;;
*)
    sort "$refs" | join - "$sentids" |
    gawk '{
        if (multiwords)
            for (i = 2; i <= NF; i++)
                gsub("_", " ", $i);
        print
    }' multiwords=$multiwords |
    sed -e 's,\[[^]]*\],,g' |
    sentid-to-sclite > "$sortedrefs"

    # find segments to ignore
    gawk 'NF == 2 && tolower($2) == "ignore_time_segment_in_scoring" { print $1 }' < "$refs" |
    sort > "$ignorehyps"
    ;;
esac

#
# sort and condition hyps
#
case "$refs" in
*.stm|*.stm.filt)
    # NIST scoring
    # sclite will handle ignored segments
    case "$hyps" in
    *.ctm)
        cat $hyps |
        gawk '!/^;;/ { print tolower($1 "_" $2), $0 }' |
        join - "$speakers" |
        gawk '{ $1 = ""; print }'
        ;;
    *)
        sort -k 1,1 $hyps | join - "$sentids" | sentid-to-ctm
        ;;
    esac |
    gawk '{
        # handle new-style CTM format (convert it to old format)
        if (NF >= 7) {
            if ($7 != "lex") next;
            else $7 = $8 = "";
        }
        if (remove_periods) gsub("[.]", "", $5);
        print;
    }' remove_periods=$remove_periods |
    if [ -n "$glmfile" ]; then
        gawk '{ gsub("-","_",$1); print }' |
        csrfilt.sh $filter_options -i ctm -dh "$glmfile" |
        if [ -n "$remove_hesitations" ]; then
            grep -v '%HESITATION'
        else
            cat
        fi
    else
        cat
    fi > "$sortedhyps"
    ;;
*)
    # we have to remove ignored segments ourselves
    sort -k 1,1 $hyps | join - "$sentids" | join -v 1 - "$ignorehyps" |
    gawk '{
        if (multiwords)
            for (i = 2; i <= NF; i++)
                gsub("_", " ", $i);
        if (remove_periods)
            for (i = 2; i <= NF; i++)
                gsub("[.]", "", $i);
        print
    }' remove_periods=$remove_periods multiwords=$multiwords |
    sed -e 's,\[[^]]*\],,g' \
        -e 's,<[^>]*>,,g' \
        -e "s,$reject,,g" \
        -e 's,-pau-,,g' |
    sentid-to-sclite > "$sortedhyps"
    ;;
esac

if [ -n "$verbose" ]; then
    set -x
fi

# $options is deliberately unquoted: it is a list of pass-through
# sclite options.
case "$refs" in
*.stm|*.stm.filt)
    # NIST scoring
    sclite -f 0 -O . -n "$name" \
        -h "$sortedhyps" ctm "$name" -r "$sortedrefs" stm \
        -D $options
    ;;
*)
    sclite -f 0 -O . -n "$name" \
        -h "$sortedhyps" trn "$name" -r "$sortedrefs" trn \
        -i swb $options
    ;;
esac
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -