📄 nbest-rover
字号:
#!/bin/sh
#
# nbest-rover --
#    Combine multiple nbest lists ROVER-style
#
# usage: nbest-rover SENTIDS CONTROL-FILE [POSTERIORS [NBEST-LATTICE-OPTIONS]]
#
# where SENTIDS is list of sentence ids (filenames of nbest lists)
#    if SENTIDS is "-" the list is inferred from the contents of
#    the first N-best directory
#    CONTROL-FILE describes the nbest list sets to be processed
#    POSTERIORS is an optional file to which word posterior probabilities
#    are written.
#
# The format for CONTROL-FILE is
#
#    DIR1    LMW1    WTW1    W1  [ N1 [ S1 ] ]
#    DIR2    LMW2    WTW2    W2  [ N2 [ S2 ] ]
#    ...
#
# Each DIRi names a directory in which nbest lists are to be found.
# LMWi and WTWi are the rescoring weights to be used for the corresponding
# directory.  Wi is the weight to be given to the posteriors computed from
# the respective list.  Ni are optional limits on the number N-best hyps used.
# Si are optional posterior scaling parameters.
#
# A control line whose WTW field is "+" does not name an nbest list set of
# its own: its files are collected as additional score files (weighted by
# its LMW field) and handed to combine-acoustic-scores together with the
# next regular nbest list.
#
# Options interpreted by this script (everything else is passed through to
# nbest-lattice unchanged):
#
#    -amw W                      acoustic model weight (default 1)
#    -write-dir DIR              create DIR; also passed to nbest-lattice
#    -write-nbest-dir DIR        create DIR; also passed to nbest-lattice
#    -write-nbest-posteriors DIR create DIR and write per-sentence
#                                posteriors files there (gzipped)
#    -write-ref-posteriors FILE  write reference posteriors to FILE
#                                (adds -record-hyps for nbest-lattice)
#    -no-mesh                    do not pass -use-mesh to nbest-lattice
#    -wer                        implies -no-mesh; also passed to nbest-lattice
#    -filter CMD                 shell command through which each combined
#                                nbest list is piped (default: cat)
#
# $Header: /home/srilm/devel/utils/src/RCS/nbest-rover,v 1.28 2004/07/13 03:36:10 stolcke Exp $
#

if [ $# -lt 2 ]; then
    echo "usage: $0 [ sentid-list | - ] control-file [posteriors [nbest-lattice-options]]" >&2
    exit 2
fi

sentids=$1
control=$2
shift; shift

amw=1
default_lmw=8
default_wtw=0
default_scale=0
default_max_nbest=0
default_weight=1
mesh_option=-use-mesh

if [ $# -gt 0 ]; then
    posteriors=$1
    shift
else
    posteriors=/dev/null
fi

lattice_dir=
posteriors_dir=
nbest_dir=
ref_posteriors=
filter_script=cat
# Start from an empty option string so that a variable of the same name
# inherited from the environment cannot leak into the nbest-lattice call.
options=

# collect remaining options (mostly to pass them to nbest-lattice)
while [ $# -gt 0 ]; do
    case "$1" in
    -amw)
        amw=$2
        shift; shift ;;
    -write-dir)
        lattice_dir=$2
        options="$options $1 $2"
        shift; shift ;;
    -write-nbest-dir)
        nbest_dir=$2
        options="$options $1 $2"
        shift; shift ;;
    -write-nbest-posteriors)
        posteriors_dir=$2
        shift; shift ;;
    -write-ref-posteriors)
        ref_posteriors=$2
        options="$options -record-hyps"
        shift; shift ;;
    -no-mesh)
        mesh_option=
        shift ;;
    -wer)
        # -wer implies -no-mesh
        mesh_option=
        options="$options $1"
        shift ;;
    -filter)
        filter_script="$2"
        shift; shift ;;
    *)
        options="$options $1"
        shift ;;
    esac
done

# truncate (or create) the posteriors output file up front
: > "$posteriors"

# All scratch data lives in one private directory created by mktemp, rather
# than in predictably named files directly under $TMPDIR.
tmpdir=${TMPDIR-/tmp}
tmp_root=$(mktemp -d "$tmpdir/nbest-rover.XXXXXX") || exit 1
tmp_post=$tmp_root/post
tmp_sentids=$tmp_root/sentids
tmp_nbest_dir=$tmp_root/nbest.dir
tmp_post_dir=$tmp_root/post.dir
tmp_lat_dir=$tmp_root/lat.dir

# remove the scratch directory on exit and on HUP/INT/TERM
trap 'rm -rf "$tmp_root"; exit' 0 1 2 15

mkdir -p "$tmp_nbest_dir" "$tmp_post_dir" "$tmp_lat_dir"

#
# make sentid list if none was specified
#
if [ "$sentids" = "-" ]; then
    # list the directory named in the first field of the first control
    # line and strip path, .gz and .score suffixes from the file names
    gawk '{ print $1; exit }' "$control" | xargs ls | \
    sed -e 's,.*/,,' -e 's,\.gz$,,' -e 's,\.score$,,' | \
    sort > "$tmp_sentids"
else
    # sort on the first field only ("sort +0 -1" is the obsolete spelling
    # of this key and is rejected by current sort implementations)
    sort -k 1,1 "$sentids" > "$tmp_sentids"
fi

# from here on, abort on the first failing command
set -e

#
# create lattice output directory if needed
#
if [ -n "$lattice_dir" ]; then
    mkdir -p "$lattice_dir"
elif [ -n "$ref_posteriors" ]; then
    # reference posteriors are computed from the lattices written by
    # nbest-lattice, so have them written to a scratch directory
    lattice_dir=$tmp_lat_dir
    options="$options -write-dir $lattice_dir"
fi

if [ -n "$nbest_dir" ]; then
    mkdir -p "$nbest_dir"
fi

if [ -n "$posteriors_dir" ]; then
    mkdir -p "$posteriors_dir"
elif [ -n "$ref_posteriors" ]; then
    posteriors_dir=$tmp_post_dir
fi

#
# For each sentence: run every nbest list named by the control file through
# nbest-posteriors, concatenate the results (via the filter command) into
# one temporary nbest file, and feed the names of those files to
# nbest-lattice.
#
while read -r sentid refwords
do
    extra_weights=
    extra_scores=
    noheader=0
    nbest_tag=1

    if [ -n "$posteriors_dir" ]; then
        posteriors_file=$posteriors_dir/$sentid
        : > "$posteriors_file"
    else
        posteriors_file=
    fi

    while read -r dir lmw wtw weight max_nbest scale rest
    do
        if [ "$wtw" = "+" ]; then
            # additional score file: remember it (and its weight) for
            # the next regular nbest list
            if [ -f "$dir/$sentid.gz" ]; then
                extra_scores="$extra_scores $dir/$sentid.gz"
            elif [ -f "$dir/$sentid" ]; then
                extra_scores="$extra_scores $dir/$sentid"
            else
                printf '%s\n' "$dir/$sentid is missing" >&2
                continue
            fi
            extra_weights="$extra_weights $lmw"
            continue
        fi

        if [ -f "$dir/$sentid" ]; then
            nbest_file=$dir/$sentid
        elif [ -f "$dir/$sentid.gz" ]; then
            nbest_file=$dir/$sentid.gz
        elif [ -f "$dir/$sentid.score.gz" ]; then
            nbest_file=$dir/$sentid.score.gz
        else
            printf '%s\n' "$dir/$sentid.score.gz is missing" >&2
            extra_weights=
            extra_scores=
            continue
        fi

        if [ -n "$extra_weights" ] || [ "$amw" != 1 ]; then
            # $extra_scores is a space-separated list of file names and
            # is deliberately left unquoted so it splits into arguments
            combine-acoustic-scores \
                -v "weights=$amw $extra_weights" \
                -v max_nbest="${max_nbest:-$default_max_nbest}" \
                "$nbest_file" $extra_scores
        else
            gunzip -c -f "$nbest_file"
        fi | \
        nbest-posteriors noheader="$noheader" \
            lmw="${lmw:-$default_lmw}" \
            wtw="${wtw:-$default_wtw}" \
            weight="${weight:-$default_weight}" \
            max_nbest="${max_nbest:-$default_max_nbest}" \
            postscale="${scale:-$default_scale}" \
            nbest_tag="$nbest_tag" \
            output_posteriors="$posteriors_file"

        extra_weights=
        extra_scores=
        # noheader is 0 only for the first list of each sentence
        noheader=1
        nbest_tag=$((nbest_tag + 1))
    done < "$control" | \
    eval "$filter_script" \
        > "$tmp_nbest_dir/$sentid"
    # NOTE(review): the -filter argument is executed with eval by design;
    # it must only ever come from a trusted caller.

    if [ -n "$posteriors_file" ]; then
        gzip -f "$posteriors_file"
    fi

    printf '%s\n' "$tmp_nbest_dir/$sentid"
done < "$tmp_sentids" | \
nbest-lattice -nbest-files - \
    $mesh_option \
    -rescore-lmw 0 -rescore-wtw 0 \
    -posterior-amw 0 -posterior-lmw 0 -posterior-wtw 0 \
    -debug 2 $options 2>"$tmp_post" | \
while read -r sentid hyp; do
    # delete tmp nbest lists to avoid huge data accumulation
    rm -f "$tmp_nbest_dir/$sentid"
    printf '%s\n' "$sentid $hyp"
done
# $mesh_option and $options above are intentionally unquoted: the former may
# be empty and the latter is a space-separated list of pass-through options.

if [ -n "$ref_posteriors" ]; then
    : > "$ref_posteriors"

    while read -r sentid refwords
    do
        gunzip -c "$lattice_dir/$sentid.gz" | \
        find-reference-posteriors sentid="$sentid" \
            posteriors_file="$posteriors_dir/$sentid.gz" >> "$ref_posteriors"
    done < "$tmp_sentids"
fi

# Post-process the stderr output captured from nbest-lattice: lines whose
# second field is "post" go to the posteriors file (with that field blanked),
# lines whose second field is "err" are dropped, and everything else is
# forwarded to stderr.
gawk '$2 == "post" { $2 = ""; print; next; }
      $2 == "err"  { next; }
                   { print > "/dev/stderr"; }' "$tmp_post" > "$posteriors"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -