📄 align-with-tags
字号:
#!/bin/sh
#
# align-with-tags --
#   align reference transcript with tags to hypothesized
#   transcripts, merging the tags into the latter
#
# $Header: /home/srilm/devel/utils/src/RCS/align-with-tags,v 1.3 2004/09/28 20:39:07 stolcke Exp $
#
# Usage:
#   align-with-tags [-r ref -h hyp] [-dictionary dict] [-options...]
#   align-with-tags [-dictionary dict] [-options...] ref hyp
#
# Both transcripts are read as one sentence per line, the first field being
# the sentence id.  Tags are words of the form <...>.  Any other argument
# starting with "-" is forwarded verbatim to nbest-lattice; the first
# argument that does not start with "-" ends option parsing.
#
# Requires: gawk, gunzip, mktemp, and nbest-lattice on PATH.
#

# Print a usage message to stderr and exit with status 2.
usage () {
  echo "usage: $0 [-r ref -h hyp] [-options...]" >&2
  exit 2
}

ref=/dev/null
hyp=/dev/null
dictionary=/dev/null
pass_options=

while [ $# -gt 0 ]; do
  case "$1" in
  -r)
    [ $# -ge 2 ] || usage
    ref="$2"
    shift; shift;;
  -h)
    [ $# -ge 2 ] || usage
    hyp="$2"
    shift; shift;;
  -dictionary)
    [ $# -ge 2 ] || usage
    dictionary="$2"
    shift; shift;;
  -\?)
    usage;;
  -*)
    # Accumulate (not overwrite) the options handed on to nbest-lattice.
    pass_options="$pass_options $1"
    shift;;
  *)
    break;;
  esac
done

# Positional form: the first two remaining arguments are ref and hyp.
if [ $# -ge 2 ]; then
  ref="$1"
  hyp="$2"
elif [ $# -gt 0 ]; then
  usage
fi

# All scratch files live in one private directory instead of predictable
# /tmp/<name>$$ paths.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/align-with-tags.XXXXXX") || exit 1
tmpdict=$tmpdir/dict
tmptags=$tmpdir/tags
tmprefs=$tmpdir/refs
tmphyps=$tmpdir/hyps
tmpnbest=$tmpdir/nbest
tmpmerge=$tmpdir/merged

trap 'rm -rf "$tmpdir"; exit' 0 1 2 15

# prepare_text FILE [TAG_FILE]
#   Normalize a transcript for alignment and print it on stdout:
#   fields 2..NF that are not tags are lowercased, and "<default>" is
#   appended to every field that is not a tag and is not followed by one,
#   including the last field of the line.
#   If TAG_FILE is given, every distinct tag seen (plus "<default>") is
#   written to it, one per line.
prepare_text () {
  gawk -v tag_file="$2" '
  BEGIN {
    tag_list["<default>"] = 1;
  }
  function is_tag(x) {
    return (x ~ /^<.*>$/);
  }
  {
    for (i = 2; i <= NF; i ++) {
      if (is_tag($i)) {
        tag_list[$i] = 1;
      } else {
        $i = tolower($i);
      }
      # two adjacent non-tag fields: put a default tag between them
      if (!is_tag($(i - 1)) && !is_tag($i)) {
        $(i - 1) = $(i - 1) " <default>";
      }
    }
    if (!is_tag($NF)) {
      $NF = $NF " <default>";
    }
    print $0;
  }
  END {
    if (tag_file) {
      for (tag in tag_list) {
        print tag > tag_file;
      }
    }
  }' "$1"
}

# parse_alignment FILE SENTID
#   Read the (possibly gzip-compressed) alignment FILE and print one line:
#   SENTID followed by the merged word/tag sequence.  Only lines whose first
#   field is "align" are used; $3 is treated as the hypothesis-side token and
#   $5 as the reference-side token.  Lines of any other shape are reported on
#   stderr.
parse_alignment () {
  gunzip -c -f < "$1" |
  gawk -v sentid="$2" '
  BEGIN {
    output = sentid;

    # The original script set show_refs = 1 here but tested the
    # never-assigned name showrefs below, so mismatched reference
    # words were never printed.  That effective behaviour is kept;
    # set this to 1 to add them to the output in parentheses.
    show_refs = 0;
  }
  function is_empty(x) {
    return x == "<default>" || tolower(x) == "*delete*";
  }
  function is_tag(x) {
    return x ~ /^<.*>$/;
  }
  $1 == "align" {
    if (NF == 4 && $4 == 1) {
      # matching hyp and ref
      if (!is_empty($3)) {
        output = output " " $3;
      }
    } else if (NF == 6 && $4 == 1 && $6 == 0) {
      # mismatched hyp and ref
      if (is_empty($3)) {
        if (is_tag($5)) {
          # nothing on the hyp side: take the tag from the ref
          if (!is_empty($5)) {
            output = output " " $5;
          }
        } else if (show_refs) {
          output = output " (" $5 ")";
        }
      } else {
        if (is_empty($5) || !show_refs) {
          output = output " " $3;
        } else {
          output = output " " $3 " (" $5 ")";
        }
      }
    } else {
      print "unexpected alignment: " $0 > "/dev/stderr";
    }
  }
  END {
    print output;
  }'
}

set -e

#
# format hyps and refs for alignment
#
prepare_text "$ref" "$tmptags" > "$tmprefs"
prepare_text "$hyp" > "$tmphyps"

#
# add tag pronunciations to the dictionary
#
if [ "$dictionary" != /dev/null ]; then
  gunzip -c -f "$dictionary" > "$tmpdict"
else
  : > "$tmpdict"
fi
gawk '{ print $1, "**TAG**" }' "$tmptags" >> "$tmpdict"

#
# do the alignments
#
mkdir -p "$tmpnbest" "$tmpmerge"

# Stage 1: write each hypothesis as a one-entry N-best file (scores "0 0 0")
#          and print the file name for nbest-lattice to pick up on stdin.
# Stage 2: nbest-lattice aligns each N-best file against the references.
# Stage 3: read the sentence ids printed by nbest-lattice and convert
#          $tmpmerge/<sentid>.gz; processing lags one sentence behind,
#          presumably so that a file is complete before it is read
#          (TODO confirm against nbest-lattice behaviour).
while read -r sentid words
do
  printf '%s\n' "0 0 0 $words" > "$tmpnbest/$sentid"
  printf '%s\n' "$tmpnbest/$sentid"
done < "$tmphyps" |
# $pass_options is deliberately unquoted: it holds several separate options.
# shellcheck disable=SC2086
nbest-lattice -nbest-files - \
  -use-mesh \
  -dictionary "$tmpdict" \
  -keep-noise \
  -refs "$tmprefs" \
  $pass_options \
  -write-dir "$tmpmerge" |
(
  last_sentid=
  while read -r sentid rest
  do
    if [ -n "$last_sentid" ]; then
      parse_alignment "$tmpmerge/$last_sentid.gz" "$last_sentid"
    fi
    last_sentid=$sentid
  done
  if [ -n "$last_sentid" ]; then
    parse_alignment "$tmpmerge/$last_sentid.gz" "$last_sentid"
  fi
)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -