⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 align-with-tags

📁 这是一款很好用的工具包
💻
字号:
#!/bin/sh
#
# align-with-tags --
#	align reference transcript with tags to hypothesized
#	transcripts, merging the tags into the latter
#
# $Header: /home/srilm/devel/utils/src/RCS/align-with-tags,v 1.3 2004/09/28 20:39:07 stolcke Exp $
#
# Synopsis:
#	align-with-tags [-r ref -h hyp] [-dictionary dict] [-options...]
#	align-with-tags [-dictionary dict] [-options...] ref hyp
#
# Both transcript files are expected to hold one sentence per line, with the
# sentence id in the first field.  Tokens of the form <...> are treated as
# tags.  Any other option starting with "-" is handed to nbest-lattice
# unchanged.  One merged line per sentence is written to stdout.
#
# External tools used: gawk, gunzip, mktemp, nbest-lattice.
#

# Print the usage message on stderr and terminate with status 2.
usage () {
	echo "usage: $0 [-r ref -h hyp] [-options...]" >&2
	exit 2;
}

ref=/dev/null
hyp=/dev/null
dictionary=/dev/null
# Start from a known-empty value so nothing leaks in from the environment.
pass_options=

while [ $# -gt 0 ]; do
	case "$1" in
	-r)	[ $# -ge 2 ] || usage		# option needs a value
		ref="$2"
		shift; shift;;
	-h)	[ $# -ge 2 ] || usage
		hyp="$2"
		shift; shift;;
	-dictionary)
		[ $# -ge 2 ] || usage
		dictionary="$2"
		shift; shift;;
	-\?)	usage;;
	-*)	# accumulate (not overwrite) options destined for nbest-lattice
		pass_options="$pass_options $1"
		shift;;
	*)	break;;
	esac
done

# Positional form: "ref hyp" overrides -r/-h; a single leftover is an error.
if [ $# -ge 2 ]; then
	ref="$1"
	hyp="$2"
elif [ $# -gt 0 ]; then
	usage;
fi

# All scratch files live in one private directory instead of predictable
# /tmp/name$$ paths.
tmpdir=$(mktemp -d /tmp/align-with-tags.XXXXXX) || exit 1
tmpdict="$tmpdir/dict"
tmptags="$tmpdir/tags"
tmprefs="$tmpdir/refs"
tmphyps="$tmpdir/hyps"
tmpnbest="$tmpdir/nbest"
tmpmerge="$tmpdir/merged"

# Single quotes: $tmpdir is expanded (and quoted) when the trap fires.
trap 'rm -rf "$tmpdir"; exit' 0 1 2 15

#
# prepare_text FILE [TAGFILE]
#	Normalize a transcript for alignment and print it on stdout:
#	non-tag words (fields 2..NF) are lowercased, and a "<default>" tag
#	is inserted wherever two neighbouring fields are both non-tags and
#	after a final non-tag field, so that every word ends up followed by
#	a tag.  If TAGFILE is given, every distinct tag seen (plus
#	"<default>") is written to it, one per line.
#
prepare_text () {
	gawk -v tag_file="$2" '
	BEGIN {
		tag_list["<default>"] = 1;
	}
	function is_tag(x) {
		return (x ~ /^<.*>$/);
	}
	{
		# field 1 is the sentence id and is never lowercased
		for (i = 2; i <= NF; i ++) {
			if (is_tag($i)) {
				tag_list[$i] = 1;
			} else {
				$i = tolower($i);
			}
			# Appending to the previous field rebuilds $0 but
			# does not re-split it, so field numbering is stable.
			if (!is_tag($(i - 1)) && !is_tag($i)) {
				$(i - 1) = $(i - 1) " <default>";
			}
		}
		if (!is_tag($NF)) {
			$NF = $NF " <default>";
		}
		print $0;
	}
	END {
		if (tag_file) {
			for (tag in tag_list) {
				print tag > tag_file;
			}
		}
	}' "$1";
}

#
# parse_alignment FILE SENTID
#	Read the (optionally gzipped) alignment FILE and print a single line:
#	SENTID followed by the merged word/tag sequence.  Only lines whose
#	first field is "align" are examined; lines of an unexpected shape
#	are reported on stderr.
#
parse_alignment () {
	gunzip -c -f < "$1" | \
	gawk -v sentid="$2" 'BEGIN {
		output = sentid;

		# Whether mismatching reference words are echoed in
		# parentheses.  The original code set "show_refs = 1" but
		# tested "showrefs", so the flag was effectively always off;
		# the name is unified here and the value kept at 0 so the
		# output is unchanged.
		show_refs = 0;
	}
	function is_empty(x) {
		return x == "<default>" || tolower(x) == "*delete*";
	}
	function is_tag(x) {
		return x ~ /^<.*>$/;
	}
	$1 == "align" {
		if (NF == 4 && $4 == 1) {
			# matching hyp and ref
			if (!is_empty($3)) {
				output = output " " $3;
			}
		} else if (NF == 6 && $4 == 1 && $6 == 0) {
			# mismatched hyp and ref
			if (is_empty($3)) {
				# nothing on the field-3 side: take over a
				# real tag from field 5, if there is one
				if (is_tag($5)) {
					if (!is_empty($5)) {
						output = output " " $5;
					}
				} else if (show_refs) {
					output = output " (" $5 ")";
				}
			} else {
				if (is_empty($5) || !show_refs) {
					output = output " " $3;
				} else {
					output = output " " $3 " (" $5 ")";
				}
			}
		} else  {
			print "unexpected alignment: " $0 > "/dev/stderr";
		}
	}
	END {
		print output;
	}'
}

set -e

#
# format hyps and refs for alignment
#
prepare_text "$ref" "$tmptags" > "$tmprefs"
prepare_text "$hyp" > "$tmphyps"

#
# add tag pronunciations to the dictionary
#
if [ "$dictionary" != /dev/null ]; then
	gunzip -c -f "$dictionary" > "$tmpdict"
else
	: > "$tmpdict"
fi

gawk '{ print $1, "**TAG**" }' "$tmptags" >> "$tmpdict"

#
# do the alignments
#
mkdir -p "$tmpnbest" "$tmpmerge"

# Each hypothesis becomes a one-entry nbest file ("0 0 0" followed by the
# words); the list of those files is fed to nbest-lattice on stdin.
# read -r / printf keep backslashes in the transcripts intact.
while read -r sentid words
do
	printf '%s\n' "0 0 0 $words" > "$tmpnbest/$sentid"
	printf '%s\n' "$tmpnbest/$sentid"
done < "$tmphyps" | \
nbest-lattice -nbest-files - \
	-use-mesh \
	-dictionary "$tmpdict" \
	-keep-noise \
	-refs "$tmprefs" \
	$pass_options \
	-write-dir "$tmpmerge" | \
(
	# $pass_options above is deliberately unquoted so that it splits
	# into separate arguments.
	#
	# The first field of every nbest-lattice output line is taken as a
	# sentence id.  Parsing lags one line behind: the file for a sentence
	# is read only after the following line (or end of input) has been
	# seen -- presumably so that the file is complete by then.
	last_sentid=
	while read -r sentid rest
	do
		if [ -n "$last_sentid" ]; then
			parse_alignment "$tmpmerge/$last_sentid.gz" "$last_sentid"
		fi
		last_sentid=$sentid
	done
	if [ -n "$last_sentid" ]; then
		parse_alignment "$tmpmerge/$last_sentid.gz" "$last_sentid"
	fi
)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -