⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wlat-to-pfsg.gawk

📁 这是一款很好用的工具包
💻 GAWK
字号:
#!/usr/local/bin/gawk -f## wlat-to-pfsg --#	Create a Decipher PFSG from a nbest-lattice(1) word lattice## usage: wlat-to-pfsg word-lattice> pfsg## $Header: /home/srilm/devel/utils/src/RCS/wlat-to-pfsg.gawk,v 1.5 2004/11/02 02:00:35 stolcke Exp $############################################ Output format specific code#BEGIN {	logscale = 10000.5;	round = 0.5;	null = "NULL";	tmpfile = "tmp.pfsg.trans";}function rint(x) {	if (x < 0) {	    return int(x - round);	} else {	    return int(x + round);	}}function scale_log(x) {	return rint(x * logscale);}function start_grammar(name) {	num_trans = 0;	num_nodes = 0;	return;}function end_grammar(name) {	printf "%d pfsg nodes\n", num_nodes >> "/dev/stderr";	printf "%d pfsg transitions\n", num_trans >> "/dev/stderr";	print "name " name;	printf "nodes %s", num_nodes;	for (i = 0; i < num_nodes; i ++) {	    if (node_string[i] == "") {		print "warning: node word " i " is undefined" >> "/dev/stderr";		node_string[i] = null;	    }	    printf " %s", node_string[i];	}	printf "\n";		print "initial " initial_node;	print "final " final_node;	print "transitions " num_trans;	fflush();	close(tmpfile);	system("/bin/cat " tmpfile "; /bin/rm -f " tmpfile);}function set_initial(node) {	initial_node = node;}function set_final(node) {	final_node = node;}function add_node(node, word) {	if (node >= num_nodes) {		num_nodes = node + 1;	}	node_string[node] = word;}function add_trans(from, to, prob) {	num_trans ++;	print from, to, scale_log(prob) > tmpfile;}########################################### Word lattice parsing#BEGIN {	grammar_name = "PFSG";}NF == 0 {	next;}$1 == "name" {	grammar_name = $2;	next;}## nbest-lattice output (without -use-mesh)#$1 == "version" {	if ($2 != 2) {		print "need word lattice version 2" >> "/dev/stderr";		exit 1;	}}$1 == "initial" {	start_grammar(grammar_name);	set_initial($2);}$1 == "final" {	set_final($2);}$1 == "node" {	add_node($2, $3);	posterior = $5;	if (posterior == 0) {	    print "node " $2 ": zero posterior, omitting it" >> "/dev/stderr";	    next;	}	for (i = 6; i <= NF; i += 2) {	    if ($(i + 1) == 0) {		print "node " $2 ": omitting zero posterior transition" \							    >> "/dev/stderr";	    } else {		    add_trans($2, $i, log($(i + 1)/posterior));	    }	}}## nbest-lattice -use-mesh output (confusion networks)#$1 == "numaligns" {	start_grammar(grammar_name);	numaligns = $2;	# 	# use nodes 0 ... numaligns between the words	# then allocate additional nodes for each word	set_initial(0);	set_final(numaligns);	for (i = 0; i <= numaligns; i ++) {		add_node(i, null);	}	numnodes = numaligns + 1;}$1 == "align" {	pos = $2;	for (i = 3; i <= NF; i += 2) {		word = $i;		posterior = $(i + 1);		if (posterior == 0) {		    print "align " pos ", word " word \				": zero posterior, omitting it" >> "/dev/stderr";		    continue;		}		if (word == "*DELETE*") {		    add_trans(pos, pos + 1, log(posterior));		} else {		    add_node(numnodes, word);		    add_trans(pos, numnodes, log(posterior));		    add_trans(numnodes, pos + 1, 0);		    numnodes ++;		}	}}END {	end_grammar(grammar_name);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -