📄 wlat-to-pfsg.gawk
字号:
#!/usr/local/bin/gawk -f## wlat-to-pfsg --# Create a Decipher PFSG from a nbest-lattice(1) word lattice## usage: wlat-to-pfsg word-lattice> pfsg## $Header: /home/srilm/devel/utils/src/RCS/wlat-to-pfsg.gawk,v 1.5 2004/11/02 02:00:35 stolcke Exp $############################################ Output format specific code#BEGIN { logscale = 10000.5; round = 0.5; null = "NULL"; tmpfile = "tmp.pfsg.trans";}function rint(x) { if (x < 0) { return int(x - round); } else { return int(x + round); }}function scale_log(x) { return rint(x * logscale);}function start_grammar(name) { num_trans = 0; num_nodes = 0; return;}function end_grammar(name) { printf "%d pfsg nodes\n", num_nodes >> "/dev/stderr"; printf "%d pfsg transitions\n", num_trans >> "/dev/stderr"; print "name " name; printf "nodes %s", num_nodes; for (i = 0; i < num_nodes; i ++) { if (node_string[i] == "") { print "warning: node word " i " is undefined" >> "/dev/stderr"; node_string[i] = null; } printf " %s", node_string[i]; } printf "\n"; print "initial " initial_node; print "final " final_node; print "transitions " num_trans; fflush(); close(tmpfile); system("/bin/cat " tmpfile "; /bin/rm -f " tmpfile);}function set_initial(node) { initial_node = node;}function set_final(node) { final_node = node;}function add_node(node, word) { if (node >= num_nodes) { num_nodes = node + 1; } node_string[node] = word;}function add_trans(from, to, prob) { num_trans ++; print from, to, scale_log(prob) > tmpfile;}########################################### Word lattice parsing#BEGIN { grammar_name = "PFSG";}NF == 0 { next;}$1 == "name" { grammar_name = $2; next;}## nbest-lattice output (without -use-mesh)#$1 == "version" { if ($2 != 2) { print "need word lattice version 2" >> "/dev/stderr"; exit 1; }}$1 == "initial" { start_grammar(grammar_name); set_initial($2);}$1 == "final" { set_final($2);}$1 == "node" { add_node($2, $3); posterior = $5; if (posterior == 0) { print "node " $2 ": zero posterior, omitting it" >> "/dev/stderr"; next; } for (i = 6; i <= NF; i += 2) { if ($(i + 1) == 0) { print "node " $2 ": omitting zero posterior transition" \ >> "/dev/stderr"; } else { add_trans($2, $i, log($(i + 1)/posterior)); } }}## nbest-lattice -use-mesh output (confusion networks)#$1 == "numaligns" { start_grammar(grammar_name); numaligns = $2; # # use nodes 0 ... numaligns between the words # then allocate additional nodes for each word set_initial(0); set_final(numaligns); for (i = 0; i <= numaligns; i ++) { add_node(i, null); } numnodes = numaligns + 1;}$1 == "align" { pos = $2; for (i = 3; i <= NF; i += 2) { word = $i; posterior = $(i + 1); if (posterior == 0) { print "align " pos ", word " word \ ": zero posterior, omitting it" >> "/dev/stderr"; continue; } if (word == "*DELETE*") { add_trans(pos, pos + 1, log(posterior)); } else { add_node(numnodes, word); add_trans(pos, numnodes, log(posterior)); add_trans(numnodes, pos + 1, 0); numnodes ++; } }}END { end_grammar(grammar_name);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -