📄 fsm-to-pfsg.gawk
字号:
#!/usr/local/bin/gawk -f## fsm-to-pfsg --# convert AT&T FSM acceptor to Decipher PFSG format## usage: fsm-to-pfsg [pfsg_name=NAME] [transducer=1] [scale=S] file.fsm > file.pfsg# pfsg_name=NAME sets PFSG name to NAME# transducer=1 indicates input is a transducer# scale=S sets transition weight scaling factor to S# (default -1)## $Header: /home/srilm/devel/utils/src/RCS/fsm-to-pfsg.gawk,v 1.7 2003/07/14 17:56:03 stolcke Exp $#BEGIN { pfsg_name = "from_fsm"; transducer = 0; # input is transducer if ("pid" in PROCINFO) { pid = PROCINFO["pid"]; } else { getline pid < "/dev/pid"; } tmpfile = "/tmp/fsm.tmp" pid; # hack to remove tmpfile when killed print "" | "trap '/bin/rm -f " tmpfile "' 0 1 2 15 30; cat >/dev/null"; num_newnodes = 0; initial_node = -1; empty_output = "NULL"; scale = -1; # scaling of transition weights}# transition descriptionNF >= 3 { from_node = $1; to_node = $2; if (transducer) { # collapse input and output into a single symbol $3 = $3 ":" $4; $4 = ""; } output = $3; if (initial_node < 0) { initial_node = from_node; } # create new node names for pairs of output,old-node if (!(output " " to_node in newnode_table)) { output_table[num_newnodes] = output; newnode_table[output " " to_node] = num_newnodes ++; # create list of incoming outputs for each state insymbols[to_node] = insymbols[to_node] " " output; } # save for re-reading print $0 > tmpfile; next;}# final state descriptionNF >= 1 { node = $1; if (initial_node < 0) { initial_node = node; } # save for re-reading print $0 > tmpfile; next;}END { close(tmpfile); # create initial and final nodes if (!(empty_output " " initial_node in newnode_table)) { output_table[num_newnodes] = empty_output; newnode_table[empty_output " " initial_node] = num_newnodes ++; insymbols[initial_node] = insymbols[initial_node] " " empty_output; } initial_newnode = newnode_table[empty_output " " initial_node]; output_table[num_newnodes] = empty_output; final_newnode = num_newnodes++; # print PFSG header info print "name " pfsg_name; printf "nodes %d", num_newnodes; for (i = 0; i < num_newnodes; i ++) { printf " %s", output_table[i]; } printf "\n"; printf "initial %d\n", initial_newnode; printf "final %d\n", final_newnode; # re-read FSM description, counting total number of new # transitions num_transitions = 0; while (getline < tmpfile) { from_node = $1; # duplicate transition for all insymbols of from_node num_transitions += split(insymbols[from_node], a); } close(tmpfile); printf "transitions %d\n", num_transitions; # re-read FSM description, outputing new transitions while (getline < tmpfile) { if (NF >= 3) { from_node = $1; to_node = $2; output = $3; cost = (NF == 3 ? 0 : $4); # duplicate transition for all insymbols of from_node n = split(insymbols[from_node], a); for (i = 1; i <= n; i ++) { printf "%d %d %d\n", \ newnode_table[a[i] " " from_node], \ newnode_table[output " " to_node], \ scale * cost; } } else { from_node = $1; cost = (NF == 1 ? 0 : $2); # add final transition for all insymbols of from_node n = split(insymbols[from_node], a); for (i = 1; i <= n; i ++) { printf "%d %d %d\n", \ newnode_table[a[i] " " from_node], \ final_newnode, \ scale * cost; } } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -