📄 pfsg-to-fsm.gawk
字号:
#!/usr/local/bin/gawk -f# # pfsg-to-fsm --# convert a Decipher PFSG to AT&T FSM format## usage: pfsg-to-fsm [symbolfile=SYMFILE] [symbolic=1] [scale=S] file.pfsg > file.fsm## symbolic=1 retains output word strings in the fsm file.# symbolfile=SYMFILE dump output symbol table to SYMFILE# (to be used with fsmcompile|fsmdraw|fsmprint -i SYMFILE)# scale=S set transition weight scaling factor to S# (default -1)# ## $Header: /home/srilm/devel/utils/src/RCS/pfsg-to-fsm.gawk,v 1.14 2004/11/02 02:00:35 stolcke Exp $#BEGIN { empty_output = "NULL"; output_symbols[empty_output] = 0; numoutputs = 1; if ("pid" in PROCINFO) { pid = PROCINFO["pid"]; } else { getline pid < "/dev/pid"; } tmpfile = "/tmp/pfsg.tmp" pid; # hack to remove tmpfile when killed print "" | "trap '/bin/rm -f " tmpfile "' 0 1 2 15 30; cat >/dev/null"; symbolfile = ""; symbolic = 0; scale = -1; # scaling of transition weights nofinal = 0; # do output final node definition final_output = "";}$1 == "nodes" { numnodes = $2; for (i = 0; i < numnodes; i++) { node_output[i] = $(i + 3); if (!(node_output[i] in output_symbols)) { output_symbols[node_output[i]] = numoutputs++; } } next;}$1 == "initial" { initial_node = $2; if (node_output[initial_node] != empty_output) { print "initial node must be NULL" >> "/dev/stderr"; exit 1; } next;}$1 == "final" { final_node = $2; if (final_output) { node_output[final_node] = final_output; if (!(final_output in output_symbols)) { output_symbols[final_output] = numoutputs++; } } next;}function print_trans(from_node, to_node, cost) { if (to_node == final_node && node_output[final_node] == empty_output) { print from_node, scale * cost; } else { # PFSG bytelogs have to be negated to FSM default semiring print from_node, to_node, \ (symbolic ? node_output[to_node] : \ output_symbols[node_output[to_node]]), \ scale * cost; }}function print_final() { # if the final node is non-emitting, we don't need to output it # at all (see print_trans above) if (!nofinal && node_output[final_node] != empty_output) { print final_node, 0; }}$1 == "transitions" { num_transitions = $2; # process the transitions and map them to FSM transitions and # final states. # FSM requires the first transition to be out of the initial state, # so we scan the transitions twice. # The first time, to find the initial transitions, then # to add all the others. Yuck! for (k = 1; k <= num_transitions; k ++) { getline; from_node = $1; to_node = $2; cost = $3; if (from_node == initial_node) { print_trans(from_node, to_node, cost); } else { print > tmpfile; } } close(tmpfile); # output definition of the final node print_final(); # now process all the non-initial transitions while (getline < tmpfile) { from_node = $1; to_node = $2; cost = $3; print_trans(from_node, to_node, cost); } next;}END { # dump out the symbol table if (symbolfile) { for (s in output_symbols) { print s, output_symbols[s] > symbolfile; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -