⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pfsg-to-fsm.gawk

📁 这是一款很好用的工具包
💻 GAWK
字号:
#!/usr/local/bin/gawk -f
#
# pfsg-to-fsm --
#	convert a Decipher PFSG to AT&T FSM format
#
# usage: pfsg-to-fsm [symbolfile=SYMFILE] [symbolic=1] [scale=S] file.pfsg > file.fsm
#
# symbolic=1		retains output word strings in the fsm file.
# symbolfile=SYMFILE 	dump output symbol table to SYMFILE
#			(to be used with fsmcompile|fsmdraw|fsmprint -i SYMFILE)
# scale=S		set transition weight scaling factor to S
#			(default -1)
#
# Input records handled (keyword in the first field):
#	nodes N OUT_0 ... OUT_N-1	output label of every node
#	initial I			index of the initial node (must be NULL)
#	final F				index of the final node
#	transitions T			followed by T lines "FROM TO COST"
#
# $Header: /home/srilm/devel/utils/src/RCS/pfsg-to-fsm.gawk,v 1.14 2004/11/02 02:00:35 stolcke Exp $
#

# Set up defaults, the output symbol table, and the spool file used to
# defer non-initial transitions.
BEGIN {
	# label carried by non-emitting nodes; always mapped to symbol id 0
	empty_output = "NULL";
	output_symbols[empty_output] = 0;
	numoutputs = 1;

	# derive a per-process name for the temporary spool file
	if ("pid" in PROCINFO) {
		pid = PROCINFO["pid"];
	} else {
		getline pid < "/dev/pid";
	}
	tmpfile = "/tmp/pfsg.tmp" pid;

	# hack to remove tmpfile when killed
	print "" | "trap '/bin/rm -f " tmpfile "' 0 1 2 15 30; cat >/dev/null";

	symbolfile = "";
	symbolic = 0;

	scale = -1;		# scaling of transition weights
	nofinal = 0;		# do output final node definition
	# NOTE(review): nofinal and final_output are not listed in the usage
	# text above; presumably they can be overridden with var=value
	# arguments like the documented options -- confirm.
	final_output = "";
}

# "nodes N OUT_0 ... OUT_N-1": remember each node's output label and assign
# a numeric id to every label not seen before.
$1 == "nodes" {
	numnodes = $2;

	for (i = 0; i < numnodes; i++) {
		node_output[i] = $(i + 3);
		if (!(node_output[i] in output_symbols)) {
			output_symbols[node_output[i]] = numoutputs++;
		}
	}

	next;
}

# "initial I": record the initial node; it is required to be non-emitting.
$1 == "initial" {
	initial_node = $2;

	if (node_output[initial_node] != empty_output) {
		print "initial node must be NULL" >> "/dev/stderr";
		exit 1;
	}

	next;
}

# "final F": record the final node; if final_output is set, it replaces the
# final node's output label (and is added to the symbol table).
$1 == "final" {
	final_node = $2;

	if (final_output) {
		node_output[final_node] = final_output;
		if (!(final_output in output_symbols)) {
			output_symbols[final_output] = numoutputs++;
		}
	}

	next;
}

# Print one PFSG transition in FSM text form.
#	from_node, to_node	node indices of the transition
#	cost			PFSG transition weight (multiplied by scale)
# A transition into a non-emitting final node is printed as the two-field
# line "from_node weight" instead of an arc (see print_final).
function print_trans(from_node, to_node, cost) {
	if (to_node == final_node && node_output[final_node] == empty_output) {
		print from_node, scale * cost;
	} else {
		# PFSG bytelogs have to be negated to FSM default semiring
		print from_node, to_node, \
			(symbolic ? node_output[to_node] : \
			 output_symbols[node_output[to_node]]), \
			scale * cost;
	}
}

# Print the definition of the final node, unless suppressed by nofinal.
function print_final() {
	# if the final node is non-emitting, we don't need to output it
	# at all (see print_trans above)
	if (!nofinal && node_output[final_node] != empty_output) {
		print final_node, 0;
	}
}

# "transitions T": consume the following T lines and emit FSM transitions.
$1 == "transitions" {
	num_transitions = $2;

	# process the transitions and map them to FSM transitions and
	# final states.
	# FSM requires the first transition to be out of the initial state,
	# so we scan the transitions twice.
	# The first time, to find the initial transitions, then
	# to add all the others. Yuck!
	num_deferred = 0;
	for (k = 1; k <= num_transitions; k ++) {
		# stop on EOF/error instead of re-processing the stale record
		if ((getline) <= 0) {
			print "pfsg-to-fsm: expected " num_transitions \
				" transitions, found only " (k - 1) \
				>> "/dev/stderr";
			break;
		}

		from_node = $1;
		to_node = $2;
		cost = $3;

		if (from_node == initial_node) {
			print_trans(from_node, to_node, cost);
		} else {
			print > tmpfile;
			num_deferred ++;
		}
	}
	close(tmpfile);

	# output definition of the final node
	print_final();

	# now process all the non-initial transitions
	# Only read the spool file if something was written to it: otherwise
	# it does not exist (or is a stale leftover).  getline returns -1 on
	# error, so it must be compared against 0 to avoid looping forever.
	if (num_deferred > 0) {
		while ((getline < tmpfile) > 0) {
			from_node = $1;
			to_node = $2;
			cost = $3;

			print_trans(from_node, to_node, cost);
		}
		close(tmpfile);
	}

	next;
}

END {
	# dump out the symbol table
	if (symbolfile) {
		for (s in output_symbols) {
			print s, output_symbols[s] > symbolfile;
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -