add-pauses-to-pfsg.gawk

来自「这是一款很好用的工具包」· GAWK 代码 · 共 172 行

GAWK

172 行

#!/usr/local/bin/gawk -f
#
# add-pauses-to-pfsg --
#	Modify Decipher PFSG to allow pauses between words
#
# $Header: /home/srilm/devel/utils/src/RCS/add-pauses-to-pfsg.gawk,v 1.11 2002/06/29 19:59:12 stolcke Exp $
#
# Variables read by this script:
#	vocab		not assigned anywhere in this script; when non-empty it
#			is used as the name of a file whose first field on each
#			line is a word (expected to be supplied by the caller,
#			e.g. as a command-line variable assignment)
#	wordwrap	non-zero: replace word nodes by pause-wrapper sub-PFSGs
#	pauselast	non-zero: put the pause filler after the word / the
#			original PFSG instead of before it
#	version		non-zero: emit a "version" line before the top level
#
BEGIN {
	pause = "-pau-";
	top_level_name = "TOP_LEVEL";
	pause_filler_name = "PAUSE_FILLER";
	null = "NULL";

	wordwrap = 1;		# wrap pause filler around words
	pauselast = 0;		# make pauses follow wrapped words
	version = 0;		# no "version" line by default

	# portable way to test for lowercase characters
	# check for high-order bit is supposed to catch multibyte characters
	word_pattern = "[[:lower:]\x80-\xFF]";
	# fall back to an explicit range if the character class does not
	# match a plain lowercase letter in this awk
	if ("a" !~ word_pattern) word_pattern = "[a-z\x80-\xFF]";
}

#
# output the TOP_LEVEL model
#	oldname is the name of the original pfsg
#
# The model has four nodes; which of the two inner nodes is the pause
# filler and which is the original pfsg depends on pauselast.  The extra
# transition (0->2 or 1->3) bypasses the pause-filler node.
#
function print_top_level(oldname) {
	if (version) {
		print "version " version "\n";
	}

	print "name " top_level_name;
	if (pauselast) {
		print "nodes 4 " null " " pause_filler_name " " oldname " " null;
	} else {
		print "nodes 4 " null " " oldname " " pause_filler_name " " null;
	}
	print "initial 0";
	print "final 3";
	print "transitions 4";
	print "0 1 0";
	print "1 2 0";
	if (pauselast) {
		print "0 2 0";
	} else {
		print "1 3 0";
	}
	print "2 3 0";
	print "";
}

#
# name of the wrapper sub-PFSG generated for word
#
function word_wrapper_name(word) {
	return "_" word "_PF";
}

#
# output a pause wrapper for word
#
# Three nodes; the node order (and hence where the pause filler sits
# relative to the word) depends on pauselast.
#
function print_word_wrapper(word) {
	print "name " word_wrapper_name(word);
	if (pauselast) {
		print "nodes 3 " word " " pause_filler_name " " null;
	} else {
		print "nodes 3 " null " " pause_filler_name " " word;
	}
	print "initial 0";
	print "final 2";
	print "transitions 3";
	print "0 1 0";
	print "1 2 0";
	print "0 2 0";
	print "";
}

#
# output the pause filler
#
# The 2->1 transition loops back so the pause node can be repeated.
#
function print_pause_filler() {
	print "name " pause_filler_name;
	print "nodes 4 " null " " null " " pause " " null;
	print "initial 0";
	print "final 3";
	print "transitions 4";
	print "0 1 0";
	print "1 2 0";
	print "2 3 0";
	print "2 1 0";
}

#
# read the vocabulary list from file into word_list[]
#	only the first whitespace-separated field of each line is used;
#	lines without any field are skipped.
#	returns 1 on success, 0 if the file could not be read.
#	(line, fields and status are local variables)
#
function read_vocab(file,    line, fields, status) {
	# getline returns -1 on error, which is "true" in a bare while()
	# condition and would loop forever; compare against 0 explicitly.
	while ((status = (getline line < file)) > 0) {
		if (split(line, fields)) {
			word_list[fields[1]] = 1;
		}
	}
	close(file);

	return status == 0;
}

#
# check that a node name is word
# if a vocabulary was not specified we use the following heuristic:
# word nodes contain at least one lowercase or non-ascii character and are not
# surrounded by "*...*" (which indicates a class name).
#
function is_word(w) {
	# same test as the vocabulary-loading rule, so both always agree
	if (vocab != "") {
		return w in word_list;
	} else {
		return w !~ /^\*.*\*$/ && w ~ word_pattern;
	}
}

#
# read vocabulary list if supplied
# (this rule must precede the empty-line rule below: that rule ends with
# "next", so an empty first input line would otherwise skip the loading)
#
NR == 1 && vocab != "" {
	if (!read_vocab(vocab)) {
		print "add-pauses-to-pfsg: cannot read vocabulary file " vocab > "/dev/stderr";
		vocab_error = 1;
		exit 1;
	}
}

#
# empty lines are passed through unchanged
#
NF == 0 {
	print;
	next;
}

#
# first time we see a pfsg name, issue a top-level wrapper for it.
#
$1 == "name" && !have_top_level {
	print_top_level($2);
	print;
	have_top_level = 1;
	next;
}

#
# maps word nodes to wrapper nodes
#
$1 == "nodes" {
	numnodes = $2;
	printf "nodes %d", numnodes;

	# node names start at field 3
	for (i = 0; i < numnodes; i ++) {
		node_name = $(i + 3);

		# word nodes (as decided by is_word) are replaced by the
		# name of their wrapper
		if (wordwrap && is_word(node_name)) {
			# remember each distinct word once, in order of
			# first appearance, for the END block
			if (!(node_name in all_words)) {
				all_words[node_name] = 1;
				words[++num_words] = node_name;
			}
			printf " %s", word_wrapper_name(node_name);
		} else {
			printf " %s", node_name;
		}
	}
	printf "\n";
	next;
}

#
# everything else is copied through unchanged
#
{
	print;
}

END {
	# "exit" in a main rule still runs END; emit nothing further if the
	# vocabulary could not be read
	if (vocab_error) {
		exit 1;
	}

	#
	# output the word wrappers
	#
	if (wordwrap) {
		for (i = 1; i <= num_words; i ++) {
			print_word_wrapper(words[i]);
		}
	}

	print_pause_filler();
}

add-pauses-to-pfsg.gawk - 源码说明

本页面展示了「这是一款很好用的工具包」中的 add-pauses-to-pfsg.gawk 源码文件，采用 GAWK 编程语言编写，共 172 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与工具包相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?