⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 change-lm-vocab

📁 这是一款很好用的工具包
💻
字号:
#!/bin/sh
#
# change-lm-vocab --
#	Create a language model from an existing one by changing its
#	vocabulary.
#	All n-grams in the new vocab are retained with their original
#	probabilities.  Backoff weights are recomputed and backed-off
#	unigrams for all new words are added.
#	-subset option performs subsetting of the vocabulary without adding
#	new words.
#	-tolower makes the vocabulary comparison case-insensitive and is
#	also forwarded to ngram.
#	Any other argument is forwarded to ngram unchanged.
#
# usage: change-lm-vocab [-subset] [-tolower] -vocab vocabfile -lm oldlm -write-lm newlm
#
# $Header: /home/srilm/devel/utils/src/RCS/change-lm-vocab,v 1.7 2003/04/12 20:11:49 stolcke Exp $
#

# Defaults.  "-" makes gunzip read standard input; for -write-lm it is
# handed to ngram as-is (presumably meaning standard output -- confirm
# against the ngram documentation).
oldlm=-
newlm=-
vocab=/dev/null

# Initialise explicitly so that same-named variables inherited from the
# environment cannot change the behaviour of the script.
options=
subset=
tolower=

# need_arg OPTION NARGS --
#	Abort with a usage error unless OPTION is followed by a value,
#	i.e. unless at least two positional parameters remain.
need_arg () {
	if [ "$2" -lt 2 ]; then
		echo "change-lm-vocab: option $1 requires an argument" >&2
		exit 2
	fi
}

while [ $# -gt 0 ]; do
	case "$1" in
	-vocab)
		need_arg "$1" $#
		vocab="$2" ; shift ;;
	-lm)
		need_arg "$1" $#
		oldlm="$2" ; shift ;;
	-write-lm)
		need_arg "$1" $#
		newlm="$2" ; shift ;;
	-tolower)
		# needed both by the filter below and by ngram
		options="$options $1" ; tolower=1 ;;
	-subset)
		subset=yes ;;
	*)
		options="$options $1" ;;
	esac
	shift
done

# -subset prevents new words being added to the LM
if [ "$subset" ]; then
	ngram_vocab="/dev/null"
else
	ngram_vocab="$vocab"
fi

# Stage 1: gunzip -cf decompresses the old LM, or copies it through
#          unchanged when it is not compressed.
# Stage 2: gawk drops every n-gram containing a word outside the vocabulary.
# Stage 3: ngram recomputes the backoff weights (-renorm) and writes the
#          new LM.
# $options is left unquoted on purpose: it is a space-separated list of
# extra ngram arguments that must be split back into separate words.
gunzip -cf "$oldlm" | gawk '
# Read the vocab file.  This is done on the first input record rather than
# in BEGIN because the command-line assignments (vocab=..., to_lower=...)
# are not yet in effect while BEGIN runs.
NR == 1 && vocab {
	# always include sentence begin/end
	is_word["<s>"] = is_word["</s>"] = 1;

	while ((getline word < vocab) > 0) {
		is_word[to_lower ? tolower(word) : word] = 1;
	}

	close(vocab);
}

# process old lm

# blank lines are copied through
NF == 0 {
	print; next;
}

# "ngram N=count" header lines are copied through unchanged
/^ngram *[0-9][0-9]*=/ {
	print;
	next;
}

# "\N-grams:" starts the section of order N; N may have several digits
/^\\[0-9][0-9]*-grams:/ {
	currorder = substr($0, 2, index($0, "-") - 2);
	print;
	next;
}

# any other backslash keyword line is copied through
/^\\/ {
	print; next;
}

# Inside an N-gram section: field 1 is skipped and fields 2 .. N+1 are
# taken as the words.  The line is kept only if all of them are in the
# vocabulary.
currorder {
	for (i = 2; i <= currorder + 1; i++) {
		if (!((to_lower ? tolower($i) : $i) in is_word)) next;
	}
	print;
	next;
}

# everything before the first section header is copied through
{ print }
' vocab="$vocab" to_lower="$tolower" | \
ngram -lm - -vocab "$ngram_vocab" -renorm -write-lm "$newlm" $options

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -