📄 make-meta-counts.gawk
字号:
#!/usr/local/bin/gawk -f## make-meta-counts --# Apply N-gram count cut-offs and insert meta-counts (counts-of-counts)## $Header: /home/srilm/devel/utils/src/RCS/make-meta-counts.gawk,v 1.2 2002/07/22 21:24:45 stolcke Exp $#BEGIN { order = 3; # trust_total=1 means we don't have to generate meta-counts, just # apply the cut-offs (in combination with ngram-count -trust-totals) trust_totals = 0; metatag = "__META__";}NR == 1 { mincount[1] = mincount1 + 0; mincount[2] = mincount2 + 0; mincount[3] = mincount3 + 0; mincount[4] = mincount4 + 0; mincount[5] = mincount5 + 0; mincount[6] = mincount6 + 0; mincount[7] = mincount7 + 0; mincount[8] = mincount8 + 0; mincount[9] = mincount9 + 0;}NF > order + 1 { next;}NF > 1 { this_order = NF - 1; if (!trust_totals) { # output buffered ngrams of higher order IF there was at least # one non-meta count of the respective order for (i = order; i > this_order; i --) { if (have_counts[i]) { printf "%s", buffer[i]; have_counts[i] = 0; } delete buffer[i]; } } if ($NF < mincount[this_order]) { if (trust_totals) { next; } else { # convert below-cutoff ngram to meta-ngram $this_order = metatag int($NF); $NF = 1; # add it to buffer buffer[this_order] = buffer[this_order] $0 "\n"; } } else { have_counts[this_order] = 1; print; }}END { # output any remaining buffered ngrams for (i = order; i >= 1; i --) { if (have_counts[i]) { printf "%s", buffer[i]; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -