📄 randomtrain.sh
字号:
#! /bin/bash
#
# $Id: randomtrain.sh,v 1.3 2005/11/01 00:30:59 relson Exp $ #
#
# randomtrain -- bogofilter messages from files in random order
# and train if the result is wrong or uncertain
# needs: POSIX compliant sh, basename rm grep awk wc perl dd bogofilter
#        (scramble.sh must also be on PATH; it produces the shuffled list)
# usage: see function usage() of this file
#
# original author: Greg Louis <glouis@dynamicro.on.ca>
# modified by: David Relson <relson@osagesoftware.com>

# Note: on Solaris, use /usr/xpg4/bin/sh -- /bin/sh will not work.

# Command used to run bogofilter.  It is expanded unquoted on purpose so
# that it can be edited to carry extra options.
BOGOFILTER="bogofilter"

# usage -- print the help text on stdout and terminate the script.
# Never returns: always exits with status 1.
usage() {
    name=$(basename "$0")
    printf '%s\n' \
        "Usage: $name [-d bogodir] [-p pid] [-c cfg] [-]n|s filename [-]n|s filename [...]" \
        " Messages contained in the files are fed to bogofilter" \
        " in random order. If bogofilter is wrong or uncertain" \
        " about whether a message is spam, that message is used" \
        " for training, with bogofilter's -s or -n option." \
        "" \
        "Parameters:" \
        " bogodir is where bogofilter's wordlists files are kept" \
        " (bogodir defaults to $HOME/.bogofilter)." \
        " n (or -n) indicates that the next file contains only nonspams." \
        " s (or -s) means it contains only spams." \
        " No one file may contain both spams and nonspams." \
        " Filenames may not contain blanks." \
        "" \
        " c (or -c) indicates that the next file is the config file." \
        " p (or -p) indicates that a pid comes next (used for re-running a test)." \
        "NB: At least one spam and one nonspam file are needed!"
    exit 1
}

# score_and_train EXPECT MSGFILE
#   Classify one message with bogofilter and, when the verdict differs from
#   EXPECT, register the message under EXPECT.
#   Arguments: $1 - expected class as read from the shuffled list
#                   (the code compares it against "s" and "n")
#              $2 - file holding exactly one message
#   Globals:   reads  BOGOFILTER, bogodir, cfg
#              writes nspam, ngood (messages seen per class),
#                     rspam, rgood (messages that had to be registered),
#                     result, got
#   Output:    refreshes the progress line after every 10th message
score_and_train() {
    # $cfg is left unquoted on purpose: it holds either "-C" or the two
    # words "-c <file>" and has to split into separate arguments.
    result=$($BOGOFILTER -t -v -d "$bogodir" $cfg -I "$2")
    got=$?
    # 0=spam, 1=ham, 2=unsure, 3=err

    if [ "$1" = "s" ]; then
        nspam=$((nspam + 1))
    else
        ngood=$((ngood + 1))
    fi

    # Map the exit status onto the class letters.  Any other status
    # (unsure, error) stays numeric, so it never equals the expected
    # class and the message is registered.
    case $got in
        0) got="s" ;;
        1) got="n" ;;
    esac

    if [ "$got" != "$1" ]; then
        if [ "$1" = "s" ]; then
            rspam=$((rspam + 1))
        else
            rgood=$((rgood + 1))
        fi
        # comment out the next line for dry-run testing
        $BOGOFILTER "-$1" $cfg -d "$bogodir" -I "$2"
    fi
#   echo $1 $got $result, $nspam $rspam, $ngood $rgood

    if [ $(( (nspam + ngood) % 10 )) -eq 0 ]; then
        printf "\r%5d%5d %5d%5d " "$nspam" "$rspam" "$ngood" "$rgood"
    fi
}

# train_mbox -- mbox mode.
#   Reads lines of the form "expect fnam offset length" from stdin, copies
#   the 'length' bytes found at byte 'offset' of 'fnam' into msg.$pid, and
#   hands that single message to score_and_train.  Prints the final counts.
train_mbox() {
    # go through the list, extract the messages, eval with bogofilter
    # and train if bogofilter is wrong or uncertain
    nspam=0; ngood=0; rspam=0; rgood=0
    echo " spam reg good reg"
    while read -r expect fnam offset length; do
        # bs=1 makes skip/count byte counts; dd's statistics are discarded
        dd if="$fnam" bs=1 skip="$offset" count="$length" 2>/dev/null >"msg.$pid"
        score_and_train "$expect" "msg.$pid"
    done
    printf "\r%5d%5d %5d%5d\n" "$nspam" "$rspam" "$ngood" "$rgood"
}

# train_maildir -- maildir mode.
#   Reads lines of the form "expect fnam" from stdin; each 'fnam' is passed
#   to score_and_train as-is.  Prints the final counts.
train_maildir() {
    # go through the list, eval each message file with bogofilter
    # and train if bogofilter is wrong or uncertain
    nspam=0; ngood=0; rspam=0; rgood=0
    echo " spam reg good reg"
    while read -r expect fnam; do
        score_and_train "$expect" "$fnam"
    done
    printf "\r%5d%5d %5d%5d\n" "$nspam" "$rspam" "$ngood" "$rgood"
}

# Execution begins here...

cnt=0           # grows by 2 for every s/n file accepted
cmd=""          # argument string handed to scramble.sh
cfg='-C'        # bogofilter config option(s); replaced by "-c <file>" on -c
pid=$$          # suffix of the work files; -p overrides it
mode=0          # unknown
mbox=1          # mbox mode
mdir=2          # maildir mode

# Arguments are consumed in indicator/value pairs.
while [ $# -gt 1 ]; do
    indic=$1; shift
    case "$indic" in
        s|-s|n|-n)
            path=$1; shift
            cmd="$cmd -c $indic $path"
            if [ ! -r "$path" ]; then
                echo "file '$path' not found"
                usage
            fi
            cnt=$((cnt + 2))
            # The first s/n path decides the mode for the whole run.
            if [ "$mode" = 0 ]; then
                if [ -f "$path" ]; then
                    mode=$mbox      # regular file: mbox
                else
                    mode=$mdir      # anything else: maildir
                fi
            fi
            ;;
        c|-c)
            file=$1; shift
            cfg="-c $file"
            if [ ! -r "$file" ]; then
                echo "file '$file' not found"
                usage
            fi
            ;;
        p|-p)
            pid=$1; shift
            ;;
        d|-d)
            # bogodir may be given only once
            if [ -n "$bogodir" ]; then
                usage
            fi
            bogodir=$1; shift
            if [ ! -d "$bogodir" ]; then
                echo "directory '$bogodir' not found"
                usage
            fi
            # When a wordlist file is absent, run bogofilter once on empty
            # input in the matching registration mode.
            if [ ! -f "$bogodir/spamlist.db" ]; then
                $BOGOFILTER -d "$bogodir" -s -C < /dev/null
            fi
            if [ ! -f "$bogodir/goodlist.db" ]; then
                $BOGOFILTER -d "$bogodir" -n -C < /dev/null
            fi
            ;;
        *)
            usage
            ;;
    esac
done

# A leftover argument is an indicator without its value (or stray text);
# reject it instead of silently ignoring it.
if [ $# -ne 0 ]; then
    usage
fi

# no -d given: fall back to the default wordlist directory
if [ -z "$bogodir" ]; then
    bogodir="${HOME}/.bogofilter"
fi

# At least two s/n files are required (cnt counts 2 per file, so it is
# always even).
if [ "$cnt" -lt 4 ]; then
    usage
fi

# params may be ok, here goes...

# create a shuffled list, with lengths
# (an existing scram.$pid, e.g. from a run started with -p, is reused)
if [ ! -f "scram.$pid" ]; then
    # $cmd is left unquoted on purpose: it is a blank-separated argument
    # list, which is why filenames may not contain blanks.
    if [ "$mode" = "$mbox" ]; then
        # mbox
        scramble.sh "^From " -l $cmd > "scram.$pid"
    else
        # maildir
        scramble.sh -d -l $cmd > "scram.$pid"
    fi
fi

if [ "$mode" = "$mbox" ]; then
    # mbox
    train_mbox <"scram.$pid"
else
    # maildir
    train_maildir <"scram.$pid"
fi

# Work files are removed only when the suffix is this shell's own pid;
# files named with a different -p value are kept.
if [ "$pid" = "$$" ]; then
    # next line can be commented out for debugging
    rm -f "scram.$pid" "msg.$pid" "cfg.$pid"
fi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -