⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 randomtrain.sh

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 SH
字号:
#! /bin/bash
#
# $Id: randomtrain.sh,v 1.3 2005/11/01 00:30:59 relson Exp $ #
#
#  randomtrain -- bogofilter messages from files in random order
#                 and train if the result is wrong or uncertain
#
#  needs:    bash, basename rm grep awk wc perl dd bogofilter
#            and scramble.sh (invoked by name, so it must be on PATH)
#  usage:    see function usage() of this file
#
#  original author: 	Greg Louis <glouis@dynamicro.on.ca>
#  modified by: 	David Relson <relson@osagesoftware.com>

# Note: this script uses bash features (arrays, local); a plain Bourne
# /bin/sh (e.g. on Solaris) will not work.

BOGOFILTER="bogofilter"

#######################################
# Print the command-line help on stdout and terminate the script.
# Arguments: none
# Returns:   never returns; exits with status 1
#######################################
usage() {
    local name
    name=$(basename "$0")
    echo "Usage: $name [-d bogodir] [-p pid] [-c cfg] [-]n|s filename [-]n|s filename [...]"
    echo "       Messages contained in the files are fed to bogofilter"
    echo "       in random order.  If bogofilter is wrong or uncertain"
    echo "       about whether a message is spam, that message is used"
    echo "       for training, with bogofilter's -s or -n option."
    echo ""
    echo "Parameters:"
    echo "       bogodir is where bogofilter's wordlists files are kept"
    echo "       (bogodir defaults to $HOME/.bogofilter)."
    echo "       n (or -n) indicates that the next file contains only nonspams."
    echo "       s (or -s) means it contains only spams."
    echo "       No one file may contain both spams and nonspams."
    echo "       Filenames may not contain blanks."
    echo ""
    echo "       c (or -c) indicates that the next file is the config file."
    echo "       p (or -p) indicates that a pid comes next (used for re-running a test)."
    echo "NB:    At least one spam and one nonspam file are needed!"
    exit 1
}

#######################################
# Classify one message with bogofilter and, if the verdict differs from
# the expected class, register the message with bogofilter's -s/-n option.
# Shared by train_mbox and train_maildir.
# Globals:   BOGOFILTER, bogodir, cfg (read)
#            nspam, ngood  (messages seen per class; incremented)
#            rspam, rgood  (messages registered per class; incremented)
# Arguments: $1 - expected class as read from the scrambled list
#                 ("s" counts as spam, anything else as good)
#            $2 - file holding exactly one message
# Outputs:   a progress line on stdout after every 10th message
#######################################
eval_and_train() {
    local expect=$1 msgfile=$2
    local result got

    # Declaration and assignment are kept separate so $? is bogofilter's
    # exit status rather than that of 'local'.
    result=$("$BOGOFILTER" -t -v -d "$bogodir" "${cfg[@]}" -I "$msgfile")
    got=$?
    # 0=spam, 1=ham, 2=unsure, 3=err
    if [ "$expect" = "s" ]; then
        nspam=$((nspam + 1))
    else
        ngood=$((ngood + 1))
    fi
    # Statuses 2 and 3 stay numeric, so they never equal the expected
    # class and the message is used for training.
    if [ "$got" -eq 0 ]; then
        got="s"
    elif [ "$got" -eq 1 ]; then
        got="n"
    fi
    if [ "$got" != "$expect" ]; then
        if [ "$expect" = "s" ]; then
            rspam=$((rspam + 1))
        else
            rgood=$((rgood + 1))
        fi
        # comment out the next line for dry-run testing
        "$BOGOFILTER" "-$expect" "${cfg[@]}" -d "$bogodir" -I "$msgfile"
    fi
#    echo $expect $got $result, $nspam $rspam, $ngood $rgood
    if [ $(( (nspam + ngood) % 10 )) -eq 0 ]; then
        printf "\r%5d%5d  %5d%5d " "$nspam" "$rspam" "$ngood" "$rgood"
    fi
}

#######################################
# Go through the scrambled list (mbox mode), extract each message,
# evaluate it with bogofilter and train if bogofilter is wrong or uncertain.
# Globals:   pid (read); nspam, ngood, rspam, rgood (reset, then updated)
# Inputs:    stdin - lines of the form "expect fnam offset length"
# Outputs:   header, running totals and a final totals line on stdout;
#            the current message is written to msg.$pid
#######################################
train_mbox() {
    local expect fnam offset length

    nspam=0; ngood=0; rspam=0; rgood=0
    echo " spam  reg   good  reg"
    while read -r expect fnam offset length; do
        # Cut the message (length bytes starting at offset) out of the mbox.
        dd if="$fnam" bs=1 skip="$offset" count="$length" 2>/dev/null >"msg.$pid"
        eval_and_train "$expect" "msg.$pid"
    done
    printf "\r%5d%5d  %5d%5d\n" "$nspam" "$rspam" "$ngood" "$rgood"
}

#######################################
# Go through the scrambled list (maildir mode), evaluate each message
# file with bogofilter and train if bogofilter is wrong or uncertain.
# Globals:   nspam, ngood, rspam, rgood (reset, then updated)
# Inputs:    stdin - lines of the form "expect fnam"
# Outputs:   header, running totals and a final totals line on stdout
#######################################
train_maildir() {
    local expect fnam

    nspam=0; ngood=0; rspam=0; rgood=0
    echo " spam  reg   good  reg"
    while read -r expect fnam; do
        eval_and_train "$expect" "$fnam"
    done
    printf "\r%5d%5d  %5d%5d\n" "$nspam" "$rspam" "$ngood" "$rgood"
}

# Execution begins here...

cnt=0           # command-line words consumed for s/n files (2 per file)
cmd=()          # "-c <indicator> <path>" triples handed on to scramble.sh
cfg=(-C)        # bogofilter config option; replaced by "-c <file>" on request
pid=$$
mode=0          # unknown
mbox=1          # mbox mode
mdir=2          # maildir mode

# Every option takes a value, so parsing stops once fewer than two
# words are left.
while [ $# -gt 1 ]; do
    indic=$1; shift
    case "$indic" in
        s|-s|n|-n)
            path=$1; shift
            cmd+=(-c "$indic" "$path")
            if [ ! -r "$path" ]; then echo "file '$path' not found"; usage; fi
            cnt=$((cnt + 2))
            # The first s/n argument decides the mode for the whole run.
            if [ "$mode" = 0 ]; then
                if [ -f "$path" ]; then     # if mbox
                    mode=$mbox
                else                        # else maildir
                    mode=$mdir
                fi
            fi
            ;;
        c|-c)
            file=$1; shift
            cfg=(-c "$file")
            if [ ! -r "$file" ]; then echo "file '$file' not found"; usage; fi
            ;;
        p|-p)
            pid=$1; shift
            ;;
        d|-d)
            # bogodir may be given only once
            [ -n "$bogodir" ] && usage
            bogodir=$1; shift
            if [ ! -d "$bogodir" ]; then echo "directory '$bogodir' not found"; usage; fi
            # Run bogofilter once per missing list file, with empty input.
            [ ! -f "$bogodir/spamlist.db" ] && "$BOGOFILTER" -d "$bogodir" -s -C < /dev/null
            [ ! -f "$bogodir/goodlist.db" ] && "$BOGOFILTER" -d "$bogodir" -n -C < /dev/null
            ;;
        *)
            usage
            ;;
    esac
done

# if no bogodir has been set, fall back to the default directory
test -z "$bogodir" && bogodir="${HOME}/.bogofilter"

# check for an even number of s/n files >= 4
test "$cnt" -ge 4 || usage
test $((cnt % 2)) -eq 0 || usage

# params may be ok, here goes...

# create a shuffled list, with lengths
# (an existing scram.$pid, e.g. from a run re-started with -p, is reused)
if [ ! -f "scram.$pid" ]; then
    if [ "$mode" = "$mbox" ]; then      # mbox
        scramble.sh "^From " -l "${cmd[@]}" > "scram.$pid"
    else                                # maildir
        scramble.sh -d -l "${cmd[@]}" > "scram.$pid"
    fi
fi

if [ "$mode" = "$mbox" ]; then          # mbox
    train_mbox < "scram.$pid"
else                                    # maildir
    train_maildir < "scram.$pid"
fi

# Work files are removed only when the pid is our own; with -p they are kept.
if [ "$pid" = "$$" ]; then
# next line can be commented out for debugging
    rm -f -- "scram.$pid" "msg.$pid" "cfg.$pid"
fi

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -