📄 mailcross
字号:
#!/bin/bash
#
# Copyright (C) 2002 Laird Breyer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Author: Laird Breyer <laird@lbreyer.com>
#
# mailcross - cross-validation driver for a mail classifier.
#
# Commands:
#   prepare size      create mailcross.d/ with `size` numbered subsets + log/
#   add category [FILE]...
#                     split the mailboxes (or stdin) into single messages and
#                     append each one to <random subset>/<category>.mbox
#   learn [OPTION]... for every subset i and category, feed the category's
#                     messages from all OTHER subsets to MAILCROSS_LEARNER
#   run [OPTION]...   classify every message of subset i with the models
#                     learned for subset i; results go to log/crossval.log
#   summarize         print confusion tables computed from log/crossval.log
#   clean             delete the working tree
#   -V                print the version
#
# Configuration: $HOME/.mailcrossrc is sourced if present and may set
# MAILCROSS_FILTER / MAILCROSS_LEARNER (both are word-split command lines).
#
# Limitations: the learn/run steps assemble command lines as strings, so
# the working directory path and category names must not contain whitespace.
#
# NOTE(review): exit statuses are kept exactly as in the historical script
# for compatibility: usage/error paths exit 0 and normal completion exits 1.
# This is the reverse of the usual convention - confirm before relying on it.

VERSION="mailcross version 1.2.1"

MXDIR="$PWD/mailcross.d"
ALOG=$MXDIR/log/activity.log
ELOG=$MXDIR/log/error.log
CLOG=$MXDIR/log/crossval.log

# check this for environment variable overrides
if [[ -e "$HOME/.mailcrossrc" ]]; then
  # shellcheck disable=SC1091 -- optional user configuration file
  source "$HOME/.mailcrossrc"
fi

# this is the default filter
if [[ -z "$MAILCROSS_FILTER" ]]; then
  MAILCROSS_FILTER="dbacl -v -T email"
fi

# this is the default learner
if [[ -z "$MAILCROSS_LEARNER" ]]; then
  MAILCROSS_LEARNER="dbacl -T email -l"
fi

#######################################
# Print the command summary and terminate the script.
# Outputs: usage text on stdout
# Returns: never; exits with status 0 (historical behavior)
#######################################
usage() {
  printf '\nusage: mailcross prepare size\n'
  printf ' mailcross add category [FILE]...\n'
  printf ' mailcross learn\n'
  printf ' mailcross run\n'
  printf ' mailcross summarize\n'
  printf ' mailcross clean\n'
  exit 0
}

#######################################
# Append a "=== mailcross <args>" marker to the activity log.
# Globals:   ALOG (read)
# Arguments: the script's command line
#######################################
log_invocation() {
  echo "=== mailcross $*" >> "$ALOG"
}

#######################################
# Count the numbered subset directories below MXDIR.
# Only all-digit directory names are counted, so log/ (or any stray file)
# can never skew the result. Prints an error and the usage text (which
# exits) when no subset exists, i.e. when `prepare` has not been run.
# Globals: MXDIR (read), NUM (written: number of subsets)
#######################################
get_number_of_subsets() {
  local dir
  NUM=0
  for dir in "$MXDIR"/*/; do
    dir=${dir%/}
    if [[ -d "$dir" && "${dir##*/}" =~ ^[0-9]+$ ]]; then
      NUM=$((NUM + 1))
    fi
  done
  if ((NUM <= 0)); then
    echo "error: you need to prepare first."
    usage
  fi
}

#######################################
# Collect the distinct mailbox file names (e.g. "spam.mbox") found
# anywhere below MXDIR.
# Globals: MXDIR (read), CATS (written: newline separated, sorted, unique)
#######################################
get_category_names() {
  CATS=$(find "$MXDIR" -name '*.mbox' -exec basename {} \; | sort -u)
}

# main switch statement - this processes commands
case "${1-}" in
  '-V')
    echo "$VERSION"
    ;;

  clean)
    # delete working tree
    if [[ -e "$MXDIR" ]]; then
      # ${MXDIR:?} aborts instead of running "rm -rf" on an empty path
      rm -rf -- "${MXDIR:?}"
    else
      echo "Nothing to clean."
    fi
    ;;

  prepare)
    # create directory tree
    size=${2-}
    if [[ -d "$MXDIR" ]]; then
      echo "error: directory mailcross.d already exists. Remove it first."
      usage
    elif [[ -z "$size" ]]; then
      echo "error: please specify a number."
      usage
    elif [[ ! "$size" =~ ^[0-9]+$ ]] || ((10#$size < 1)); then
      # Digits-only check keeps arbitrary text out of arithmetic evaluation;
      # the 10# prefix stops "08" from being parsed as invalid octal.
      echo "error: please specify a number greater than zero."
      usage
    fi
    if mkdir "$MXDIR" && mkdir "$MXDIR/log"; then
      for ((i = 0; i < 10#$size; i++)); do
        mkdir "$MXDIR/$i"
      done
      log_invocation "$@"
    fi
    ;;

  add)
    get_number_of_subsets
    CATNAME=${2-}
    if [[ -z "$CATNAME" ]]; then
      echo "error: missing category name."
      usage
    fi
    if ! command -v formail > /dev/null; then
      echo "error: formail not found. Please install mailutils to proceed."
      exit 0
    fi
    log_invocation "$@"
    shift 2
    # formail -s starts one shell per message; each shell draws its own
    # $RANDOM to pick the destination subset. Directory, subset count and
    # category are handed over as positional parameters rather than being
    # pasted into the script text.
    # shellcheck disable=SC2016 -- expanded by the per-message shell
    deliver='cat >> "$1/$((RANDOM % $2))/$3.mbox"'
    if (($# > 0)); then
      cat -- "$@" \
        | formail -s /bin/bash -c "$deliver" mailcross "$MXDIR" "$NUM" "$CATNAME"
    else
      formail -s /bin/bash -c "$deliver" mailcross "$MXDIR" "$NUM" "$CATNAME"
    fi
    ;;

  learn)
    get_number_of_subsets
    get_category_names
    log_invocation "$@"
    shift 1 # anything after is an option passed to MAILCROSS_LEARNER
    for n in $CATS; do
      model=${n/.mbox/}
      echo "Learning $model" >> "$ALOG"
      # subsets are numbered from zero to NUM-1
      for ((i = 0; i < NUM; i++)); do
        echo "| $MAILCROSS_LEARNER $MXDIR/$i/$model $*" >> "$ALOG"
        # The model stored in subset i is trained on every subset except i.
        for ((j = 0; j < NUM; j++)); do
          # A subset may have received no message of this category.
          if ((i != j)) && [[ -f "$MXDIR/$j/$n" ]]; then
            echo " cat $MXDIR/$j/$n |" >> "$ALOG"
            cat "$MXDIR/$j/$n"
          fi
        done | $MAILCROSS_LEARNER "$MXDIR/$i/$model" "$@"
      done
    done
    ;;

  run)
    get_number_of_subsets
    get_category_names
    stuff=$MXDIR/log/run.stuff
    if ! command -v formail > /dev/null; then
      echo "error: formail not found. Please install mailutils to proceed."
      exit 0
    elif ! command -v sed > /dev/null; then
      echo "error: sed not found. Please install sed to proceed."
      exit 0
    fi
    log_invocation "$@"
    shift 1 # anything after is an option passed to MAILCROSS_FILTER
    echo "# location | true | predicted | from" > "$CLOG"
    # subsets are numbered from zero to NUM-1
    for ((i = 0; i < NUM; i++)); do
      # Filter command line: filter, extra options, then one -c per model
      # of subset i. It is a plain string because it is spliced into the
      # per-message shell script below.
      filter_cmd="$MAILCROSS_FILTER "
      if (($# > 0)); then
        filter_cmd+="$* "
      fi
      for n in $CATS; do
        filter_cmd+="-c $MXDIR/$i/${n/.mbox/} "
      done
      echo "| $filter_cmd" >> "$ALOG"
      for m in $CATS; do
        # A subset may have received no message of this category.
        [[ -f "$MXDIR/$i/$m" ]] || continue
        # Per message: sed passes the message through unchanged while
        # writing the "From " line (minus the word From) to $stuff; the
        # filter output is appended to "<subset> <true category>" by
        # xargs/echo, and the saved sender line completes the record.
        per_message="sed -e 'h' -e '/^From /s/^From//w $stuff' -e 'x'"
        per_message+=" | $filter_cmd"
        per_message+=" | xargs echo -ne '$i ${m/.mbox/}' && cat $stuff"
        formail -s /bin/bash -c "$per_message" < "$MXDIR/$i/$m" >> "$CLOG"
        echo " cat $MXDIR/$i/$m |" >> "$ALOG"
      done
    done
    ;;

  summarize)
    get_number_of_subsets # includes check that directory tree is present
    get_category_names
    if ! command -v awk > /dev/null; then
      echo "error: awk not found. Please install awk to proceed."
      exit 0
    fi
    log_invocation "$@"
    shift 1
    if [[ -s "$CLOG" ]]; then
      # Category names on one line, suffix removed, for awk's split().
      category_list=${CATS//.mbox/}
      category_list=${category_list//$'\n'/ }
      awk -v "cats=$category_list" '
        # Percentage of part in whole; 0 when whole is empty, so a category
        # that was never predicted (or has no messages) cannot abort awk
        # with a division by zero.
        function pct(part, whole) {
          return (whole > 0) ? 100 * part / whole : 0
        }
        BEGIN {
          ncats = split(cats, names)
        }
        # crossval.log fields: $2 = true category, $3 = predicted category
        /^[^#]/ {
          f[$2,$3]++
          fn[$3]++
          fp[$2]++
        }
        END {
          printf("Where do misclassifications go?\n\n")
          printf(" true | but predicted as...\n")
          printf(" * | ")
          for (c = 1; c <= ncats; c++) printf("%10s", names[c])
          printf("\n")
          for (c = 1; c <= ncats; c++) {
            printf("%-10s | ", names[c])
            for (d = 1; d <= ncats; d++) {
              printf("%9.2f%%", pct(f[names[c],names[d]], fp[names[c]]))
            }
            printf("\n")
          }
          printf("\n")
          printf("What is really in each category after prediction?\n\n")
          printf("predicted | but true mixture is...\n")
          printf(" * | ")
          for (c = 1; c <= ncats; c++) printf("%10s", names[c])
          printf("\n")
          for (c = 1; c <= ncats; c++) {
            printf("%-10s | ", names[c])
            for (d = 1; d <= ncats; d++) {
              printf("%9.2f%%", pct(f[names[d],names[c]], fn[names[c]]))
            }
            printf("\n")
          }
        }' "$CLOG"
    else
      echo "error: No results found. You must run the cross validation first."
      usage
    fi
    ;;

  *)
    usage
    ;;
esac

# Historical exit status for "command completed" (see NOTE(review) above).
exit 1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -