📄 bogohist.c
字号:
/* $Id: bogohist.c,v 1.28 2006/07/08 15:29:50 m-a Exp $ */

/*****************************************************************************

NAME:
   bogohist.c -- print bogofilter histogram

AUTHOR:
   Gyepi Sam <gyepi@praxis-sw.com>

******************************************************************************/

#include "common.h"

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <sys/stat.h>

#include "bogohist.h"
#include "prob.h"
#include "datastore.h"
#include "msgcounts.h"
#include "word.h"
#include "wordlists.h"
#include "xmalloc.h"

/* Token tallies filled in by ds_histogram_hook() during one histogram() run:
 *   *_only  - tokens whose other-class count is zero
 *   *_hapax - subset of *_only whose own-class count is exactly one
 */
static uint ham_only, ham_hapax;
static uint spam_only, spam_hapax;

/* Message counts read from the data store; passed to calc_prob(). */
static uint mgood, mbad;

/* Number of equal-width score buckets in the histogram. */
#define INTERVALS	20

typedef struct rhistogram_s rhistogram_t;
struct rhistogram_s {
    uint32_t count[INTERVALS];
};

/* Function Definitions */

/* Return n as a percentage of total.
 * A zero total yields 0.0 instead of the NaN that 0/0 would print.
 */
static double pct_of(uint n, uint total)
{
    if (total == 0)
	return 0.0;
    return 100.0 * n / total;
}

/* ds_foreach() callback: add one token to the histogram and the tallies.
 *
 * key      - the token; tokens whose text starts with '.' are skipped
 * data     - the token's good/spam counts
 * userdata - the rhistogram_t being filled
 *
 * Always returns 0.
 */
static int ds_histogram_hook(word_t *key, dsv_t *data, void *userdata)
{
    rhistogram_t *hist = userdata;

    double fw = calc_prob(data->goodcount, data->spamcount, mgood, mbad);
    /* Clamp to the last bucket so that fw == 1.0 stays in range.
     * NOTE(review): presumably calc_prob() returns a value in [0,1] -- confirm.
     */
    uint idx = min(fw * INTERVALS, INTERVALS-1);

    /* ignore meta-tokens */
    if (*key->text == (byte) '.')
	return 0;

    hist->count[idx] += 1;

    if (data->spamcount == 0) {
	ham_only += 1;
	if (data->goodcount == 1)
	    ham_hapax += 1;
    }

    if (data->goodcount == 0) {
	spam_only += 1;
	if (data->spamcount == 1)
	    spam_hapax += 1;
    }

    return 0;
}

/* Print the histogram to stdout, one row per bucket.
 *
 * With verbose == 1 the hapax tallies, and with verbose == 2 the pure
 * ham/spam tallies, are first subtracted from the first and last buckets;
 * hist is modified in place in those cases.
 *
 * Returns the total number of tokens over all buckets (after any
 * subtraction).
 */
static uint print_histogram(rhistogram_t *hist)
{
    uint i, r;
    uint maxcnt = 0;
    uint count = 0;

    if (verbose == 0)
	(void)printf("Histogram\n");

    /* NOTE(review): the subtractions below assume every excluded ham token
     * was counted in bucket 0 and every excluded spam token in the last
     * bucket; otherwise the unsigned counts would wrap -- confirm against
     * calc_prob().
     */
    if (verbose == 1) {
	hist->count[0] -= ham_hapax;
	hist->count[INTERVALS-1] -= spam_hapax;
	(void)printf("Histogram without hapaxes\n");
    }

    if (verbose == 2) {
	hist->count[0] -= ham_only;
	hist->count[INTERVALS-1] -= spam_only;
	(void)printf("Histogram without pure ham and spam\n");
    }

    (void)printf("%5s%8s %3s %s\n", "score", "count", "pct", "histogram");

    /* First pass: find the total and the tallest bucket (for scaling). */
    for (i=0; i<INTERVALS; i+=1)
    {
	uint32_t cnt = hist->count[i];
	if (cnt > maxcnt)
	    maxcnt = cnt;
	count += cnt;
    }

    /* Print histogram */
    for (i=0; i<INTERVALS; i+=1)
    {
	uint32_t cnt = hist->count[i];
	double beg = 1.0 * i / INTERVALS;
	double pct = pct_of(cnt, count);

	/* print interval start, count, and percent of total */
	(void)printf("%3.2f %8u %5.2f ", beg, cnt, pct);

	/* scale histogram to 48 characters, rounding up so that any
	 * non-empty bucket shows at least one mark */
	if (maxcnt>48)
	    cnt = (cnt * 48 + maxcnt - 1) / maxcnt;

	/* display histogram */
	for (r=0; r<cnt; r+=1)
	    (void)fputc( '#', stdout);
	(void)fputc( '\n', stdout);
    }

    (void)printf("tot  %8u\n", count);

    return count;
}

/* Read every token from the word list at bfp and print a score histogram
 * followed by the hapax / pure-token tallies.
 *
 * Returns EX_ERROR if the data store cannot be initialised or opened, or if
 * the transaction cannot be begun or committed; otherwise returns the result
 * of ds_foreach().
 */
ex_t histogram(bfpath *bfp)
{
    ex_t rc;
    uint count;
    void *dsh, *dbe;
    dsv_t val;

    rhistogram_t hist;

    /* The tallies are file-scope; start every run from zero so that a
     * repeated call does not report totals from an earlier one. */
    ham_only = ham_hapax = 0;
    spam_only = spam_hapax = 0;

    dbe = ds_init(bfp);
    if (dbe == NULL)
	return EX_ERROR;

    dsh = ds_open(dbe, bfp, DS_READ);
    if (dsh == NULL) {
	/* release the environment, as the other error paths do */
	ds_cleanup(dbe);
	return EX_ERROR;
    }

    if (DST_OK != ds_txn_begin(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "cannot begin transaction!\n");
	return EX_ERROR;
    }

    ds_get_msgcounts(dsh, &val);
    mgood = val.goodcount;
    mbad = val.spamcount;

    memset(&hist, 0, sizeof(hist));
    rc = ds_foreach(dsh, ds_histogram_hook, &hist);

    if (DST_OK != ds_txn_commit(dsh)) {
	ds_close(dsh);
	ds_cleanup(dbe);
	fprintf(stderr, "cannot commit transaction!\n");
	return EX_ERROR;
    }

    ds_close(dsh);
    ds_cleanup(dbe);

    count = print_histogram(&hist);

    if (verbose > 0) {
	/* the printed total excludes these tokens, so no percentages */
	printf("hapaxes:  ham %7u, spam %7u\n", ham_hapax, spam_hapax);
	printf(" pure:  ham %7u, spam %7u\n", ham_only, spam_only);
    } else {
	printf("hapaxes:  ham %7u (%5.2f%%), spam %7u (%5.2f%%)\n",
	       ham_hapax, pct_of(ham_hapax, count),
	       spam_hapax, pct_of(spam_hapax, count));
	printf(" pure:  ham %7u (%5.2f%%), spam %7u (%5.2f%%)\n",
	       ham_only, pct_of(ham_only, count),
	       spam_only, pct_of(spam_only, count));
    }

    return rc;
}

/* for a standalone program:
**
**	cc -o bogohist.prog.o -DMAIN -c bogohist.c
**	cc -o bogohist bogohist.prog.o libbogofilter.a strlcpy.o strlcat.o -ldb -lm
*/

#ifdef	MAIN
const char *progname = "bogohist";

int main(int argc, char *argv[])
{
    if (argc < 2) {
	fprintf(stderr, "usage: %s BOGOFILTER_DIR\n", progname);
	exit(1);
    } else {
	const char *path = argv[1];
	/* NOTE(review): histogram() is declared above as taking a bfpath *,
	 * but a plain path string is passed here -- this standalone entry
	 * point looks stale; confirm how a bfpath should be built from path.
	 */
	int rc = histogram(path);
	exit(rc);
    }
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -