⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bogofilter.c

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 C
字号:
/* $Id: bogofilter.c,v 1.55 2005/09/04 14:49:43 relson Exp $ *//*****************************************************************************NAME:   bogofilter.c -- detect spam and bogons presented on standard input.AUTHORS:   Eric S. Raymond <esr@thyrsus.com>   David Relson    <relson@osagesoftware.com>   Matthias Andree <matthias.andree@gmx.de>   Greg Louis      <glouis@dynamicro.on.ca>THEORY:   Originally implemented as Paul Graham's variant of Bayes filtering,   as described in      "A Plan For Spam", http://www.paulgraham.com/spam.html   Updated in accordance with Gary Robinson's proposed modifications,   as described at    http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html******************************************************************************/#include "common.h"#include <string.h>#include <stdlib.h>#include "bogofilter.h"#include "bogoconfig.h"#include "bogoreader.h"#include "collect.h"#include "passthrough.h"#include "register.h"#include "rstats.h"#include "score.h"/***	case B_NORMAL:		**	case B_STDIN:		* '-b' - streaming (stdin) mode ***	case B_CMDLINE:		* '-B' - command line mode *****loop:**    read & parse a message**	if -p, save textblocks**    register if -snSN && -pe**    classify if -pue && ! -snSN**    register if -u**    write    if -p**    if (-snSN && -pe) || -u**	free tokens**    else**	accumulate tokens	****end:	register if -snSN && ! -pe*//* Function Definitions */void print_stats(FILE *fp){    msg_print_stats(fp);}rc_t bogofilter(int argc, char **argv){    uint msgcount = 0;    rc_t status = RC_OK;    bool register_opt = (run_type & (REG_SPAM | UNREG_SPAM | REG_GOOD | UNREG_GOOD)) != 0;    bool register_bef = register_opt && passthrough;    bool register_aft = ((register_opt && !passthrough) || (run_type & RUN_UPDATE)) != 0;    bool write_msg    = passthrough || Rtable;    bool classify_msg = write_msg || ((run_type & (RUN_NORMAL | RUN_UPDATE))) != 0;    wordhash_t *words;    score_initialize();			/* initialize constants */    if (query)	return query_config();    words = register_aft ? wordhash_new() : NULL;    bogoreader_init(argc, argv);    while ((*reader_more)()) {	double spamicity;	wordhash_t *w = wordhash_new();	rstats_init();	passthrough_setup();	collect_words(w);	wordhash_sort(w);	msgcount += 1;        if (!passthrough_keepopen())            bogoreader_close_ifeof();        	if (register_opt && DEBUG_REGISTER(1))	    fprintf(dbgout, "Message #%ld\n", (long) msgcount);	if (register_bef)	    register_words(run_type, w, 1);	if (register_aft)	    wordhash_add(words, w, &wordprop_init);	if (classify_msg || write_msg) {	    lookup_words(w);			/* This reads the database */	    spamicity = msg_compute_spamicity(w, NULL);	    status = msg_status();	    if (run_type & RUN_UPDATE)		/* Note: don't register if RC_UNSURE */	    {		if (status == RC_SPAM && spamicity <= 1.0 - thresh_update)		    register_words(REG_SPAM, w, msgcount);		if (status == RC_HAM && spamicity >= thresh_update)		    register_words(REG_GOOD, w, msgcount);	    }	    if (verbose && !passthrough && !quiet) {		const char *filename = (*reader_filename)();		if (filename)		    fprintf(fpo, "%s ", filename); 	    }	    write_message(status);		/* passthrough */	    if (logflag && !register_opt) {		write_log_message(status);		msgcount = 0;	    }	}	wordhash_free(w);	passthrough_cleanup();	rstats_cleanup();	if (DEBUG_MEMORY(2))	    MEMDISPLAY;	if (fDie)	    exit(EX_ERROR);    }    bogoreader_fini();    if (DEBUG_MEMORY(1))	MEMDISPLAY;    if (register_aft && ((run_type & RUN_UPDATE) == 0)) {	wordhash_sort(words);	register_words(run_type, words, msgcount);    }    if (logflag && register_opt)	write_log_message(status);    wordhash_free(words);    if (DEBUG_MEMORY(1))	MEMDISPLAY;    return status;}/* Done */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -