⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bogoreader.c

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: bogoreader.c,v 1.68 2006/07/08 15:29:50 m-a Exp $ */

/*****************************************************************************

NAME:
   bogoreader.c -- process input files

AUTHORS: (C) Copyright 2003-2005 by
   David Relson <relson@osagesoftware.com>
   Matthias Andree <matthias.andree@gmx.de>

******************************************************************************/

/*
** Formats supported:
**
**	mbox
**	Maildir
**	MH folder
**	rmail
**	ANT		RISC-OS only
**
**	msg-count	special for bogofilter
*/

#include "common.h"

#include <ctype.h>
#include <errno.h>
#include <stdlib.h>

#include "bogoreader.h"
#include "error.h"
#include "fgetsl.h"
#include "lexer.h"
#include "paths.h"
#include "token.h"
#include "xmalloc.h"

/* File-scope reader state. */

static void (*fini)(void);		/* NOTE(review): presumably a teardown hook; assigned outside this view */

/* remaining command-line file names; b_args_next_mailstore() consumes
 * one entry per call */
static int  argc;
static char **argv;

static const char *filename;		/* mailstore name currently being processed */
static char namebuff[PATH_LEN+1];	/* receives a name read from stdin, see b_stdin_next_mailstore() */
static char dir_name[PATH_LEN+1];	/* filled in by save_dirname() */
static FILE *yy_file;

typedef enum ms_e {MS_FILE, MS_MAILDIR, MS_MH } ms_t;
static ms_t mailstore_type;		/* set by open_mailstore() when the mailstore is a directory */

static bool mail_first = true;		/* for the _next_mail functions */
static bool mailstore_first = true;	/* for the _next_mailstore functions */
static bool firstline = true;		/* for mailbox /^From / match */
static bool    have_message = false;

/* Lexer-Reader Interface */

reader_more_t *reader_more;
reader_line_t *reader_getline;
reader_file_t *reader_filename;

/* Function Prototypes */

/* these functions check if there are more file names in bulk modes,
 * read-mail/mbox-from-stdin for uniformity */
static reader_more_t stdin_next_mailstore;
static reader_more_t b_stdin_next_mailstore;
static reader_more_t b_args_next_mailstore;

/* these functions check if there is more mail in a mailbox/maildir/...
* to process, trivial mail_next_mail for uniformity */static reader_more_t dir_next_mail;static reader_more_t mail_next_mail;static reader_more_t mailbox_next_mail;/* maildir is the mailbox format specified in * http://cr.yp.to/proto/maildir.html */static reader_line_t simple_getline;	/* ignores /^From / */static reader_line_t mailbox_getline;	/* minds   /^From / */static reader_line_t rmail_getline;	/* minds   /^#! rmail/ */static reader_line_t ant_getline;	/* minds   /^MAIL TO:/ */static reader_file_t get_filename;static void bogoreader_close(void);typedef enum { MBOX, MC, RMAIL, ANT } mbox_t;typedef struct {    const char	*sep;    uint	len;    mbox_t	type;    reader_line_t *fcn;} sep_2_box_t;static sep_2_box_t sep_2_box[] = {    { "From ",      	 5, MBOX,  mailbox_getline },    { "\".MSG_COUNT\"", 12, MC,    mailbox_getline },	/* msg-count */    { "#! rmail",   	 8, RMAIL, rmail_getline   },    { "MAIL FROM:", 	10, ANT,   ant_getline     }	/* RISC-OS only */};static uint        seplen = 0;static const char *separator = NULL;static void dir_init(const char *name);static void dir_fini(void);typedef enum st_e { IS_DIR, IS_FILE, IS_ERR } st_t;/* Function Definitions */bool is_eol(const char *buf, size_t len){    bool ans = ((len == 1 && memcmp(buf, NL, 1) == 0) ||		(len == 2 && memcmp(buf, CRLF, 2) == 0));    return ans;}static reader_line_t *get_reader_line(FILE *fp) {    uint i;    int c;    reader_line_t *fcn = mailbox_getline;    if (fp == NULL)	return NULL;    c = fgetc(fp);    ungetc(c, fp);    for (i = 0; i < COUNTOF(sep_2_box); i += 1) {	sep_2_box_t *s = sep_2_box + i;        if (s->sep[0] == c) {            fcn = s->fcn;	    seplen = s->len;	    separator = s->sep;	    break;	}    }        if (fcn == mailbox_getline && !mbox_mode)        fcn = simple_getline;        return fcn;}/* Checks if name is a directory. 
* Returns IS_DIR for directory, IS_FILE for other type, IS_ERR for error */static st_t isdir(const char *name){    struct stat stat_buf;    if (stat(name, &stat_buf)) return IS_ERR;    return (S_ISDIR(stat_buf.st_mode) != 0) ? IS_DIR : IS_FILE;}static void save_dirname(const char *name){    size_t l = strlen(name);    l = min(l, sizeof(dir_name)-2);    memcpy(dir_name, name, l);    if (dir_name[l-1] == DIRSEP_C)	l -= 1;    dir_name[l] = '\0';}static const char* const maildir_subs[]={ DIRSEP_S "new", DIRSEP_S "cur", NULL };static const char *const *maildir_sub;static DIR *reader_dir;/* MA: Check if the given name points to a Maildir. We don't require the * /tmp directory for simplicity. * This function checks if dir, dir/new and dir/cur are all directories. * Returns IS_DIR for directory, IS_FILE for other type, IS_ERR for error */static st_t ismaildir(const char *dir) {    st_t r;    size_t l;    char *x;    const char *const *y;    const size_t maxlen = 4;    r = isdir(dir);    if (r != IS_DIR) return r;    x = xmalloc((l = strlen(dir)) + maxlen /* append */ + 1 /* NUL */);    memcpy(x, dir, l);    for (y = maildir_subs; *y; y++) {	strlcpy(x + l, *y, maxlen + 1);	r = isdir(x);	if (r != IS_DIR) {	    xfree(x);	    return r;	}    }    xfree(x);    return IS_DIR;}static void dummy_fini(void) { }static reader_more_t *mailstore_next_store;static reader_more_t *mailstore_next_mail = NULL;/* this is the 'nesting driver' for our input. * mailstore := one of { mail, mbox, maildir } * if we have a current mailstore-specific handle, check that if we have * further input in the mailstore first. 
if we don't, see if we have * further mailstores to process */static bool reader__next_mail(void){    for (;;) {	/* check mailstore-specific method */	if (mailstore_next_mail) {	    if ((*mailstore_next_mail)()) /* more mails in the mailstore */		return true;	    mailstore_next_mail = NULL;	}	/* ok, that one has been exhausted, try the next mailstore */	/* mailstore_next_store opens the mailstore */	if (!(*mailstore_next_store)())	    return false;	/* ok, we have more mailstores, so check if the current mailstore has	 * input - loop.	 */    }}/* open mailstore (Maildir, mbox file or file with a single mail) and set * _getline and _next_mail pointers dependent on the mailstore's type. * * - automatically detects maildir * - does not automatically distinguish between mbox and mail *   and takes mbox_mode instead */static bool open_mailstore(const char *name){    filename = name;    bogoreader_close();    firstline = true;    switch (isdir(filename)) {    case IS_FILE:	if (DEBUG_READER(0))	    fprintf(dbgout, "%s:%d - assuming %s is a %s\n", __FILE__, __LINE__, filename, mbox_mode ? "mbox" : "message");	fpin = fopen( filename, "r" );	if (fpin == NULL) {	    fprintf(stderr, "Can't open file '%s': %s\n", filename,		    strerror(errno));	    return false;	} else {	    mail_first = true;	    msg_count_file = false;	    reader_getline = get_reader_line(fpin);	    mailstore_next_mail = mbox_mode ? 
mailbox_next_mail : mail_next_mail;	    return true;	}    case IS_DIR:	if (ismaildir(filename) == IS_DIR) {	    /* MAILDIR */	    mailstore_type = MS_MAILDIR;	    dir_init(filename);	    reader_getline      = simple_getline;	    mailstore_next_mail = dir_next_mail;	    return true;	} else {	    /* MH */	    mailstore_type = MS_MH;	    dir_init(filename);	    reader_getline      = simple_getline;	    mailstore_next_mail = dir_next_mail;	    return true;	}    case IS_ERR:	fprintf(stderr, "Can't stat mailstore '%s': %s\n",		filename, strerror(errno));	break;    default:	fprintf(stderr, "Can't identify type of mailstore '%s'\n", filename);	break;    }    return false;}/*** _next_mailstore functions ***********************************************//* this initializes for reading a single mail or a mbox from stdin */static bool stdin_next_mailstore(void){    bool val = mailstore_first;    reader_getline = get_reader_line(fpin);    if (reader_getline == NULL)	return false;    mailstore_next_mail = mbox_mode ? 
mailbox_next_mail : mail_next_mail;    mailstore_first = false;    return val;}/* this reads file names from stdin and processes them according to * their type */static bool b_stdin_next_mailstore(void){    int len;    filename = namebuff;    if ((len = fgetsl(namebuff, sizeof(namebuff), stdin)) <= 0)	return false;    if (len > 0 && namebuff[len-1] == '\n')	namebuff[len-1] = '\0';    return open_mailstore(filename);}/* this reads file names from the command line and processes them * according to their type */static bool b_args_next_mailstore(void){    if (argc <= 0)	return false;    filename = *argv;    argc -= 1;    argv += 1;    return open_mailstore(filename);}/*** _next_mail functions ***********************************************//* trivial function, returns true on first run, * returns false on all subsequent runs */static bool mail_next_mail(void){    bool val = mail_first;    mail_first = false;    return val;}/* always returns true on the first run * subsequent runs return true when a From line was encountered */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -