⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bogoconfig.c

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: bogoconfig.c,v 1.246 2007/01/01 20:17:43 relson Exp $ */

/*****************************************************************************

NAME:
   bogoconfig.c -- process config file parameters

   2003-02-12 - split out from config.c

AUTHOR:
   David Relson <relson@osagesoftware.com>

CONTRIBUTORS:
   David Saez   -O option, helps embedding into Exim.

******************************************************************************

The call tree is (roughly):

bogoconfig.c      process_parameters
bogoconfig.c        process_arglist(PASS_1_CLI)
bogoconfig.c          process_arg(PASS_1_CLI)
configfile.c        process_config_files
configfile.c          read_config_file
configfile.c            process_config_option
configfile.c              process_config_option_as_arg
bogoconfig.c                process_arg(PASS_2_CFG)
bogoconfig.c        process_arglist(PASS_3_CLI)
bogoconfig.c          process_arg(PASS_3_CLI)

Note: bogolexer also uses configfile.c.
      bogolexer.c calls process_config_files(), which calls back to it.

******************************************************************************/

#include "common.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include "bogoconfig.h"
#include "bogofilter.h"
#include "bogoreader.h"
#include "bool.h"
#include "charset.h"
#include "datastore.h"
#include "datastore_db.h"
#include "error.h"
#include "find_home.h"
#include "format.h"
#include "lexer.h"
#include "longoptions.h"
#include "maint.h"
#include "paths.h"
#include "score.h"
#include "wordlists.h"
#include "wordlists_base.h"
#include "xatox.h"
#include "xmalloc.h"
#include "xstrdup.h"
#include "xstrlcpy.h"

/* True when the global verbosity exceeds 'level'; may be overridden by
 * defining DEBUG_CONFIG before this point. */
#ifndef DEBUG_CONFIG
#define DEBUG_CONFIG(level)     (verbose > (level))
#endif

/*---------------------------------------------------------------------------*/

/* Global variables */

char outfname[PATH_LEN] = "";   /* output file name; only meaningful with passthrough (see validate_args) */
bool  run_classify = false;     /* set by validate_args(): run_type has RUN_NORMAL or RUN_UPDATE */
bool  run_register = false;     /* set by validate_args(): run_type has a (un)register flag */
const char *logtag = NULL;

/* Local variables and declarations */

static int inv_terse_mode = 0;

static void display_tag_array(const char *label, FIELD *array);
static void process_arglist(int argc, char **argv, priority_t precedence, int pass);
static bool get_parsed_value(char **arg, double *parm);
static void comma_parse(char opt, const char *arg, double *parm1, double *parm2, double *parm3);

/*---------------------------------------------------------------------------*/

/* Long option table.  It is passed to process_config_files() below.
 *
 * NOTE(review): N and R come from longoptions.h; they are presumed to mean
 * "no argument" and "required argument" -- confirm against that header.
 */
static struct option longopts_bogofilter[] = {
    /* longoptions.h - common options */
    LONGOPTIONS_COMMON
    /* longoptions.h - bogofilter */
    LONGOPTIONS_MAIN
    /* longoptions.h - bogofilter/bogolexer options */
    LONGOPTIONS_LEX
    /* longoptions.h - bogofilter/bogoutil options */
    LONGOPTIONS_DB
    /* bogofilter specific options */
    { "classify-files",         N, 0, 'B' },
    /* 'L' takes a value: OPTIONS has "L:" and help documents --syslog-tag=tag */
    { "syslog-tag",             R, 0, 'L' },
    { "classify-mbox",          N, 0, 'M' },
    { "unregister-nonspam",     N, 0, 'N' },
    { "dataframe",              N, 0, 'R' },
    { "unregister-spam",        N, 0, 'S' },
    { "fixed-terse-format",     N, 0, 'T' },
    { "report-unsure",          N, 0, 'U' },
    { "classify-stdin",         N, 0, 'b' },
    { "bogofilter-dir",         R, 0, 'd' },
    { "nonspam-exits-zero",     N, 0, 'e' },
    { "use-syslog",             N, 0, 'l' },
    { "register-ham",           N, 0, 'n' },
    { "passthrough",            N, 0, 'p' },
    { "register-spam",          N, 0, 's' },
    { "update-as-scored",       N, 0, 'u' },
    { "debug-flags",            R, 0, 'x' },
    { "debug-to-stdout",        N, 0, 'D' },
    { "no-header-tags",         N, 0, 'H' },
    { "query",                  N, 0, 'Q' },
    /* 'k' takes a value: OPTIONS has "k:" and help documents --db-cachesize=size */
    { "db-cachesize",           R, 0, 'k' },
    { "ns-esf",                 R, 0, O_NS_ESF },
    { "sp-esf",                 R, 0, O_SP_ESF },
    { "ham-cutoff",             R, 0, O_HAM_CUTOFF },
    { "header-format",          R, 0, O_HEADER_FORMAT },
    { "log-header-format",      R, 0, O_LOG_HEADER_FORMAT },
    { "log-update-format",      R, 0, O_LOG_UPDATE_FORMAT },
    { "min-dev",                R, 0, O_MIN_DEV },
    { "robs",                   R, 0, O_ROBS },
    { "robx",                   R, 0, O_ROBX },
    { "spam-cutoff",            R, 0, O_SPAM_CUTOFF },
    { "spam-header-name",       R, 0, O_SPAM_HEADER_NAME },
    { "spam-subject-tag",       R, 0, O_SPAM_SUBJECT_TAG },
    { "spamicity-formats",      R, 0, O_SPAMICITY_FORMATS },
    { "spamicity-tags",         R, 0, O_SPAMICITY_TAGS },
    { "stats-in-header",        R, 0, O_STATS_IN_HEADER },
    { "terse",                  R, 0, O_TERSE },
    { "terse-format",           R, 0, O_TERSE_FORMAT },
    { "thresh-update",          R, 0, O_THRESH_UPDATE },
    { "timestamp",              R, 0, O_TIMESTAMP },
    { "unsure-subject-tag",     R, 0, O_UNSURE_SUBJECT_TAG },
    { "wordlist",               R, 0, O_WORDLIST },
    /* end of list */
    { NULL,                     0, 0, 0 }
};

/*---------------------------------------------------------------------------*/

/* Convert 'arg' to a boolean with str_to_bool().
 * When DEBUG_CONFIG(2) holds, the option 'name' and the result are
 * written to dbgout.
 */
static bool get_bool(const char *name, const char *arg)
{
    bool b = str_to_bool(arg);

    if (DEBUG_CONFIG(2)) {
        fprintf(dbgout, "%s -> %s\n", name,
                b ? "Yes" : "No");
    }
    return b;
}

/* Convert 'arg' to a double with xatof() and store it in *d.
 * Returns false when xatof() rejects the text, true otherwise.
 * When DEBUG_CONFIG(2) holds, a successful conversion is traced to dbgout.
 *
 * NOTE(review): 'arg' is const-qualified yet handed to remove_comment();
 * confirm that helper's prototype and whether it edits the string in place.
 */
static bool get_double(const char *name, const char *arg, double *d)
{
    remove_comment(arg);
    if (!xatof(d, arg)) {
        return false;
    }
    if (DEBUG_CONFIG(2)) {
        fprintf(dbgout, "%s -> %f\n", name, *d);
    }
    return true;
}

/* Return a fresh xstrdup() copy of 'arg' after running remove_comment()
 * on the copy.  The returned pointer is the xstrdup() result.
 * When DEBUG_CONFIG(2) holds, the value is traced to dbgout.
 */
static char *get_string(const char *name, const char *arg)
{
    char *s = xstrdup(arg);

    remove_comment(s);
    if (DEBUG_CONFIG(2)) {
        fprintf(dbgout, "%s -> '%s'\n", name, s);
    }
    return s;
}

/* Map a boolean option value onto T_ENABLED / T_DISABLED.
 * get_bool() emits its own trace line; a second, txn-specific line is
 * written here when DEBUG_CONFIG(2) holds.
 */
static e_txn get_txn(const char *name, const char *arg)
{
    e_txn t = get_bool(name, arg) ? T_ENABLED : T_DISABLED;

    if (DEBUG_CONFIG(2)) {
        fprintf(dbgout, "%s -> %s\n", name,
                t ? "enabled" : "disabled");
    }
    return t;
}

/* Entry point for option handling.
 *
 * Resets bogotest, verbose, run_type and fpin, records today's date, then
 * processes the command line (PASS_1_CLI), the config files, and the
 * command line again (PASS_3_CLI), in that order.  Afterwards the
 * wordlist home is set and stats_prefix is chosen from stats_in_header.
 *
 * warn_on_error is forwarded unchanged to process_config_files().
 */
void process_parameters(int argc, char **argv, bool warn_on_error)
{
    bogotest = 0;
    verbose = 0;
    run_type = RUN_UNKNOWN;
    fpin = stdin;
    set_today();                /* compute current date for token age */

#ifdef __EMX__
    _response (&argc, &argv);   /* expand response files (@filename) */
    _wildcard (&argc, &argv);   /* expand wildcards (*.*) */
#endif

    process_arglist(argc, argv, PR_COMMAND, PASS_1_CLI);
    process_config_files(warn_on_error, longopts_bogofilter);
    process_arglist(argc, argv, PR_COMMAND, PASS_3_CLI);

    /* directories from command line and config file are already handled */
    wordlists_set_bogohome();

    stats_prefix = stats_in_header ? "  " : "# ";

    return;
}

/* Consume one field of a comma-separated list of numbers.
 *
 * *arg is the parse cursor.  Nothing happens (and true is returned) when
 * parm is NULL, the cursor is NULL, or the cursor is at end of string.
 * Otherwise:
 *   - a leading ',' marks an empty field: it is skipped and *parm is left
 *     untouched;
 *   - anything else is converted with xatof() into *parm, and the cursor
 *     moves just past the next ',' (or becomes NULL if there is none).
 *
 * Returns the xatof() result, or true when no conversion was attempted.
 */
static bool get_parsed_value(char **arg, double *parm)
{
    char *str = *arg;
    bool ok = true;

    if (parm && str && *str) {
        if (*str == ',') {
            str += 1;
        } else {
            ok = xatof(parm, str);
            str = strchr(str + 1, ',');
            if (str) {
                str += 1;
            }
        }
        *arg = str;
    }
    return ok;
}

/* Parse up to three comma-separated doubles from 'arg' into parm1..parm3.
 *
 * Works on a private xstrdup() copy that is released before returning.
 * Parsing stops at the first field that fails to convert, in which case a
 * diagnostic naming option 'opt' is written to stderr.
 */
static void comma_parse(char opt, const char *arg, double *parm1, double *parm2, double *parm3)
{
    char *buf = xstrdup(arg);
    char *cursor = buf;         /* advanced by get_parsed_value(); buf is kept for xfree() */
    bool ok = ( get_parsed_value(&cursor, parm1) &&
                get_parsed_value(&cursor, parm2) &&
                get_parsed_value(&cursor, parm3) );

    if (!ok) {
        fprintf(stderr, "Cannot parse -%c option argument '%s'.\n", opt, arg);
    }
    xfree(buf);
}

/* Return run_type with add_type or'ed in.
 * If run_type already contains any bit of 'conflict', an error is printed
 * to stderr and the process exits with EX_ERROR.
 */
static run_t check_run_type(run_t add_type, run_t conflict)
{
    if (run_type & conflict) {
        (void)fprintf(stderr, "Error:  Invalid combination of options.\n");
        exit(EX_ERROR);
    }
    return (run_type | add_type);
}

/* Derive run_classify / run_register from run_type and check the result.
 *
 * Prints a warning when an output file name is set without passthrough.
 * Returns EX_ERROR (after printing a message) when registration is
 * combined with classification or Rtable, EX_OK otherwise.
 */
static int validate_args(void)
{
/*  flags '-s', '-n', '-S', and '-N' are mutually exclusive with
    flags '-p', '-u', '-e', and '-R'. */

    run_classify = (run_type & (RUN_NORMAL | RUN_UPDATE)) != 0;
    run_register = (run_type & (REG_SPAM | REG_GOOD | UNREG_SPAM | UNREG_GOOD)) != 0;

    if (*outfname && !passthrough)
    {
        (void)fprintf(stderr,
                      "Warning: Option -O %s has no effect without -p\n",
                      outfname);
    }

    if (run_register && (run_classify || Rtable))
    {
        (void)fprintf(stderr,
                      "Error:  Option '-u' may not be used with options '-s', '-n', '-S', or '-N'.\n"
            );
        return EX_ERROR;
    }

    return EX_OK;
}

/* Help text, one output line per entry, terminated by NULL; printed by help(). */
static const char *help_text[] = {
    "help options:\n",
    "  -h, --help                - print this help message.\n",
    "  -V, --version             - print version information and exit.\n",
    "  -Q, --query               - query (display) base bogofilter configuration.\n",
    "  -QQ                       - display extended configuration info.\n",
    "classification options:\n",
    "  -p, --passthrough         - passthrough.\n",
    "  -e, --ham-true            - in -p mode, exit with code 0 when the mail is not spam.\n",
    "  -u, --update-as-scored    - score message as spam or non-spam and register accordingly.\n",
    "  -M, --classify-mbox       - set mailbox mode.  Classify multiple messages in an mbox formatted file.\n",
    "  -b, --classify-stdin      - set streaming bulk mode. Process multiple messages (files or directories) read from STDIN.\n",
    "  -B, --classify-files=list - set bulk mode. Process multiple messages (files or directories) named on the command line.\n",
    "  -R, --dataframe           - print an R data frame.\n",
    "registration options:\n",
    "  -s, --register-spam       - register message(s) as spam.\n",
    "  -n, --register-ham        - register message(s) as non-spam.\n",
    "  -S, --unregister-spam     - unregister message(s) from spam list.\n",
    "  -N, --unregister-nonspam  - unregister message(s) from non-spam list.\n",
    "general options:\n",
    "  -c, --config-file=file    - read specified config file.\n",
    "  -C, --no-config-file      - don't read standard config files.\n",
    "  -d, --bogofilter-dir=path - specify directory for wordlists.\n",
    "  -H, --no-header-tags      - disables header line tagging.\n",
    "  -k, --db-cachesize=size   - set Berkeley DB cache size (MB).\n",
    "  -l, --use-syslog          - write messages to syslog.\n",
    "  -L, --syslog-tag=tag      - specify the tag value for log messages.\n",
    "  -I, --input-file=file     - read message from 'file' instead of stdin.\n",
    "  -O, --output-file=file    - save message to 'file' in passthrough mode.\n",
    "parameter options:\n",
    "  -mv1[,v2[,v3]]            - set user defined min_dev, robs, and robx values.\n",
    "      --min-dev=v1, --robs=v2, --robx=v3\n",
    "  -ov1[,v2]                 - set user defined spam and non-spam cutoff values.\n",
    "      --spam-cutoff=v1, --ham-cutoff=v2\n",
    "info options:\n",
    "  -t, --terse               - set terse output mode.\n",
    "  -T, --fixed-terse-format  - set invariant terse output mode.\n",
    "  -q, --quiet               - suppress token statistics.\n",
    "  -U, --report-unsure       - print statistics if spamicity is 'unsure'.\n",
    "  -v, --verbosity           - set debug verbosity level.\n",
    "  -y, --timestamp-date      - set date for token timestamps.\n",
    "  -D, --debug-to-stdout     - direct debug output to stdout.\n",
    "  -x, --debug-flags=list    - set flags to display debug information.\n",
    "config file options:\n",
    "  --option=value - can be used to set the value of a config file option.\n",
    "                   see bogofilter.cf.example for more info.\n",
    "  --block-on-subnets                return class addr tokens\n",
    "  --bogofilter-dir                  directory for wordlists\n",
    "  --charset-default                 default character set\n",
    "  --db-cachesize                    Berkeley db cache in Mb\n",
#ifdef  HAVE_DECL_DB_CREATE
    "  --db-log-autoremove               enable/disable autoremoval of log files\n",
    "  --db-transaction                  enable/disable transactions\n",
 #ifdef FUTURE_DB_OPTIONS
    "  --db-txn-durable                                 \n",
 #endif
#endif
    "  --ham-cutoff                      nonspam if score below this\n",
    "  --header-format                   spam header format\n",
    "  --log-header-format               header written to log\n",
    "  --log-update-format               logged on update\n",
    "  --min-dev                         ignore if score near\n",
    "  --min-token-len                   min len for single tokens\n",
    "  --max-token-len                   max len for single tokens\n",
    "  --max-multi-token-len             max len for multi-word tokens\n",
    "  --multi-token-count               number of tokens per multi-word token\n",
    "  --ns-esf                          effective size factor for ham\n",
    "  --replace-nonascii-characters     substitute '?' if bit 8 is 1\n",
    "  --robs                            Robinson's s parameter\n",
    "  --robx                            Robinson's x parameter\n",
    "  --sp-esf                          effective size factor for spam\n",
    "  --spam-cutoff                     spam if score above this\n",
    "  --spam-header-name                passthrough adds/replaces\n",
    "  --spam-subject-tag                passthrough prepends Subject\n",
    "  --spamicity-formats               spamicity output format\n",
    "  --spamicity-tags                  spamicity tag format\n",
    "  --stats-in-header                 use header not body\n",
    "  --terse                           report in short form\n",
    "  --terse-format                    short form\n",
    "  --thresh-update                   no update if near 0 or 1\n",
    "  --timestamp                       enable/disable token timestamps\n",
#ifndef DISABLE_UNICODE
    "  --unicode                         enable/disable unicode based wordlist\n",
#endif
    "  --unsure-subject-tag              like spam-subject-tag\n",
    "  --user-config-file                configuration file\n",
    "  --wordlist                        specify wordlist parameters\n",
    "\n",
    "bogofilter is a tool for classifying email as spam or non-spam.\n",
    "\n",
    "For updates and additional information, see\n",
    "URL: http://bogofilter.sourceforge.net\n",
    NULL
};

/* Write the version/usage banner followed by every help_text line to fp. */
static void help(FILE *fp)
{
    size_t i;

    (void)fprintf(fp,
                  "%s version %s\n"
                  "\n"
                  "Usage:  %s [options] < message\n"
                  "\n",
                  progtype, version, PACKAGE
        );

    for (i = 0; help_text[i] != NULL; i++) {
        (void)fprintf(fp, "%s", help_text[i]);
    }
}

/* Print program version, database version string (ds_version_str()),
 * copyright notices and the warranty disclaimer to stdout.
 */
static void print_version(void)
{
    (void)fprintf(stdout,
                  "%s version %s\n"
                  "    Database: %s\n"
                  "Copyright (C) 2002-2007 David Relson, Matthias Andree\n"
                  "Copyright (C) 2002-2004 Greg Louis\n"
                  "Copyright (C) 2002-2003 Eric S. Raymond, Adrian Otto, Gyepi Sam\n\n"
                  "%s comes with ABSOLUTELY NO WARRANTY.  "
                  "This is free software, and\nyou are welcome to "
                  "redistribute it under the General Public License.  "
                  "See\nthe COPYING file with the source distribution for "
                  "details.\n"
                  "\n",
                  progtype, version, ds_version_str(), PACKAGE);
}

/* Short-option specification string; a ':' after a letter marks an option that takes an argument. */
#define OPTIONS ":-:bBc:Cd:DehHI:k:lL:m:MnNo:O:pPqQRsStTuUvVx:X:y:"

/** These functions process command line arguments.
 **
 ** They are called to perform passes 1 & 2 of command line switch processing.
 ** The config file is read in between the two function calls.
 **

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -