📄 bogoutil.c

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
    "  -V, --version               - print version information and exit.\n",    "\n",    "  -d, --dump=file             - dump data from file to stdout.\n",    "  -l, --load=file             - load data from stdin into file.\n",    "  -u, --upgrade=file          - upgrade wordlist version.\n",    "\n",    "info options:\n",    "  -w file                     - display counts for words from stdin.\n",    "  -p file                     - display word counts and probabilities.\n",    "  -I, --input-file=file       - read 'file' instead of standard input.\n",    "  -H file                     - display histogram and statistics for wordlist.\n",    "                                - use with -v  to exclude hapaxes.\n",    "                                - use with -vv to exclude pure spam/ham.\n",    "  -r file                     - compute Robinson's X for the specified file.\n",    "  -R file                     - compute Robinson's X and save it in wordlist.\n",    "\n",    "database maintenance, the \"-m file\" option is required in this group:\n",    "  -m file                     - enable maintenance works (expiring tokens).\n",    "  -n                          - replace non-ascii characters with '?'.\n",    "  -a age                      - exclude tokens with older ages.\n",    "  -c cnt                      - exclude tokens with lower counts.\n",    "  -s l,h                      - exclude tokens with lengths between 'l' and 'h'\n"    "                                (low and high).\n",#ifndef	DISABLE_UNICODE    "  --unicode=yes/no            - convert wordlist to/from unicode\n",#endif    "\n",    "token parsing options:\n",    "  --min-token-len             - min len for single tokens\n",    "  --max-token-len             - max len for single tokens\n",    "  --max-multi-token-len       - max len for multi-word tokens\n",    "  --multi-token-count         - number of tokens per multi-word token\n",    "\n",    NULL    };static void help(FILE *fp){    uint i;    const char **messages;    usage(fp);    messages = help_text;    for (i=0; messages[i] != NULL; i++)	(void)fprintf(fp, "%s", messages[i]);    messages = dsm_help_bogoutil();    for (i=0; messages[i] != NULL; i++)	(void)fprintf(fp, "%s", messages[i]);    (void)fprintf(fp,		  "%s (version %s) is part of the bogofilter package.\n",                  progname, version	);}static const char *ds_file = NULL;static bool  prob = false;static cmd_t flag = M_NONE;static struct option longopts_bogoutil[] = {    /* longoptions.h - common options */    LONGOPTIONS_COMMON    /* longoptions.h - options for bogofilter and bogoutil */    LONGOPTIONS_DB    /* longoptions.h - options for bogolexer and bogoutil */    LONGOPTIONS_LEX_UTIL    /* bogoutil specific options */    { "db-prune",                       R, 0, O_DB_PRUNE },    { "db-checkpoint",                  R, 0, O_DB_CHECKPOINT },    { "db-list-logfiles",               R, 0, O_DB_LIST_LOGFILES },    { "db-print-leafpage-count",	R, 0, O_DB_PRINT_LEAFPAGE_COUNT },    { "db-print-pagesize",		R, 0, O_DB_PRINT_PAGESIZE },    { "db-recover",                     R, 0, O_DB_RECOVER },    { "db-recover-harder",              R, 0, O_DB_RECOVER_HARDER },    { "db-remove-environment",		R, 0, O_DB_REMOVE_ENVIRONMENT },    { "db-verify",                      R, 0, O_DB_VERIFY },    /* end of list */    { NULL,				0, 0, 0 }};#define	OPTIONS	":a:c:Cd:DhH:I:k:l:m:nO:p:r:R:s:u:vVw:x:X:y:"static int process_arglist(int argc, char **argv){    int option;    int count = 0;    fpin = stdin;    fpo  = stdout;    dbgout = stderr;#ifdef __EMX__    _response (&argc, &argv);	/* expand response files (@filename) */    _wildcard (&argc, &argv);	/* expand wildcards (*.*) */#endif    /* default: no token length checking */    max_token_len = 0;    while (1)    {	int option_index = 0;	int this_option_optind = optind ? optind : 1;	const char *name;	option = getopt_long(argc, argv, OPTIONS,			     longopts_bogoutil, &option_index);	if (option == -1) 	    break;	name = (option_index == 0) ? argv[this_option_optind] : longopts_bogoutil[option_index].name;	count += process_arg(option, name, optarg);    }    if (max_token_len != 0 && max_multi_token_len == 0) {	/* token length checking ... */	if (multi_token_count == 1)	    max_multi_token_len = max_token_len + MAX_PREFIX_LEN;	else	    max_multi_token_len = max_token_len = (max_token_len+1) * multi_token_count + MAX_PREFIX_LEN;    }    if (count != 1)    {	usage(stderr);	fprintf(stderr, "%s: Exactly one of the file or directory commands must be present.\n", progname);	exit(EX_ERROR);    }    return count;}static int process_arg(int option, const char *name, const char *val){    int count = 0;    switch (option) {    case '?':	fprintf(stderr, "Unknown option '%s'.\n", name);	break;    case 'd':	flag = M_DUMP;	count += 1;	ds_file = val;	break;    case O_CONFIG_FILE:	read_config_file(val, false, false, PR_COMMAND, longopts_bogoutil);	/*@fallthrough@*/	/* fall through to suppress reading config files */    case 'C':	suppress_config_file = true;	break;    case 'k':	db_cachesize=(uint) atoi(val);	break;    case 'l':	flag = M_LOAD;	count += 1;	ds_file = val;	break;    case 'm':	flag = M_MAINTAIN;	count += 1;	ds_file = val;	break;    case 'p':	prob = true;	/*@fallthrough@*/    case 'w':	flag = M_WORD;	count += 1;	ds_file = val;	break;    case O_DB_PRINT_LEAFPAGE_COUNT:	flag = M_LEAFPAGES;	count += 1;	ds_file = val;	break;    case O_DB_PRINT_PAGESIZE:	flag = M_PAGESIZE;	count += 1;	ds_file = val;	break;    case 'r':	onlyprint = true;    case 'R':	flag = M_ROBX;	count += 1;	ds_file = val;	break;    case 'u':	upgrade_wordlist_version = true;	flag = M_MAINTAIN;	count += 1;	ds_file = val;	break;    case 'v':	verbose++;	break;    case ':':	fprintf(stderr, "Option %s requires an argument.\n", name);	exit(EX_ERROR);    case 'h':	help(stdout);	exit(EX_OK);    case 'H':	flag = M_HIST;	count += 1;	ds_file = val;	break;    case 'V':	print_version();	exit(EX_OK);    case 'x':	set_debug_mask(val);	break;    case 'X':	set_bogotest(val);	break;    case 'a':	maintain = true;	thresh_date = string_to_date(val);	break;    case 'c':	maintain = true;	thresh_count = (uint) atoi(val);	break;    case 's':    {	unsigned long mi, ma;	maintain = true;	    	if (2 == sscanf(val, "%lu,%lu", &mi, &ma)) {	    size_min = mi;	    size_max = ma;	} else {	    fprintf(stderr, "syntax error in argument \"%s\" of -s\n.",		    val);	    exit(EX_ERROR);	}    }    break;    case 'n':	maintain = true;	replace_nonascii_characters ^= true;	break;    case 'y':		/* date as YYYYMMDD */    {	YYYYMMDD date = string_to_date(val);	maintain = true;	if (date != 0 && date < 19990000) {	    fprintf(stderr, "Date format for '-y' option is YYYYMMDD\n");	    exit(EX_ERROR);	}	set_date( date );	break;    }    case 'I':	fpin = fopen( val, "r" );	if (fpin == NULL) {	    fprintf(stderr, "Can't read file '%s'\n", val);	    exit(EX_ERROR);	}	break;    case 'O':	fpo = fopen(val, "wt");	if (fpo == NULL) {	    fprintf(stderr, "Can't write file '%s'\n", val);	    exit(EX_ERROR);	}	break;    case 'D':	dbgout = stdout;	break;    case O_DB_VERIFY:	flag = M_VERIFY;	count += 1;	ds_file = val;	break;    case O_UNICODE:	encoding = str_to_bool(val) ? E_UNICODE : E_RAW;	break;    case O_MAX_TOKEN_LEN:	max_token_len = atoi(optarg);	break;    case O_MIN_TOKEN_LEN:	min_token_len = atoi(optarg);	break;    case O_MAX_MULTI_TOKEN_LEN:	max_multi_token_len=atoi(optarg);	break;    case O_MULTI_TOKEN_COUNT:	multi_token_count=atoi(optarg);	break;    default:	if (!dsm_options_bogoutil(option, &flag, &count, &ds_file, name, val)) {	    fprintf(stderr, "Invalid option '%s'\n", name);	    exit(EX_ERROR);	}    }    return count;}static bfpath_mode get_mode(cmd_t cmd){    bfpath_mode mode = BFP_ERROR;    switch (cmd) {    case M_LOAD:	mode = BFP_MAY_CREATE;	break;    case M_DUMP:    case M_HIST:    case M_MAINTAIN:    case M_ROBX:    case M_VERIFY:    case M_WORD:    case M_CHECKPOINT:	/* database transaction/integrity operations */    case M_CRECOVER:    case M_LEAFPAGES:    case M_PAGESIZE:    case M_PURGELOGS:    case M_RECOVER:    case M_REMOVEENV:    case M_LIST_LOGFILES:	mode = BFP_MUST_EXIST;	break;    case M_NONE:	usage(stderr);	exit(EX_ERROR);    }    return mode;}int main(int argc, char *argv[]){    ex_t rc = EX_OK;    bfpath *bfp;    bfpath_mode mode;    fBogoutil = true;    signal_setup();			/* setup to catch signals */    atexit(bf_exit);    progtype = build_progtype(progname, DB_TYPE);    set_today();			/* compute current date for token age */    process_arglist(argc, argv);    process_config_files(false, longopts_bogoutil);	/* need to read lock sizes */    /* Extra or missing parameters */    if (flag != M_WORD && flag != M_LIST_LOGFILES && argc != optind) {	fprintf(stderr, "Missing or extraneous argument.\n");	usage(stderr);	exit(EX_ERROR);    }    bfp = bfpath_create(ds_file);    if (bogohome == NULL)	set_bogohome( "." );		/* set default */    bfpath_set_bogohome(bfp);    mode = get_mode(flag);    if (bfpath_check_mode(bfp, mode)) {	if (bfp->isdir)	    bfpath_set_filename(bfp, WORDLIST);    }    if (!bfpath_check_mode(bfp, mode)) {	fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);	exit(EX_ERROR);    }    errno = 0;		/* clear error status */    switch (flag) {	case M_RECOVER:	    ds_init(bfp);	    rc = ds_recover(bfp, false);	    break;	case M_CRECOVER:	    ds_init(bfp);	    rc = ds_recover(bfp, true);	    break;	case M_CHECKPOINT:	    ds_init(bfp);	    rc = ds_checkpoint(bfp);	    break;	case M_LIST_LOGFILES:	    dsm_init(bfp);	    rc = ds_list_logfiles(bfp, argc - optind, argv + optind);	    break;	case M_PURGELOGS:	    ds_init(bfp);	    rc = ds_purgelogs(bfp);	    break;	case M_REMOVEENV:	    dsm_init(bfp);	    rc = ds_remove(bfp);	    break;	case M_VERIFY:	    dsm_init(bfp);	    rc = ds_verify(bfp);	    break;	case M_LEAFPAGES:	    {		u_int32_t c;		dsm_init(bfp);		c = ds_leafpages(bfp);		if (c == 0xffffffff) {		    fprintf(stderr, "%s: error getting leaf page count.\n", ds_file);		    rc = EX_ERROR;		} else if (c == 0) {		    puts("UNKNOWN");		} else {		    printf("%lu\n", (unsigned long)c);		}	    }	    break;	case M_PAGESIZE:	    {		u_int32_t s;		dsm_init(bfp);		s = ds_pagesize(bfp);		if (s == 0xffffffff) {		    fprintf(stderr, "%s: error getting page size.\n", ds_file);		} else if (s == 0) {		    puts("UNKNOWN");		} else {		    printf("%lu\n", (unsigned long)s);		}	    }	    break;	case M_DUMP:	    rc = dump_wordlist(bfp);	    break;	case M_LOAD:	    rc = load_wordlist(bfp);	    break;	case M_MAINTAIN:	    maintain = true;	    rc = maintain_wordlist_file(bfp);	    break;	case M_WORD:	    argc -= optind;	    argv += optind;	    rc = display_words(bfp, argc, argv, prob);	    break;	case M_HIST:	    rc = histogram(bfp);	    break;	case M_ROBX:	    rc = get_robx(bfp);	    break;	case M_NONE:	default:	    /* should have been handled above */	    abort();	    break;    }    bfpath_free(bfp);    return rc;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -