📄 token.c

📁 一个C语言写的快速贝叶斯垃圾邮件过滤工具
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
		leng = min(queue_id->leng, leng);		memcpy( queue_id->text, text, leng );		Z(queue_id->text[leng]);	    }	    continue;	case MESSAGE_ADDR:	{	    /* trim brackets */	    text += 1;	    leng -= 2;	    Z(text[leng]);	/* for easier debugging - removable */	    token_set( &yylval, text, leng);	    /* if top level, no address, not localhost, .... */	    if (token_prefix == w_recv &&		msg_state->parent == NULL && 		*msg_addr->text == '\0' &&		strcmp((char *)text, "127.0.0.1") != 0)	    {		/* Not guaranteed to be the originating address of the message. */		memcpy( msg_addr->text, yylval.text, min(msg_addr->leng, yylval.leng)+D );		Z(msg_addr->text[yylval.leng]);	    }	}	/*@fallthrough@*/	case IPADDR:	    if (block_on_subnets)	    {		int q1, q2, q3, q4;		/*		 * Trick collected by ESR in real time during John		 * Graham-Cummings's talk at Paul Graham's spam conference		 * in January 2003...  Some spammers know that people are		 * doing recognition on spamhaus IP addresses.  They use		 * the fact that HTML clients normally interpret IP addresses		 * by doing a simple accumulate-and-shift algorithm; they		 * add large random multiples of 256 to the quads to		 * mask their origin.  Nuke the high bits to unmask the		 * address.		 */		if (sscanf((const char *)text, "%d.%d.%d.%d", &q1, &q2, &q3, &q4) == 4)		    /* safe because result string guaranteed to be shorter */		    sprintf((char *)text, "%d.%d.%d.%d",			    q1 & 0xff, q2 & 0xff, q3 & 0xff, q4 & 0xff);		leng = strlen((const char *)text);		token->text = text;		token->leng = leng;		token_copy( ipsave, token );		save_class = IPADDR;		return (cls);	    }	    token->text = text;	    token->leng = leng;	    break;	case NONE:		/* nothing to do */	    break;	case MSG_COUNT_LINE:	    msg_count_file = true;	    multi_token_count = 1;	    header_line_markup = false;	    token_prefix = NULL;	    lexer = &msg_count_lexer;	    if (mbox_mode) {		/* Allows processing multiple messages, **		** but only a single file.              */		reader_more = msgcount_more;	    }	    continue;	case BOGO_LEX_LINE:	    token_set( &yylval, text, leng);	    done = true;	    break;	}	if (DEBUG_TEXT(1)) {	    word_puts(&yylval, 0, dbgout);	    fputc('\n', dbgout);	}	/* eat all long words */	if (token->leng <= max_token_len)	    done = true;    }   if (!msg_count_file) {	/* Remove trailing blanks */	/* From "From ", for example */	while (token->leng > 1 && token->text[token->leng-1] == ' ') {	    token->leng -= 1;	    token->text[token->leng] = (byte) '\0';	}	/* Remove trailing colon */	if (token->leng > 1 && token->text[token->leng-1] == ':') {	    token->leng -= 1;	    token->text[token->leng] = (byte) '\0';	}	if (replace_nonascii_characters) {	    /* replace nonascii characters by '?'s */	    for (cp = token->text; cp < token->text+token->leng; cp += 1)		*cp = casefold_table[*cp];	}    }    return(cls);}/* save token in token array */static void add_token_to_array(word_t *token){    word_t *w = w_token_array[WRAP(tok_count)];    w->leng = token->leng;    memcpy(w->text, token->text, w->leng);    Z(w->text[w->leng]);	/* for easier debugging - removable */    if (DEBUG_MULTI(1))	fprintf(stderr, "%s:%d  %2s  %2d %2d %p %s\n", __FILE__, __LINE__,		"", tok_count, w->leng, w->text, w->text);    tok_count += 1;    init_token = 1;    return;}static void build_token_from_array(word_t *token){    int tok;    const char *sep = "";    uint  leng;    byte *dest;    leng = init_token;    for ( tok = init_token; tok >= 0; tok -= 1 ) {	uint idx = tok_count - 1 - tok;	leng += strlen((char *) w_token_array[WRAP(idx)]->text);    }    if (leng > max_multi_token_len)	leng = max_multi_token_len;    token->leng = leng;    token->text = dest = p_multi_buff;    for ( tok = init_token; tok >= 0; tok -= 1 ) {	uint  idx = tok_count - 1 - tok;	uint  len = w_token_array[WRAP(idx)]->leng;	byte *str = w_token_array[WRAP(idx)]->text;	if (DEBUG_MULTI(1))	    fprintf(stderr, "%s:%d  %2d  %2d %2d %p %s\n", __FILE__, __LINE__,		    idx, tok_count, len, str, str);		len = token_copy_leng((const char *)sep, leng, dest);	leng -= len;	dest += len;	len = token_copy_leng((const char *)str, leng, dest);	leng -= len;	dest += len;	sep = "*";    }    Z(token->text[token->leng]);	/* for easier debugging - removable */    init_token += 1;			/* progress to next multi-token */    return;}static uint token_copy_leng(const char *str, uint leng, byte *dest){    uint len = strlen(str);    if (leng < len)	len  = leng;    if (len != 0)	memcpy(dest, str, len);    return (uint) len;}void token_init(void){    static bool fTokenInit = false;    yyinit();    if ( fTokenInit) {	token_clear();    }    else {	fTokenInit = true;	if (max_multi_token_len == 0)	    max_multi_token_len = (max_token_len+1) * multi_token_count + MAX_PREFIX_LEN;	yylval_text_size = max_multi_token_len + MSG_COUNT_PADDING;	yylval_text = (byte *) malloc( yylval_text_size+D );	yylval.leng   = 0;	yylval.text   = yylval_text;	/* First IP Address in Received: statement */	msg_addr = word_new( NULL, max_token_len );	/* Message ID */	msg_id = word_new( NULL, max_token_len * 3 );	/* Message's first queue ID */	queue_id = word_new( NULL, max_token_len );	ipsave = word_new( NULL, max_token_len );	/* word_new() used to avoid compiler complaints */	w_to   = word_news("to:");	/* To:          */	w_from = word_news("from:");	/* From:        */	w_rtrn = word_news("rtrn:");	/* Return-Path: */	w_subj = word_news("subj:");	/* Subject:     */	w_recv = word_news("rcvd:");	/* Received:    */	w_head = word_news("head:");	/* Header:      */	w_mime = word_news("mime:");	/* Mime:        */	w_ip   = word_news("ip:");	/* ip:          */	w_url  = word_news("url:");	/* url:         */	nonblank_line = word_news(NONBLANK);	/* do multi-word token initializations */	init_token_array();    }    return;}void clr_tag(void){    token_prefix = NULL;    tok_count = 0;}void set_tag(const char *text){    word_t *old_prefix = token_prefix;    if (!header_line_markup)	return;    if (msg_state->parent != NULL &&	msg_state->parent->mime_type == MIME_MESSAGE) {	clr_tag();			/* don't tag if inside message/rfc822 */	return;    }    switch (tolower(*text)) {    case 'c':				/* CC: */    case 't':	token_prefix = w_to;		/* To: */	break;    case 'f':	token_prefix = w_from;		/* From: */	break;    case 'h':	if (msg_state->parent == NULL)	    token_prefix = w_head;	/* Header: */	else	    token_prefix = w_mime;	/* Mime:   */	break;    case 'r':	if (tolower(text[2]) == 't')	    token_prefix = w_rtrn;	/* Return-Path: */	else	    token_prefix = w_recv;	/* Received: */	break;    case 's':	token_prefix = w_subj;		/* Subject: */	break;    default:	fprintf(stderr, "%s:%d  invalid tag - '%s'\n",		__FILE__, __LINE__,		text);	exit(EX_ERROR);    }    token_prefix_len = token_prefix->leng;    assert(token_prefix_len <= MAX_PREFIX_LEN);    if (DEBUG_LEXER(2)) {	fprintf(dbgout,"--- set_tag(%s) -> prefix=", text);	if (token_prefix)	    word_puts(token_prefix, 0, dbgout);	fputc('\n', dbgout);    }    /* discard tokens when prefix changes */    if (old_prefix != NULL && old_prefix != token_prefix)	tok_count = 0;    return;}void set_msg_id(byte *text, uint leng){    (void) leng;		/* suppress compiler warning */    token_set( msg_id, text, msg_id->leng );}#define WFREE(n)	word_free(n); n = NULL/* Cleanup storage allocation */void token_cleanup(){    WFREE(w_to);    WFREE(w_from);    WFREE(w_rtrn);    WFREE(w_subj);    WFREE(w_recv);    WFREE(w_head);    WFREE(w_mime);    WFREE(w_ip);    WFREE(w_url);    WFREE(nonblank_line);    token_clear();    /* do multi-word token cleanup */    free_token_array();}void token_clear(){    if (msg_addr != NULL)    {	*msg_addr->text = '\0';	*msg_id->text   = '\0';	*queue_id->text = '\0';    }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -