/* $Id: token.c,v 1.155 2007/01/12 00:20:16 relson Exp $ */

/*****************************************************************************

NAME:
   token.c -- post-lexer token processing

   12/08/02 - split out from lexer.l

AUTHOR:
   David Relson <relson@osagesoftware.com>

******************************************************************************/

#include "common.h"

#include <assert.h>
#include <ctype.h>
#include <stdlib.h>

#include "bogoreader.h"
#include "charset.h"
#include "error.h"
#include "mime.h"
#include "msgcounts.h"
#include "word.h"

#include "token.h"
#include "xmemrchr.h"

#define MSG_COUNT_PADDING 2 * 10    /* space for 2 10-digit numbers */

/* Local Variables */

word_t *msg_addr;    /* First IP Address in Received: statement */
word_t *msg_id;      /* Message ID */
word_t *queue_id;    /* Message's first queue ID */

static token_t save_class = NONE;
static word_t *ipsave;

static byte   *yylval_text;
static size_t  yylval_text_size;
static word_t  yylval;

static word_t *w_to   = NULL;    /* To:          */
static word_t *w_from = NULL;    /* From:        */
static word_t *w_rtrn = NULL;    /* Return-Path: */
static word_t *w_subj = NULL;    /* Subject:     */
static word_t *w_recv = NULL;    /* Received:    */
static word_t *w_head = NULL;    /* Header:      */
static word_t *w_mime = NULL;    /* Mime:        */
static word_t *w_ip   = NULL;    /* ip:          */
static word_t *w_url  = NULL;    /* url:         */

/* Global Variables */

bool block_on_subnets = false;

static word_t  *token_prefix = NULL;
static uint32_t token_prefix_len;

#define NONBLANK "spc:invalid_end_of_header"
static word_t *nonblank_line = NULL;

static uint tok_count  = 0;
static uint init_token = 1;

static word_t  *p_multi_words = NULL;
static byte    *p_multi_buff  = NULL;
static byte    *p_multi_text  = NULL;
static word_t **w_token_array = NULL;

/* Function Prototypes */

static void    token_clear(void);
static token_t parse_new_token(word_t *token);
static void    add_token_to_array(word_t *token);
static void    build_token_from_array(word_t *token);
static uint    token_copy_leng(const char *str, uint leng, byte *dest);

/* Function Definitions */

static void init_token_array(void)
{
    uint i;
    byte *text;
    word_t *words;

    p_multi_words = calloc( max_token_len, sizeof(word_t) );
    p_multi_buff  = malloc( max_multi_token_len+D );
    p_multi_text  = calloc( max_token_len+1+D, multi_token_count );
    w_token_array = calloc( multi_token_count, sizeof(*w_token_array) );

    text  = p_multi_text;
    words = p_multi_words;
    for (i = 0; i < multi_token_count; i += 1) {
        words->leng = 0;
        words->text = text;
        w_token_array[i] = words;
        words += 1;
        text  += max_token_len+1+D;
    }
}

static void free_token_array(void)
{
    free(p_multi_words);
    free(p_multi_text);
    free(w_token_array);
}

static void token_set( word_t *token, byte *text, uint leng )
{
    token->leng = leng;
    memcpy(token->text, text, leng);    /* include nul terminator */
    token->text[leng] = '\0';           /* ensure nul termination */
}

static inline void token_copy( word_t *dst, word_t *src )
{
    token_set(dst, src->text, src->leng);
}

static void build_prefixed_token( word_t *prefix, word_t *token,
                                  word_t *temp, uint32_t temp_size )
{
    uint len = token->leng + prefix->leng;
    if (len >= temp_size)
        len = temp_size - prefix->leng - 1;

    temp->leng = len;
    memmove(temp->text + prefix->leng, token->text, len - prefix->leng);
    memcpy(temp->text, prefix->text, prefix->leng);
    Z(temp->text[temp->leng]);

    token->leng = temp->leng;
    token->text = temp->text;
}

#define WRAP(n) ((n) % multi_token_count)

token_t get_token(word_t *token)
{
    token_t cls;
    bool fSingle = (tok_count < 2 ||
                    tok_count <= init_token ||
                    multi_token_count <= init_token);

    if (fSingle) {
        cls = parse_new_token(token);
        if (multi_token_count > 1)
            add_token_to_array(token);
    }
    else {
        cls = TOKEN;
        build_token_from_array(token);
    }

    if (token_prefix != NULL) {
        /* IP addresses get special prefix */
        if (save_class != IPADDR) {
            build_prefixed_token(token_prefix, token, &yylval, yylval_text_size);
        }
        else {
            word_t *prefix = (wordlist_version >= IP_PREFIX) ? w_ip : w_url;
            build_prefixed_token(prefix, token, &yylval, yylval_text_size);
        }
        /* if excessive length caused by prefix, get another token */
        if (fSingle && token->leng > max_token_len)
            cls = get_token(token);
    }

    return cls;
}

token_t parse_new_token(word_t *token)
{
    token_t cls = NONE;
    unsigned char *cp;
    bool done = false;

    /* If saved IPADDR, truncate last octet */
    if ( block_on_subnets && save_class == IPADDR ) {
        byte *t = xmemrchr(ipsave->text, '.', ipsave->leng);
        if (t == NULL)
            save_class = NONE;
        else {
            ipsave->leng = (uint) (t - ipsave->text);
            token_set( token, ipsave->text, ipsave->leng);
            cls = save_class;
            done = true;
        }
    }

    while (!done) {
        uint leng;
        byte *text;

        cls = (*lexer->yylex)();

        token->leng = (uint)   *lexer->yyleng;
        token->text = (byte *) *lexer->yytext;
        Z(token->text[token->leng]);    /* for easier debugging - removable */

        leng = token->leng;
        text = token->text;

        if (DEBUG_TEXT(2)) {
            word_puts(token, 0, dbgout);
            fputc('\n', dbgout);
        }

        if (cls == NONE)    /* End of message */
            break;

        switch (cls) {

        case EOH:       /* end of header - bogus if not empty */
            if (leng > max_token_len)
                continue;

            if (msg_state->mime_type == MIME_MESSAGE)
                mime_add_child(msg_state);
            if (leng == 2)
                continue;
            else {      /* "spc:invalid_end_of_header" */
                token_copy( &yylval, nonblank_line);
                done = true;
            }
            break;

        case BOUNDARY:  /* don't return boundary tokens to the user */
            continue;

        case VERP:      /* Variable Envelope Return Path */
        {
            byte *st = (byte *)text;
            byte *in;
            byte *fst = NULL;
            byte *lst = NULL;

            for (in = st; *in != '\0'; in += 1) {
                if (*in == '-') {
                    if (fst == NULL)
                        fst = in;
                    lst = in;
                }
            }

            if (fst != NULL && lst != NULL && lst - fst > 3) {
                byte *ot = fst;
                *ot++ = '-';
                *ot++ = '#';
                for (in = lst; *in != '\0'; in += 1, ot += 1)
                    *ot = *in;
                token->leng = leng = (uint) (ot - st);
            }
            Z(token->text[token->leng]);    /* for easier debugging - removable */
        }
        break;

        case HEADKEY:
        {
            if (!header_line_markup || *text == '\0')
                continue;
            else {
                const char *delim = strchr((const char *)text, ':');
                leng = (uint) (delim - (const char *)text);
                if (leng > max_token_len)
                    continue;
                token_set( &yylval, text, leng);
            }
        }
        /*@fallthrough@*/

        case TOKEN:     /* ignore anything when not reading text MIME types */
            if (leng < min_token_len)
                continue;

        case MONEY:     /* 2 character money is OK */
            if (leng > max_token_len)
                continue;

            token->text = text;
            token->leng = leng;

            if (token_prefix == NULL) {
                switch (msg_state->mime_type) {
                case MIME_TEXT:
                case MIME_TEXT_HTML:
                case MIME_TEXT_PLAIN:
                case MIME_MULTIPART:
                    break;
                case MIME_MESSAGE:
                case MIME_APPLICATION:
                case MIME_IMAGE:
                    continue;
                default:
                    continue;
                }
            }
            break;

        case MESSAGE_ID:
            /* special token; saved for formatted output, but not returned to bogofilter */
            /** \bug: the parser MUST be aligned with lexer_v3.l! */
            if (leng < max_token_len) {
                while (!isspace(text[0])) {
                    text += 1;
                    leng -= 1;
                }
                while (isspace(text[0])) {
                    text += 1;
                    leng -= 1;
                }
                token_set( msg_id, text, leng);
            }
            continue;

        case QUEUE_ID:
            /* special token; saved for formatted output, but not returned to bogofilter */
            /** \bug: the parser MUST be aligned with lexer_v3.l!
             */
            if (*queue_id->text == '\0' &&
                leng < max_token_len ) {
                while (isspace(text[0])) {
                    text += 1;
                    leng -= 1;
                }
                if (memcmp(text, "id", 2) == 0) {
                    text += 2;
                    leng -= 2;
                }
                while (isspace(text[0])) {
                    text += 1;
                    leng -= 1;
                }
                if (text[0] == '<') {
                    text += 1;
                    leng -= 1;
                }
                if (text[leng-1] == '>') {
                    leng -= 1;
                }