📄 rfc822.c

📁 使用具有增量学习的监控式学习方法。包括几个不同的分类算法。
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*  $Header: /cvs/src/mairix/rfc822.c,v 1.3 2002/07/28 23:18:16 richard Exp $  mairix - message index builder and finder for maildir folders. ********************************************************************** * Copyright (C) Richard P. Curnow  2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * ********************************************************************** *//** * @file * Parse mail messages according to RFC822 and MIME. * This code is based on code written by Richard P. Curnow for mairix. * * @author  Richard P. 
Curnow (original author)\n
 *          Mikael Ylikoski
 * @date    2002
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "rfc822.h"
#include "utility.h"

/**
 * Content kinds stored in struct attachment::ct.
 */
enum content_type {
    CT_TEXT_PLAIN,
    CT_TEXT_HTML,
    CT_TEXT_OTHER,
    CT_MESSAGE_RFC822,
    CT_OTHER
};

/**
 * Transfer encodings, as returned by get_encoding_type().
 * ENC_NONE is returned when no encoding string is given at all,
 * ENC_UNKNOWN when the string is not recognised.
 */
enum encoding_type {
    ENC_UNKNOWN,
    ENC_NONE,
    ENC_BINARY,
    ENC_7BIT,
    ENC_8BIT,
    ENC_QUOTED_PRINTABLE,
    ENC_BASE64
};

/**
 * One attachment; element of a double linked list.
 * next/prev are the first two members so the struct can be used with
 * enqueue().
 */
struct attachment {
    struct attachment *next;
    struct attachment *prev;
    enum content_type ct;
    /* NOTE(review): presumably `ct` selects which union member is valid
     * (rfc822 for CT_MESSAGE_RFC822, normal otherwise) -- confirm against
     * the code that fills this in. */
    union attachment_body {
	struct normal_attachment_body {
	    int len;
	    char *bytes;
	    char *charset;
	} normal;
	struct rfc822 *rfc822;
    } data;
};

/**
 * Header values of one message.
 */
struct headers {
    const char *to;
    const char *cc;
    const char *from;
    const char *subject;
    char *from_name;		/**< Name part of from header */
    char *from_address;		/**< Address part of from header */
    /* The following are needed to support threading */
    const char *message_id;
    const char *in_reply_to;
    const char *references;
    time_t date;
};

/**
 * A parsed message.
 */
struct rfc822 {
    struct headers hdrs;	/**< Headers */
    struct attachment atts;	/**< Attachments */
    char *data;			/**< Source data, not really used */
    text_part *parts;		/**< Linked list of text parts */
};

/**
 * Fields of a 'Content-Type:' header, as filled in by parse_content_type().
 * All members are set to NULL before parsing starts.
 */
struct content_type_header {
    char *major;	/**< e.g. text */
    char *minor;	/**< e.g. plain */
    char *boundary;	/**< For multipart */
    char *charset;	/**< Character set */
};

/**
 * Double linked list of text lines.
 */
struct line {
    struct line *next;
    struct line *prev;
    char *text;
};

/**
 * Prototype for double linked list element.
 */
struct DLL {
    struct DLL *next;
    struct DLL *prev;
};

/**
 * Compare two strings ignoring case.
 * Compares only as many characters as there are in the first string.
* * @param s1  string to compare with; must be in lowercase * @param s2  string to compare * @return Nonzero if equal, or zero otherwise */static intmy_strncasematch (const char *s1, const char *s2) {    int i, j;    for (i = 0; s1[i]; i++) {	if (s2[i] >= 'A' && s2[i] <= 'Z')	    j = s2[i] + 'a' - 'A';	else	    j = s2[i];	if (j != s1[i])	    return 0;    }    return 1;}/** * Compare two strings ignoring case. * Both strings must have same length to be considered equal. * * @param s1  string to compare with; must be in lowercase * @param s2  string to compare * @return Nonzero if equal, or zero otherwise */static intmy_strcasematch (const char *s1, const char *s2) {    int i, j;    for (i = 0; s1[i] || s2[i]; i++) {	if (s2[i] >= 'A' && s2[i] <= 'Z')	    j = s2[i] + 'a' - 'A';	else	    j = s2[i];	if (j != s1[i])	    return 0;    }    return 1;}/** * Insert an element into a generic double linked list. * Declare so it can be used with any kind of double linked list * having next & prev pointers in its first two words. */static voidenqueue (void *head, void *x) {    struct DLL *h = (struct DLL *)head;    struct DLL *xx = (struct DLL *)x;    xx->next = h;    xx->prev = h->prev;    h->prev->next = xx;    h->prev = xx;}/** * Unfold header lines. */static voidsplice_header_lines (struct line *header) {    const char *p;    struct line *x, *y, *next;    for (x = header->next; x != header; x = next) {	next = x->next;	if (isspace (x->text[0])) {	    /* Glue to previous line */	    for (p = x->text; *p; p++)		if (!isspace (*p))		    break;	    p--;	/* Point to final space */	    y = x->prev;	    x->text[-1] = ' ';	    y->next = x->next;	    x->next->prev = y;	    free(x);	}    }    return;}/** * Convert ascii hexadecimal value to integer. * For decoding Quoted Printable. 
*/static inthex_to_val (char x) {    switch (x) {    case '0': case '1': case '2': case '3': case '4':    case '5': case '6': case '7': case '8': case '9':	return (x - '0');	break;    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':	return 10 + (x - 'a');	break;    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':	return 10 + (x - 'A');	break;    default:	return 0;	// -1    }}/** * Table for Base64 decoding. */static char equal_table[] = {    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 00-0f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 10-1f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 20-2f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,  /* 30-3f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 40-4f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 50-5f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 60-6f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 70-7f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 80-8f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 90-9f */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* a0-af */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* b0-bf */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* c0-cf */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* d0-df */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* e0-ef */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   /* f0-ff */};/** * Table for Base64 decoding. 
*/
/*
 * Indexed by byte value. Gives the 6-bit value of each character of the
 * Base64 alphabet ('A'-'Z' = 0-25, 'a'-'z' = 26-51, '0'-'9' = 52-61,
 * '+' = 62, '/' = 63) and -1 for bytes outside the alphabet.
 *
 * The padding character '=' (0x3d) deliberately maps to 0 rather than -1:
 * decode_header_value() accepts every entry >= 0 as one character of the
 * current 4-character group, and uses equal_table to count the padding.
 */
static int base64_table[] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00-0f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10-1f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* 20-2f */
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1,  0, -1, -1, /* 30-3f */
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 40-4f */
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50-5f */
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 60-6f */
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* 70-7f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 80-8f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 90-9f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a0-af */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* b0-bf */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* c0-cf */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* d0-df */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* e0-ef */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* f0-ff */
};

/**
 * Decode rfc2047 encoded words in headers.
 * FIXME allows whitespace in encoded words.
*/static voiddecode_header_value (char *text) {    char *s, *a, *b, *e;    char *p, *q;    int val;    int reg, nc, eq;	/* register, #characters in reg, #equals */    unsigned char cq;    for (p = q = s = text; (s = strstr (s, "=?")); s = e + 2) {	if (p == q)	    p = q = s;	else	    while (q != s)		*p++ = *q++;	s += 2;	a = strchr (s, '?');	if (!a)	    break;	a++;	b = strchr (a, '?');	if (!b)	    break;	b++;	e = strstr (b, "?=");	if (!e)	    break;	// Have found an encoded word	if (b - a != 2)	    continue;	// Unknown encoding	if (*a == 'q' || *a == 'Q') {	    q = b;	    while (q < e) {		if (*q == '_') {		    *p++ = 0x20;		    q++;		} else if (*q == '=') {		    q++;		    val = hex_to_val (*q++) << 4;		    val += hex_to_val (*q++);		    *p++ = val;		} else		    *p++ = *q++;	    }	} else if (*a == 'b' || *a == 'B') {	    eq = reg = nc = 0;	    for (q = b; q < e; q++) {		cq = *(unsigned char *)q;		val = base64_table[cq];		eq += equal_table[cq];		if (val >= 0) {		    reg <<= 6;		    reg += val;		    if (++nc == 4) {			*p++ = ((reg >> 16) & 0xff);			if (eq < 2)			    *p++ = ((reg >> 8) & 0xff);			if (eq < 1)			    *p++ = reg & 0xff;			nc = reg = 0;			if (eq) {			    break;			}		    }		}	    }	} else {	    fprintf (stderr, "Warning: Unknown header word encoding\n");	    continue;	}	q = e + 2;    }    if (p == q)	return;    while (*q != '\0')	*p++ = *q++;    *p = '\0';}/** * Get header line value. */static char *get_header_value (char *text) {    char *p;    for (p = text; *p && *p != ':'; p++)	;    if (!*p)	return NULL;    p++;    decode_header_value (p);    return p;}/** * Get encoding type from string. 
*/static enum encoding_typeget_encoding_type (const char *e) {    enum encoding_type result;    const char *p;    if (!e)	result = ENC_NONE;    else {	for (p = e; *p && isspace (*p); p++)	    ;	if (my_strncasematch ("7bit", p) || my_strncasematch ("7-bit", p))	    result = ENC_7BIT;	else if (my_strncasematch ("8bit", p) || my_strncasematch ("8-bit", p))	    result = ENC_8BIT;	else if (my_strncasematch ("quoted-printable", p))	    result = ENC_QUOTED_PRINTABLE;	else if (my_strncasematch ("base64", p))	    result = ENC_BASE64;	else if (my_strncasematch ("binary", p))	    result = ENC_BINARY;	else {	    fprintf (stderr, "Warning: Unkown encoding type: '%s'\n", e);	    result = ENC_UNKNOWN;	}    }    return result;}static char *copy_string_start_end_unquote (const char *start, const char *end) {    const char *q;    char *result, *p;    result = my_malloc (sizeof(char) * (end - start + 1));    for (p = result, q = start; q < end; q++)	if (*q != '"')	    *p++ = *q;    *p = 0;    return result;}/** * Parse 'Content-Type:' header line. */static voidparse_content_type (const char *hdrline, struct content_type_header *result) {    const char *p, *q, *eq, *semi, *name, *value;    char *s;    result->major = NULL;    result->minor = NULL;    result->boundary = NULL;    result->charset = NULL;    p = hdrline;    while (*p && isspace (*p))	p++;    for (q = p + 1; *q && (*q != '/'); q++)	;    /* assert(*q); */    if (*q) {	result->major = my_malloc (sizeof(char) * (q - p + 1));	for (s = result->major; p<q;)	    *s++ = *p++;	*s = 0;	p = q + 1;	for (q = p + 1; *q && !isspace (*q) && *q != ';'; q++)	    ;	result->minor = my_malloc (sizeof(char) * (q - p + 1));	for (s = result->minor; p < q;)	    *s++ = *p++;	*s = 0;	/* Now try to extract other fields */	/* FIXME: won't work if ';' or '=' occur within quotation marks */	while (*q && *q != ';')	    q++;	semi = q;	while (semi && *semi) {
12 3 下一页
💿 文件大小 472 K
👤 上传用户 huanzhudev
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#增量 #分类算法 #监控
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -