📄 rfc822.c

📁 使用具有增量学习的监督式学习方法。包括几个不同的分类算法。
💻 C
📖 第 1 页 / 共 3 页
字号:
	    name = semi + 1;	    while (*name && isspace (*name))		name++;	    if (!*name)		break;	    for (eq = name + 1; *eq && (*eq != '='); eq++)		;	    if (!*eq)		break;	    value = eq + 1;	    if (!*value)		break;	    for (semi = value + 1; *semi && !isspace (*semi) && (*semi != ';');		 semi++)		;	    // FIXME Copying really needed? No!	    if (my_strncasematch ("boundary", name))		result->boundary = copy_string_start_end_unquote (value, semi);	    else if (my_strncasematch ("charset", name))		result->charset = copy_string_start_end_unquote (value, semi);	    semi = strchr (semi, ';'); /* in case value string was ended by					  whitespace */	}    } else {	/* If we can't find the '/', just take the first word */	for (q = p + 1; *q && (*q != '/') && (*q != ' '); q++)	    ;	result->major = my_malloc (sizeof(char) * (q - p + 1));	for (s = result->major; p < q;)	    *s++ = *p++;	*s = 0;	/* Assume text will be plain */	if (my_strncasematch ("text", result->major))	    result->minor = my_strdup ("plain");	else	    result->minor = my_strdup ("\0");    }}/** * Check if there is no more text on line. */static const char *looking_at_ws_then_newline (const char *start) {    for (; *start != '\n'; start++)	if (!isspace (*start))	    return NULL;    return start;}/** * Decode data. 
*/static intdecode_data (char *input, int input_len, const char *enc) {    enum encoding_type encoding;    const char *end_result, *end_input;    encoding = get_encoding_type (enc);    end_input = input + input_len;    /* All mime encodings result in expanded data, so this is guaranteed to       safely oversize the output array */    end_result = input;    /* Now decode */    switch (encoding) {    case ENC_UNKNOWN:	fprintf (stderr, "Warning: Unknown content encoding\n");	//break;    case ENC_7BIT:    case ENC_8BIT:    case ENC_BINARY:    case ENC_NONE:	end_result = input + input_len;	break;    case ENC_QUOTED_PRINTABLE: {	const char *q;	char *p;	for (p = input, q = input; q < end_input;)	    if (*q == '=') {		/* followed by optional whitespace then \n? discard them */		const char *r;		int val;		q++;		r = looking_at_ws_then_newline (q);		if (r) {		    q = r + 1; /* Point into next line */		    continue;		}		/* not that case */		val = hex_to_val (*q++) << 4;		val += hex_to_val (*q++);		*p++ = val;	    } else /* Normal character */		*p++ = *q++;	end_result = p;	break;    }    case ENC_BASE64: {	const char *q;	char *p;	unsigned char cq;	int reg, nc, eq;	/* register, #characters in reg, #equals */	int dc;			/* decoded character */	eq = reg = nc = 0;	for (q = input, p = input; q < end_input; q++) {	    cq = *(unsigned char *)q;	    dc = base64_table[cq];	    eq += equal_table[cq];	    if (dc >= 0) {		reg <<= 6;		reg += dc;		nc++;		if (nc == 4) {		    *p++ = ((reg >> 16) & 0xff);		    if (eq < 2)			*p++ = ((reg >> 8) & 0xff);		    if (eq < 1)			*p++ = reg & 0xff;		    nc = reg = 0;		    if (eq)			goto done_base_64;		}	    }	}    done_base_64:	end_result = p;	break;    }    }    //input[end_result - input] = '\0';    return end_result - input;}/** * Check for obvious broken-ness. * 1st line has no leading spaces, single word then colon * following lines have leading spaces or single word followed by colon. 
*/static intaudit_header (struct line *header) {    char *p;    int first = 1;    int is_blank;    int has_leading_space = 0;    int has_word_colon = 0;    int saw_char;    struct line *x;    for (x = header->next; x != header; x = x->next) {	is_blank = !x->text[0];	if (!is_blank) {	    saw_char = 0;	    has_leading_space = isspace (x->text[0]);	    has_word_colon = 0; /* default */	    for (p = x->text; *p; p++)		if (*p == ':') {		    has_word_colon = saw_char;		    break;		} else if (isspace (*p)) {		    has_word_colon = 0;		    break;		} else		    saw_char = 1;	}	if ((first && (is_blank || has_leading_space || !has_word_colon)) ||	    (!first && (is_blank || !(has_leading_space || has_word_colon))))	    /* Header fails the audit */	    return -1;	first = 0;    }    /* If we get here the header must have been OK */    return 0;}/** * Parse header lines. */static intsplit_and_splice_header (char *data, struct line *header, char **body_start) {    char *sol, *eol;    int blank_line;    struct line *new_header;    header->next = header->prev = header;    sol = data;    blank_line = 0;    while (!blank_line) {	if (!*sol)	    break;	blank_line = 1; /* until proven otherwise */	eol = sol;	while (*eol && *eol != '\n') {	    if (!isspace (*eol))		blank_line = 0;	    eol++;	}	if (*eol == '\n') {	    if (!blank_line) {		*eol = '\0';		new_header = my_malloc (sizeof(struct line));		new_header->text = sol;		enqueue (header, new_header);	    }	    sol = eol + 1; /* Start of next line */	} else { /* must be null char */	    fprintf (stderr, "Warning: Got null character whilst processing "		     "header\n");	    return -1;	}    }    *body_start = sol;    if (audit_header (header)) {	fprintf (stderr, "Warning: Message had bad rfc822 headers\n");	return -1;    }    splice_header_lines (header);    return 0;}/* Forward prototypes */static struct rfc822 *rfc822_create (char *data, int length, int new_msg);static voiddo_multipart (char *input, int input_len, const char *boundary,	    
  struct attachment *atts);/** * Parse body of message. */static voiddo_body (char *body_start, int body_len, const char *content_type,	 const char *content_transfer_encoding, struct attachment *atts) {    int decoded_body_len;    struct content_type_header ct;    struct attachment *new_att;    if (content_type)	parse_content_type (content_type, &ct);    if (content_type && my_strcasematch ("multipart", ct.major))	decoded_body_len = decode_data (body_start, body_len, "8bit");    else	decoded_body_len = decode_data (body_start, body_len,					content_transfer_encoding);    if (content_type) {	if (my_strcasematch ("multipart", ct.major))	    do_multipart (body_start, decoded_body_len, ct.boundary, atts);	else {	/* unipart */	    new_att = my_malloc (sizeof(struct attachment));	    if (my_strcasematch ("text", ct.major)) {		if (my_strcasematch ("plain", ct.minor))		    new_att->ct = CT_TEXT_PLAIN;		else if (my_strcasematch ("html", ct.minor))		    new_att->ct = CT_TEXT_HTML;		else		    new_att->ct = CT_TEXT_OTHER;	    } else if (my_strcasematch ("message", ct.major) &&		       my_strcasematch ("rfc822", ct.minor))		new_att->ct = CT_MESSAGE_RFC822;	    else		new_att->ct = CT_OTHER;	    if (new_att->ct == CT_MESSAGE_RFC822) {		new_att->data.rfc822 = rfc822_create (body_start,						      decoded_body_len, 0);	    } else {		new_att->data.normal.len = decoded_body_len;		new_att->data.normal.bytes = body_start;		new_att->data.normal.charset = ct.charset;		ct.charset = NULL;	    }	    enqueue (atts, new_att);	}	free (ct.major);	free (ct.minor);	if (ct.boundary)	    free (ct.boundary);	if (ct.charset)	  free (ct.charset);    } else {	/* Treat as text/plain */	new_att = my_malloc (sizeof(struct attachment));	new_att->ct = CT_TEXT_PLAIN;	new_att->data.normal.bytes = body_start;	new_att->data.normal.len = decoded_body_len;	new_att->data.normal.charset = NULL;	/* Add null termination on the end? */	enqueue (atts, new_att);    }}/** * Decode attachment and add to attachment list. 
*/
/*
 * START .. AFTER_END delimit one part of a multipart body (AFTER_END is
 * one past the last byte).  The part's header lines are split off in
 * place, the first Content-Type and Content-Transfer-Encoding values are
 * picked out, and the remainder is passed to do_body () which appends to
 * ATTS.  A part whose header cannot be parsed, or whose body would start
 * beyond AFTER_END, is skipped with a warning.
 */
static void
do_attachment (char *start, const char *after_end, struct attachment *atts) {
    const char *content_type, *content_transfer_encoding;
    char *body_start;
    int body_len;
    struct line header, *x, *nx;

    /* split_and_splice_header () initialises the list before anything can
       fail, so it is always safe to walk it in the cleanup below */
    if (split_and_splice_header (start, &header, &body_start) < 0) {
	fprintf (stderr, "Warning: Giving up on attachment with bad header\n");
	goto free_header;
    }

    /* Extract key headers; only the first occurrence of each counts */
    content_type = NULL;
    content_transfer_encoding = NULL;
    for (x = header.next; x != &header; x = x->next) {
	if (my_strncasematch ("content-type", x->text) && !content_type)
	    content_type = get_header_value (x->text);
	else if (my_strncasematch ("content-transfer-encoding", x->text) &&
		 !content_transfer_encoding)
	    content_transfer_encoding = get_header_value (x->text);
    }

    if (body_start > after_end) {
	/* This is a (maliciously?) b0rken attachment, e.g. maybe empty.
	   The pointer difference is a ptrdiff_t, so cast it to match %d */
	fprintf (stderr, "Warning: This message contains an invalid "
		 "attachment, length=%d bytes\n", (int) (after_end - start));
    } else {
	body_len = (int) (after_end - body_start);
	do_body (body_start, body_len, content_type, content_transfer_encoding,
		 atts);
    }

 free_header:
    /* Free header memory (also on the bad-header path, where lines queued
       before the failure would otherwise leak) */
    for (x = header.next; x != &header; x = nx) {
	nx = x->next;
	free (x);
    }
}

/**
 * Process multipart message.
*/static voiddo_multipart (char *input, int input_len, const char *boundary,	      struct attachment *atts) {    char *normal_boundary, *end_boundary;    char *b0, *b1, *be;    char *line_after_b0;    const char *start_b1_search_from;    int boundary_len;    int looking_at_end_boundary;    if (!boundary) {	fprintf (stderr, "Warning: Can't process multipart message with no "		 "boundary string\n");	return;    }    boundary_len = strlen (boundary);    normal_boundary = my_malloc (sizeof(char) * (boundary_len + 3));    end_boundary = my_malloc (sizeof(char) * (boundary_len + 5));    strcpy (normal_boundary, "--");    strcat (normal_boundary, boundary);    strcpy (end_boundary, "--");    strcat (end_boundary, boundary);    strcat (end_boundary, "--");    b0 = NULL;    /* Scan input to look for boundary markers */    be = strstr (input, end_boundary);    if (!be) {	fprintf (stderr, "Warning: Multipart message without end boundary\n");	goto cleanup;	//be = input + input_len;    }    line_after_b0 = input;    do {	int boundary_ok;	start_b1_search_from = line_after_b0;	do {	    /* reject boundaries that aren't a whole line */	    b1 = strstr (start_b1_search_from, normal_boundary);	    if (!b1) {		fprintf (stderr, "Warning: Didn't find normal boundary\n");		goto cleanup;	    }	    looking_at_end_boundary = (b1 == be);	    boundary_ok = 1;	    if ((b1 > input) && (*(b1 - 1) != '\n'))		boundary_ok = 0;	    if (!looking_at_end_boundary && (*(b1 + boundary_len + 2) != '\n'))		boundary_ok = 0;	    if (!boundary_ok)		start_b1_search_from = 1 + strchr (b1, '\n');	} while (!boundary_ok);	/* b1 is now looking at a good boundary,	   which might be the final one */	if (b0)	    /* don't treat preamble as an attachment */	    do_attachment (line_after_b0, b1, atts);	b0 = b1;	line_after_b0 = strchr (b0, '\n') + 1;    } while (b1 != be); cleanup:    free (normal_boundary);    free (end_boundary);}/** * Parse date header. 
* * Format: [weekday,] day-of-month month year hour:minute[:second] [timezone] * * Some of the ideas, sanity checks etc taken from parse.c in the mutt * sources, credit to Michael R. Elkins et al. */static time_tparse_rfc822_date (const char *date_string) {    struct tm tm = { .tm_isdst = -1 };    const char *s, *z;    int i, j;    /* Ignore day name */    s = date_string;    z = strchr (s, ',');    if (z)	s = z + 1;    while (*s && isspace (*s))	s++;    /* Day of month */    if (!isdigit (*s))	goto error;    tm.tm_mday = atoi (s);    if (tm.tm_mday > 31)	goto error;    while (isdigit (*s))	s++;    while (*s && isspace (*s))	s++;    if (!*s)	goto error;    /* Month */    if (my_strncasematch ("jan", s))	tm.tm_mon = 0;    else if (my_strncasematch ("feb", s))
💿 文件大小 472 K
👤 上传用户 huanzhudev
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#增量 #分类算法 #监控
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -