📄 rfc2047.c
字号:
/* * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org> * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #if HAVE_CONFIG_H# include "config.h"#endif#include "mutt.h"#include "mime.h"#include "charset.h"#include "rfc2047.h"#include <ctype.h>#include <errno.h>#include <stdio.h>#include <stdlib.h>#include <string.h>/* If you are debugging this file, comment out the following line. 
*//*#define NDEBUG*/#ifdef NDEBUG#define assert(x)#else#include <assert.h>#endif#define ENCWORD_LEN_MAX 75#define ENCWORD_LEN_MIN 9 /* strlen ("=?.?.?.?=") */#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)extern char RFC822Specials[];typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t, const char *);static size_t convert_string (ICONV_CONST char *f, size_t flen, const char *from, const char *to, char **t, size_t *tlen){ iconv_t cd; char *buf, *ob; size_t obl, n; int e; cd = mutt_iconv_open (to, from, 0); if (cd == (iconv_t)(-1)) return (size_t)(-1); obl = 4 * flen + 1; ob = buf = safe_malloc (obl); n = iconv (cd, &f, &flen, &ob, &obl); if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) { e = errno; FREE (&buf); iconv_close (cd); errno = e; return (size_t)(-1); } *ob = '\0'; *tlen = ob - buf; safe_realloc (&buf, ob - buf + 1); *t = buf; iconv_close (cd); return n;}char *mutt_choose_charset (const char *fromcode, const char *charsets, char *u, size_t ulen, char **d, size_t *dlen){ char canonical_buff[LONG_STRING]; char *e = 0, *tocode = 0; size_t elen = 0, bestn = 0; const char *p, *q; for (p = charsets; p; p = q ? q + 1 : 0) { char *s, *t; size_t slen, n; q = strchr (p, ':'); n = q ? q - p : strlen (p); if (!n || /* Assume that we never need more than 12 characters of encoded-text to encode a single character. 
*/ n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12)) continue; t = safe_malloc (n + 1); memcpy (t, p, n); t[n] = '\0'; n = convert_string (u, ulen, fromcode, t, &s, &slen); if (n == (size_t)(-1)) continue; if (!tocode || n < bestn) { bestn = n; FREE (&tocode); tocode = t; if (d) { FREE (&e); e = s; } else FREE (&s); elen = slen; if (!bestn) break; } else { FREE (&t); FREE (&s); } } if (tocode) { if (d) *d = e; if (dlen) *dlen = elen; mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode); mutt_str_replace (&tocode, canonical_buff); } return tocode;}static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen, const char *tocode){ char *s0 = s; memcpy (s, "=?", 2), s += 2; memcpy (s, tocode, strlen (tocode)), s += strlen (tocode); memcpy (s, "?B?", 3), s += 3; for (;;) { if (!dlen) break; else if (dlen == 1) { *s++ = B64Chars[(*d >> 2) & 0x3f]; *s++ = B64Chars[(*d & 0x03) << 4]; *s++ = '='; *s++ = '='; break; } else if (dlen == 2) { *s++ = B64Chars[(*d >> 2) & 0x3f]; *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)]; *s++ = B64Chars[(d[1] & 0x0f) << 2]; *s++ = '='; break; } else { *s++ = B64Chars[(*d >> 2) & 0x3f]; *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)]; *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)]; *s++ = B64Chars[d[2] & 0x3f]; d += 3, dlen -= 3; } } memcpy (s, "?=", 2), s += 2; return s - s0;}static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen, const char *tocode){ char hex[] = "0123456789ABCDEF"; char *s0 = s; memcpy (s, "=?", 2), s += 2; memcpy (s, tocode, strlen (tocode)), s += strlen (tocode); memcpy (s, "?Q?", 3), s += 3; while (dlen--) { unsigned char c = *d++; if (c == ' ') *s++ = '_'; else if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c)) { *s++ = '='; *s++ = hex[(c & 0xf0) >> 4]; *s++ = hex[c & 0x0f]; } else *s++ = c; } memcpy (s, "?=", 2), s += 2; return s - s0;}/* * Return 0 if and set *encoder and *wlen if the data (d, dlen) could * be converted to an 
encoded word of length *wlen using *encoder. * Otherwise return an upper bound on the maximum length of the data * which could be converted. * The data is converted from fromcode (which must be stateless) to * tocode, unless fromcode is 0, in which case the data is assumed to * be already in tocode, which should be 8-bit and stateless. */static size_t try_block (ICONV_CONST char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen){ char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; iconv_t cd; ICONV_CONST char *ib; char *ob, *p; size_t ibl, obl; int count, len, len_b, len_q; if (fromcode) { cd = mutt_iconv_open (tocode, fromcode, 0); assert (cd != (iconv_t)(-1)); ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) { assert (errno == E2BIG); iconv_close (cd); assert (ib > d); return (ib - d == dlen) ? dlen : ib - d + 1; } iconv_close (cd); } else { if (dlen > sizeof (buf1) - strlen (tocode)) return sizeof (buf1) - strlen (tocode) + 1; memcpy (buf1, d, dlen); ob = buf1 + dlen; } count = 0; for (p = buf1; p < ob; p++) { unsigned char c = *p; assert (strchr (MimeSpecials, '?')); if (c >= 0x7f || c < 0x20 || *p == '_' || (c != ' ' && strchr (MimeSpecials, *p))) ++count; } len = ENCWORD_LEN_MIN - 2 + strlen (tocode); len_b = len + (((ob - buf1) + 2) / 3) * 4; len_q = len + (ob - buf1) + 2 * count; /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */ if (!ascii_strcasecmp (tocode, "ISO-2022-JP")) len_q = ENCWORD_LEN_MAX + 1; if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) { *encoder = b_encoder; *wlen = len_b; return 0; } else if (len_q <= ENCWORD_LEN_MAX) { *encoder = q_encoder; *wlen = len_q; return 0; } else return dlen;}/* * Encode the data (d, dlen) into s using the encoder. * Return the length of the encoded word. 
*/
static size_t encode_block (char *s, char *d, size_t dlen,
                            const char *fromcode, const char *tocode,
                            encoder_t encoder)
{
  char converted[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
  iconv_t cd;
  ICONV_CONST char *in;
  char *out;
  size_t inleft, outleft, rc_convert, rc_flush;

  /* No source charset: the data is handed to the encoder unchanged. */
  if (!fromcode)
    return (*encoder) (s, d, dlen, tocode);

  cd = mutt_iconv_open (tocode, fromcode, 0);
  assert (cd != (iconv_t)(-1));

  in = d;
  inleft = dlen;
  out = converted;
  outleft = sizeof (converted) - strlen (tocode);

  /* Failure of either call is only checked by the assertion. */
  rc_convert = iconv (cd, &in, &inleft, &out, &outleft);
  rc_flush = iconv (cd, 0, 0, &out, &outleft);
  assert (rc_convert != (size_t)(-1) && rc_flush != (size_t)(-1));
  iconv_close (cd);

  return (*encoder) (s, converted, out - converted, tocode);
}

/*
 * Work out how long a prefix of the data (d, dlen) fits into a single
 * encoded word when that word starts in column col.
 * Returns the length of that prefix; *encoder and *wlen are set by the
 * successful try_block() call to the encoder to use and the length of
 * the resulting encoded word.
 * A prefix of one byte is accepted even if the word then extends past
 * the column limit.  When fromcode is UTF-8, a shortened prefix is
 * moved back further while d[n] is a continuation byte.
 */
static size_t choose_block (char *d, size_t dlen, int col,
                            const char *fromcode, const char *tocode,
                            encoder_t *encoder, size_t *wlen)
{
  const int from_utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
  size_t take = dlen;

  while (1)
  {
    size_t bound;

    assert (d + take > d);
    bound = try_block (d, take, fromcode, tocode, encoder, wlen);

    /* Fits in a word, and either fits on the line or cannot shrink. */
    if (bound == 0 && (col + *wlen <= ENCWORD_LEN_MAX + 1 || take <= 1))
      return take;

    /* Retry below the reported bound, or one byte shorter than before. */
    if (bound != 0)
      take = bound;
    --take;
    assert (take > 0);

    if (from_utf8)
    {
      while (take > 1 && CONTINUATION_BYTE(d[take]))
        --take;
    }
  }
}

/*
 * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 * allocated buffer (e, elen). The input data is in charset fromcode
 * and is converted into a charset chosen from charsets.
 * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 * compatible with us-ascii and the original data is used.
 * The input data is assumed to be a single line starting at column col;
 * if col is non-zero, the preceding character was a space.
*/static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen, char *specials){ int ret = 0; char *buf; size_t bufpos, buflen; char *u, *t0, *t1, *t; char *s0, *s1; size_t ulen, r, n, wlen; encoder_t encoder; char *tocode1 = 0; const char *tocode; char *icode = "UTF-8"; /* Try to convert to UTF-8. */ if (convert_string (d, dlen, fromcode, icode, &u, &ulen)) { ret = 1; icode = 0; u = safe_malloc ((ulen = dlen) + 1); memcpy (u, d, dlen); u[ulen] = 0; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -