📄 rfc2192.txt
字号:
/*
 *              <ftp://ds.internic.net/rfc/rfc1808.txt>
 *
 *   [UTF8]     Yergeau, F. "UTF-8, a transformation format of Unicode
 *              and ISO 10646", RFC 2044, Alis Technologies, October 1996.
 *
 *              <ftp://ds.internic.net/rfc/rfc2044.txt>
 *
 * 14. Author's Address
 *
 *   Chris Newman
 *   Innosoft International, Inc.
 *   1050 Lakes Drive
 *   West Covina, CA 91790 USA
 *
 *   EMail: chris.newman@innosoft.com
 *
 * Newman                      Standards Track                    [Page 11]
 * RFC 2192                    IMAP URL Scheme               September 1997
 *
 * Appendix A.  Sample code
 *
 * Here is sample C source code to convert between URL paths and IMAP
 * mailbox names, taking into account mapping between IMAP's modified UTF-7
 * [IMAP4] and hex-encoded UTF-8 which is more appropriate for URLs.  This
 * code has not been rigorously tested nor does it necessarily behave
 * reasonably with invalid input, but it should serve as a useful example.
 * This code just converts the mailbox portion of the URL and does not deal
 * with parameters, query or server components of the URL.
 *
 * NOTE: the RFC page-break footers/headers that were interleaved with the
 * code below (pages 12-15) are not reproduced inside the function bodies.
 */

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* hexadecimal lookup table */
static const char hex[] = "0123456789ABCDEF";

/* URL unsafe printable characters */
static const char urlunsafe[] = " \"#%&+:;<=>?@[\\]^`{|}";

/* UTF7 modified base64 alphabet */
static const char base64chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
#define UNDEFINED 64

/* UTF16 definitions */
#define UTF16MASK       0x03FFUL
#define UTF16SHIFT      10
#define UTF16BASE       0x10000UL
#define UTF16HIGHSTART  0xD800UL
#define UTF16HIGHEND    0xDBFFUL
#define UTF16LOSTART    0xDC00UL
#define UTF16LOEND      0xDFFFUL

/* Write one octet as "%XX" (upper-case hex) at dst; return the new end. */
static char *hex_encode_octet(char *dst, unsigned int octet)
{
    dst[0] = '%';
    dst[1] = hex[(octet >> 4) & 0x0f];
    dst[2] = hex[octet & 0x0f];
    return dst + 3;
}

/* Convert an IMAP mailbox to a URL path
 *  dst needs to have roughly 4 times the storage space of src
 *    Hex encoding can triple the size of the input
 *    UTF-7 can be slightly denser than UTF-8
 *     (worst case: 8 octets UTF-7 becomes 9 octets UTF-8)
 *
 *  dst: output buffer; receives a NUL-terminated string.  No bounds
 *       checking is performed, the caller must size it as described.
 *  src: NUL-terminated mailbox name in modified UTF-7; it is only read.
 *
 *  Input is not validated: malformed modified UTF-7 is converted on a
 *  best-effort basis.
 */
void MailboxToURL(char *dst, char *src)
{
    unsigned char c, i, bitcount;
    unsigned long ucs4, utf16, bitbuf;
    unsigned char base64[256], utf8[6];

    /* initialize modified base64 decoding table
     * (only the 64 alphabet characters, not the terminating NUL) */
    memset(base64, UNDEFINED, sizeof (base64));
    for (i = 0; i < sizeof (base64chars) - 1; ++i) {
        base64[(unsigned char) base64chars[i]] = i;
    }

    /* loop until end of string */
    while (*src != '\0') {
        c = (unsigned char) *src++;
        /* deal with literal characters and &- */
        if (c != '&' || *src == '-') {
            if (c < ' ' || c > '~' || strchr(urlunsafe, c) != NULL) {
                /* hex encode if necessary */
                dst = hex_encode_octet(dst, c);
            } else {
                /* encode literally */
                *dst++ = (char) c;
            }
            /* skip over the '-' if this is an &- sequence */
            if (c == '&') {
                ++src;
            }
        } else {
            /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
            bitbuf = 0;
            bitcount = 0;
            ucs4 = 0;
            while ((c = base64[(unsigned char) *src]) != UNDEFINED) {
                ++src;
                bitbuf = (bitbuf << 6) | c;
                bitcount += 6;
                /* enough bits for a UTF-16 character? */
                if (bitcount >= 16) {
                    bitcount -= 16;
                    utf16 = (bitcount ? bitbuf >> bitcount
                                      : bitbuf) & 0xffff;
                    /* convert UTF16 to UCS4 */
                    if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) {
                        /* high surrogate: stash it and wait for the low one */
                        ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
                        continue;
                    } else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) {
                        ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
                    } else {
                        ucs4 = utf16;
                    }
                    /* convert UTF-16 range of UCS4 to UTF-8 */
                    if (ucs4 <= 0x7fUL) {
                        utf8[0] = (unsigned char) ucs4;
                        i = 1;
                    } else if (ucs4 <= 0x7ffUL) {
                        utf8[0] = (unsigned char) (0xc0 | (ucs4 >> 6));
                        utf8[1] = (unsigned char) (0x80 | (ucs4 & 0x3f));
                        i = 2;
                    } else if (ucs4 <= 0xffffUL) {
                        utf8[0] = (unsigned char) (0xe0 | (ucs4 >> 12));
                        utf8[1] = (unsigned char) (0x80 | ((ucs4 >> 6) & 0x3f));
                        utf8[2] = (unsigned char) (0x80 | (ucs4 & 0x3f));
                        i = 3;
                    } else {
                        utf8[0] = (unsigned char) (0xf0 | (ucs4 >> 18));
                        utf8[1] = (unsigned char) (0x80 | ((ucs4 >> 12) & 0x3f));
                        utf8[2] = (unsigned char) (0x80 | ((ucs4 >> 6) & 0x3f));
                        utf8[3] = (unsigned char) (0x80 | (ucs4 & 0x3f));
                        i = 4;
                    }
                    /* convert utf8 to hex */
                    for (c = 0; c < i; ++c) {
                        dst = hex_encode_octet(dst, utf8[c]);
                    }
                }
            }
            /* skip over trailing '-' in modified UTF-7 encoding */
            if (*src == '-') {
                ++src;
            }
        }
    }
    /* terminate destination string */
    *dst = '\0';
}

/* Convert hex coded UTF-8 URL path to modified UTF-7 IMAP mailbox
 *  dst should be about twice the length of src to deal with non-hex
 *  coded URLs
 *
 *  dst: output buffer; receives a NUL-terminated string.  No bounds
 *       checking is performed, the caller must size it as described.
 *  src: NUL-terminated URL path (hex-encoded UTF-8); it is only read.
 *
 *  Input is not validated: non-hex characters after '%' decode as 0 and
 *  malformed UTF-8 is converted on a best-effort basis.
 */
void URLtoMailbox(char *dst, char *src)
{
    unsigned int utf8pos = 0, utf8total = 0, i, c, utf7mode = 0;
    unsigned int bitstogo = 0, utf16flag;
    /* bitbuf starts at 0 so the first "bitbuf << 16" reads a defined value */
    unsigned long ucs4 = 0, bitbuf = 0;
    unsigned char hextab[256];

    /* initialize hex lookup table (both upper- and lower-case digits) */
    memset(hextab, 0, sizeof (hextab));
    for (i = 0; i < sizeof (hex) - 1; ++i) {
        hextab[(unsigned char) hex[i]] = (unsigned char) i;
        if (isupper((unsigned char) hex[i])) {
            hextab[tolower((unsigned char) hex[i])] = (unsigned char) i;
        }
    }

    /* read octets as unsigned so values >= 0x80 are not sign-extended */
    while ((c = (unsigned char) *src) != '\0') {
        ++src;
        /* undo hex-encoding */
        if (c == '%' && src[0] != '\0' && src[1] != '\0') {
            c = ((unsigned int) hextab[(unsigned char) src[0]] << 4)
                | hextab[(unsigned char) src[1]];
            src += 2;
        }
        /* normal character? */
        if (c >= ' ' && c <= '~') {
            /* switch out of UTF-7 mode */
            if (utf7mode) {
                if (bitstogo) {
                    *dst++ = base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
                }
                *dst++ = '-';
                utf7mode = 0;
                /* each base64 run is padded independently: drop leftover
                 * bits so they cannot leak into the next UTF-7 run */
                bitstogo = 0;
                bitbuf = 0;
            }
            *dst++ = (char) c;
            /* encode '&' as '&-' */
            if (c == '&') {
                *dst++ = '-';
            }
            continue;
        }
        /* switch to UTF-7 mode */
        if (!utf7mode) {
            *dst++ = '&';
            utf7mode = 1;
        }
        /* Encode US-ASCII characters as themselves */
        if (c < 0x80) {
            ucs4 = c;
            utf8total = 1;
        } else if (utf8total) {
            /* save UTF8 bits into UCS4 */
            ucs4 = (ucs4 << 6) | (c & 0x3FUL);
            if (++utf8pos < utf8total) {
                continue;
            }
        } else {
            utf8pos = 1;
            if (c < 0xE0) {
                utf8total = 2;
                ucs4 = c & 0x1F;
            } else if (c < 0xF0) {
                utf8total = 3;
                ucs4 = c & 0x0F;
            } else {
                /* NOTE: can't convert UTF8 sequences longer than 4 */
                utf8total = 4;
                /* a 4-octet lead byte (11110xxx) carries 3 payload bits */
                ucs4 = c & 0x07;
            }
            continue;
        }
        /* loop to split ucs4 into two utf16 chars if necessary */
        utf8total = 0;
        do {
            if (ucs4 >= UTF16BASE) {
                ucs4 -= UTF16BASE;
                bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT)
                                           + UTF16HIGHSTART);
                ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
                utf16flag = 1;
            } else {
                bitbuf = (bitbuf << 16) | ucs4;
                utf16flag = 0;
            }
            bitstogo += 16;
            /* spew out base64 */
            while (bitstogo >= 6) {
                bitstogo -= 6;
                *dst++ = base64chars[(bitstogo ? (bitbuf >> bitstogo)
                                               : bitbuf)
                                     & 0x3F];
            }
        } while (utf16flag);
    }
    /* if in UTF-7 mode, finish in ASCII */
    if (utf7mode) {
        if (bitstogo) {
            *dst++ = base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
        }
        *dst++ = '-';
    }
    /* tie off string */
    *dst = '\0';
}

/* Newman                      Standards Track                    [Page 16] */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -