⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utf8.h

📁 广泛使用的邮件服务器!同时
💻 H
📖 第 1 页 / 共 2 页
字号:
/* ========================================================================
 * Copyright 1988-2008 University of Washington
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *
 * ========================================================================
 */

/*
 * Program:	UTF-8 routines
 *
 * Author:	Mark Crispin
 *		Networks and Distributed Computing
 *		Computing & Communications
 *		University of Washington
 *		Administration Building, AG-44
 *		Seattle, WA  98195
 *		Internet: MRC@CAC.Washington.EDU
 *
 * Date:	11 June 1997
 * Last Edited:	17 January 2008
 */

/* UTF-8 size and conversion routines from UCS-2 values (thus in the BMP).
 * Don't use these if UTF-16 data (surrogate pairs) are an issue.
 * For UCS-4 values, use the utf8_size() and utf8_put() functions.
 *
 * Both macros parenthesize their arguments so that expression arguments
 * such as (x | y) are masked and shifted as a whole.  The arguments are
 * still evaluated more than once, so they must be free of side effects.
 */

/* Number of UTF-8 octets (1, 2 or 3) needed to encode the UCS-2 value c. */
#define UTF8_SIZE_BMP(c) (((c) & 0xff80) ? (((c) & 0xf800) ? 3 : 2) : 1)

/* Store the UTF-8 encoding of the UCS-2 value c through the pointer b,
 * advancing b past each octet written.  b must be a modifiable lvalue.
 *
 * NOTE(review): this deliberately remains a plain brace block rather than
 * do { } while (0); call sites outside this header may invoke it without a
 * trailing semicolon, which a do/while form would break -- confirm before
 * changing.
 */
#define UTF8_PUT_BMP(b,c) {                               \
  if ((c) & 0xff80) {           /* non-ASCII? */          \
    if ((c) & 0xf800) {         /* three byte code */     \
      *(b)++ = 0xe0 | ((c) >> 12);                        \
      *(b)++ = 0x80 | (((c) >> 6) & 0x3f);                \
    }                                                     \
    else *(b)++ = 0xc0 | (((c) >> 6) & 0x3f);             \
    *(b)++ = 0x80 | ((c) & 0x3f);                         \
  }                                                       \
  else *(b)++ = (c);                                      \
}

/* utf8_text() flag values */

#define U8T_CASECANON 2         /* canonicalize case */
#define U8T_DECOMPOSE 4         /* decompose */
                                /* full canonicalization */
#define U8T_CANONICAL (U8T_CASECANON | U8T_DECOMPOSE)


/* utf8_get() return values */

                                /* 0x0000 - 0xffff BMP plane */
#define U8GM_NONBMP 0xffff0000  /* mask for non-BMP values */
                                /* 0x10000 - 0x10ffff extended planes */
                                /* 0x110000 - 0x7ffffff non-Unicode */
#define U8G_ERROR 0x80000000    /* error flag */
/* Each code is parenthesized so it stays one value inside any expression. */
#define U8G_BADCONT (U8G_ERROR+1) /* continuation when not in progress */
#define U8G_INCMPLT (U8G_ERROR+2) /* incomplete UTF-8 character */
#define U8G_NOTUTF8 (U8G_ERROR+3) /* not a valid UTF-8 octet */
#define U8G_ENDSTRG (U8G_ERROR+4) /* end of string */
#define U8G_ENDSTRI (U8G_ERROR+5) /* end of string w/ incomplete UTF-8 char */
#define U8G_SURROGA (U8G_ERROR+6) /* surrogate codepoint */
#define U8G_NOTUNIC (U8G_ERROR+7) /* non-Unicode codepoint */


/* ucs4_width() return values */

#define U4W_ERROR 0x80000000    /* error flags */
#define U4W_NOTUNCD (U4W_ERROR+1) /* not a Unicode char */
#define U4W_PRIVATE (U4W_ERROR+2) /* private-space plane */
#define U4W_SSPCHAR (U4W_ERROR+3) /* Supplementary Special-purpose Plane */
#define U4W_UNASSGN (U4W_ERROR+4) /* unassigned space plane */
#define U4W_CONTROL (U4W_ERROR+5) /* C0/C1 control */
#define U4W_CTLSRGT U4W_CONTROL /* in case legacy code references this */


/* ISO-2022 engine states */

#define I2S_CHAR 0              /* character */
#define I2S_ESC 1               /* previous character was ESC */
#define I2S_MUL 2               /* previous character was multi-byte code */
#define I2S_INT 3               /* previous character was intermediate */


/* ISO-2022 Gn selections */

#define I2C_G0 0                /* G0 */
#define I2C_G1 1                /* G1 */
#define I2C_G2 2                /* G2 */
#define I2C_G3 3                /* G3 */
#define I2C_SG2 (2 << 2)        /* single shift G2 */
#define I2C_SG3 (3 << 2)        /* single shift G3 */


/* ISO-2022 octet definitions */

#define I2C_ESC 0x1b            /* ESCape */

        /* Intermediate character */
#define I2C_STRUCTURE 0x20      /* announce code structure */
#define I2C_C0 0x21             /* C0 */
#define I2C_C1 0x22             /* C1 */
#define I2C_CONTROL 0x23        /* single control function */
#define I2C_MULTI 0x24          /* multi-byte character set */
#define I2C_OTHER 0x25          /* other coding system */
#define I2C_REVISED 0x26        /* revised registration */
#define I2C_G0_94 0x28          /* G0 94-character set */
#define I2C_G1_94 0x29          /* G1 94-character set */
#define I2C_G2_94 0x2A          /* G2 94-character set */
#define I2C_G3_94 0x2B          /* G3 94-character set */
#define I2C_G0_96 0x2C          /* (not in ISO-2022) G0 96-character set */
#define I2C_G1_96 0x2D          /* G1 96-character set */
#define I2C_G2_96 0x2E          /* G2 96-character set */
#define I2C_G3_96 0x2F          /* G3 96-character set */

        /* Locking shifts */
#define I2C_SI 0x0f             /* lock shift to G0 (Shift In) */
#define I2C_SO 0x0e             /* lock shift to G1 (Shift Out) */
        /* prefixed by ESC */
#define I2C_LS2 0x6e            /* lock shift to G2 */
#define I2C_LS3 0x6f            /* lock shift to G3 */
#define I2C_LS1R 0x7e           /* lock shift GR to G1 */
#define I2C_LS2R 0x7d           /* lock shift GR to G2 */
#define I2C_LS3R 0x7c           /* lock shift GR to G3 */

        /* Single shifts */
#define I2C_SS2_ALT 0x8e        /* single shift to G2 (SS2) */
#define I2C_SS3_ALT 0x8f        /* single shift to G3 (SS3) */
#define I2C_SS2_ALT_7 0x19      /* single shift to G2 (SS2) */
#define I2C_SS3_ALT_7 0x1d      /* single shift to G3 (SS3) */
        /* prefixed by ESC */
#define I2C_SS2 0x4e            /* single shift to G2 (SS2) */
#define I2C_SS3 0x4f            /* single shift to G3 (SS3) */


/* 94 character sets */

                                /* 4/0 ISO 646 IRV */
#define I2CS_94_BRITISH 0x41    /* 4/1 ISO 646 British */
#define I2CS_94_ASCII 0x42      /* 4/2 ISO 646 USA (ASCII) */
                                /* 4/3 NATS Finland/Sweden (primary) */
                                /* 4/4 NATS Finland/Sweden (secondary) */
                                /* 4/5 NATS Denmark/Norway (primary) */
                                /* 4/6 NATS Denmark/Norway (secondary) */
                                /* 4/7 ISO 646 Swedish SEN 850200 */
                                /* 4/8 ISO 646 Swedish names */
#define I2CS_94_JIS_BUGROM 0x48 /* 4/8 some buggy software does this */
#define I2CS_94_JIS_KANA 0x49   /* 4/9 JIS X 0201-1976 right half */
#define I2CS_94_JIS_ROMAN 0x4a  /* 4/a JIS X 0201-1976 left half */
                                /* 4/b ISO 646 German */
                                /* 4/c ISO 646 Portuguese (Olivetti) */
                                /* 4/d ISO 6438 African */
                                /* 4/e ISO 5427 Cyrillic (Honeywell-Bull) */
                                /* 4/f DIN 31624 extended bibliography  */
                                /* 5/0 ISO 5426-1980 Bibliography */
                                /* 5/1 ISO 5427-1981 Cyrillic*/
                                /* 5/2 ISO 646 French (withdrawn) */
                                /* 5/3 ISO 5428-1980 Greek bibliography */
                                /* 5/4 GB 1988-80 Chinese */
                                /* 5/5 Latin-Greek (Honeywell-Bull) */
                                /* 5/6 UK Viewdata/Teletext */
                                /* 5/7 INIS (IRV subset) */
                                /* 5/8 ISO 5428 Greek Bibliography */
                                /* 5/9 ISO 646 Italian (Olivetti) */
                                /* 5/a ISO 646 Spanish (Olivetti) */
                                /* 5/b Greek (Olivetti) */
                                /* 5/c Latin-Greek (Olivetti) */
                                /* 5/d INIS non-standard extension */
                                /* 5/e INIS Cyrillic extension */
                                /* 5/f Arabic CODAR-U IERA */
                                /* 6/0 ISO 646 Norwegian */
                                /* 6/1 Norwegian version 2 (withdrawn) */
                                /* 6/2 Videotex supplementary */
                                /* 6/3 Videotex supplementary #2 */
                                /* 6/4 Videotex supplementary #3 */
                                /* 6/5 APL */
                                /* 6/6 ISO 646 French */
                                /* 6/7 ISO 646 Portuguese (IBM) */
                                /* 6/8 ISO 646 Spanish (IBM) */
                                /* 6/9 ISO 646 Hungarian */
                                /* 6/a Greek ELOT (withdrawn) */
                                /* 6/b ISO 9036 Arabic 7-bit */
                                /* 6/c ISO 646 IRV supplementary set */
                                /* 6/d JIS C6229-1984 OCR-A */
                                /* 6/e JIS C6229-1984 OCR-B */
                                /* 6/f JIS C6229-1984 OCR-B additional */
                                /* 7/0 JIS C6229-1984 hand-printed */
                                /* 7/1 JIS C6229-1984 additional hand-printed */
                                /* 7/2 JIS C6229-1984 katakana hand-printed */
                                /* 7/3 E13B Japanese graphic */
                                /* 7/4 Supplementary Videotex (withdrawn) */
                                /* 7/5 Teletex primary CCITT T.61 */
                                /* 7/6 Teletex secondary CCITT T.61 */
                                /* 7/7 CSA Z 243.4-1985 Alternate primary #1 */
                                /* 7/8 CSA Z 243.4-1985 Alternate primary #2 */
                                /* 7/9 Mosaic CCITT T.101 */
                                /* 7/a Serbocroatian/Slovenian Latin */
                                /* 7/b Serbocroatian Cyrillic */
                                /* 7/c Supplementary CCITT T.101 */
                                /* 7/d Macedonian Cyrillic */

/* 94 character sets - second intermediate byte */

                                /* 4/0 Greek primary CCITT */
                                /* 4/1 Cuba */
                                /* 4/2 ISO/IEC 646 invariant */
                                /* 4/3 Irish Gaelic 7-bit */
                                /* 4/4 Turkmen */


/* 94x94 character sets */

#define I2CS_94x94_JIS_OLD 0x40 /* 4/0 JIS X 0208-1978 */
#define I2CS_94x94_GB 0x41      /* 4/1 GB 2312 */
#define I2CS_94x94_JIS_NEW 0x42 /* 4/2 JIS X 0208-1983 */
#define I2CS_94x94_KSC 0x43     /* 4/3 KSC 5601 */
#define I2CS_94x94_JIS_EXT 0x44 /* 4/4 JIS X 0212-1990 */
                                /* 4/5 CCITT Chinese */
                                /* 4/6 Blisssymbol Graphic */
#define I2CS_94x94_CNS1 0x47    /* 4/7 CNS 11643 plane 1 */
#define I2CS_94x94_CNS2 0x48    /* 4/8 CNS 11643 plane 2 */
#define I2CS_94x94_CNS3 0x49    /* 4/9 CNS 11643 plane 3 */
#define I2CS_94x94_CNS4 0x4a    /* 4/a CNS 11643 plane 4 */
#define I2CS_94x94_CNS5 0x4b    /* 4/b CNS 11643 plane 5 */
#define I2CS_94x94_CNS6 0x4c    /* 4/c CNS 11643 plane 6 */
#define I2CS_94x94_CNS7 0x4d    /* 4/d CNS 11643 plane 7 */
                                /* 4/e DPRK (North Korea) KGCII */
                                /* 4/f JGCII plane 1 */
                                /* 5/0 JGCII plane 2 */

/* 96 character sets */

#define I2CS_96_ISO8859_1 0x41  /* 4/1 Latin-1 (Western Europe) */
#define I2CS_96_ISO8859_2 0x42  /* 4/2 Latin-2 (Czech, Slovak) */
#define I2CS_96_ISO8859_3 0x43  /* 4/3 Latin-3 (Dutch, Turkish) */
#define I2CS_96_ISO8859_4 0x44  /* 4/4 Latin-4 (Scandinavian) */
                                /* 4/5 CSA Z 243.4-1985 */
#define I2CS_96_ISO8859_7 0x46  /* 4/6 Greek */
#define I2CS_96_ISO8859_6 0x47  /* 4/7 Arabic */
#define I2CS_96_ISO8859_8 0x48  /* 4/8 Hebrew */
                                /* 4/9 Czechoslovak CSN 369103 */
                                /* 4/a Supplementary Latin and non-alpha */
                                /* 4/b Technical */
#define I2CS_96_ISO8859_5 0x4c  /* 4/c Cyrillic */
#define I2CS_96_ISO8859_9 0x4d  /* 4/d Latin-5 (Finnish, Portuguese) */
                                /* 4/e ISO 6937-2 residual */
                                /* 4/f Basic Cyrillic */
                                /* 5/0 Supplementary Latin 1, 2 and 5 */
                                /* 5/1 Basic Box */
                                /* 5/2 Supplementary ISO/IEC 6937 : 1992 */
                                /* 5/3 CCITT Hebrew supplementary */
#define I2CS_96_TIS620 0x54     /* 5/4 TIS 620 */
                                /* 5/5 Arabic/French/German */
#define I2CS_96_ISO8859_10 0x56 /* 5/6 Latin-6 (Northern Europe) */
                                /* 5/7 ??? */
                                /* 5/8 Sami (Lappish) supplementary */
#define I2CS_96_ISO8859_13 0x59 /* 5/9 Latin-7 (Baltic) */
#define I2CS_96_VSCII 0x5a      /* 5/a Vietnamese */
                                /* 5/b Technical #1 IEC 1289 */
#define I2CS_96_ISO8859_14 0x5c /* 5/c Latin-8 (Celtic) */
                                /* 5/d Sami supplementary Latin */
                                /* 5/e Latin/Hebrew */
                                /* 5/f Celtic supplementary Latin */
                                /* 6/0 Uralic supplementary Cyrillic */
                                /* 6/1 Volgaic supplementary Cyrillic */
#define I2CS_96_ISO8859_15 0x62 /* 6/2 Latin-9 (Euro) */
                                /* 6/3 Latin-1 with Euro */
                                /* 6/4 Latin-4 with Euro */
                                /* 6/5 Latin-7 with Euro */
#define I2CS_96_ISO8859_16 0x66 /* 6/6 Latin-10 (Balkan) */
                                /* 6/7 Ogham */
                                /* 6/8 Sami supplementary Latin #2 */
                                /* 7/d Supplementary Mosaic for CCITT 101 */

/* 96x96 character sets */


/* Types of character sets */

#define I2CS_94 0x000           /* 94 character set */
#define I2CS_96 0x100           /* 96 character set */
#define I2CS_MUL 0x200          /* multi-byte */
#define I2CS_94x94 (I2CS_MUL | I2CS_94)
#define I2CS_96x96 (I2CS_MUL | I2CS_96)


/* Character set identifiers stored in Gn: a set type from the list above
 * OR'ed with that set's final-byte value.
 */

#define I2CS_BRITISH (I2CS_94 | I2CS_94_BRITISH)
#define I2CS_ASCII (I2CS_94 | I2CS_94_ASCII)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -