⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utf8.c

📁 广泛使用的邮件服务器!同时
💻 C
📖 第 1 页 / 共 5 页
字号:
/* ========================================================================
 * Copyright 1988-2008 University of Washington
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *
 * ========================================================================
 */

/*
 * Program:	UTF-8 routines
 *
 * Author:	Mark Crispin
 *		Networks and Distributed Computing
 *		Computing & Communications
 *		University of Washington
 *		Administration Building, AG-44
 *		Seattle, WA  98195
 *		Internet: MRC@CAC.Washington.EDU
 *
 * Date:	11 June 1997
 * Last Edited:	17 January 2008
 */

#include <stdio.h>
#include <ctype.h>
#include "c-client.h"

/*	*** IMPORTANT ***
 *
 *  There is a very important difference between "character set" and "charset",
 * and the comments in this file reflect these differences.  A "character set"
 * (also known as "coded character set") is a mapping between codepoints and
 * characters.  A "charset" is as defined in MIME, and incorporates one or more
 * coded character sets in a character encoding scheme.  See RFC 2130 for more
 * details.
 */

/* Character set conversion tables
 *
 * These are .c files included textually, so the tables they define become
 * part of this translation unit.
 */

#include "iso_8859.c"		/* 8-bit single-byte coded graphic */
#include "koi8_r.c"		/* Cyrillic - Russia */
#include "koi8_u.c"		/* Cyrillic - Ukraine */
#include "tis_620.c"		/* Thai */
#include "viscii.c"		/* Vietnamese */
#include "windows.c"		/* Windows */
#include "ibm.c"		/* IBM */
#include "gb_2312.c"		/* Chinese (PRC) - simplified */
#include "gb_12345.c"		/* Chinese (PRC) - traditional */
#include "jis_0208.c"		/* Japanese - basic */
#include "jis_0212.c"		/* Japanese - supplementary */
#include "ksc_5601.c"		/* Korean */
#include "big5.c"		/* Taiwanese (ROC) - industrial standard */
#include "cns11643.c"		/* Taiwanese (ROC) - national standard */
#include "widths.c"		/* Unicode character widths */
#include "tmap.c"		/* Unicode titlecase mapping */
#include "decomtab.c"		/* Unicode decompositions */

/* EUC parameters
 *
 * Each initializer below supplies, in order: a base "ku" value, a base "ten"
 * value, a maximum "ku" value, a maximum "ten" value, and a pointer to the
 * conversion table.  Each parameter block is compiled only when the matching
 * xxxTOUNICODE macro is defined.
 * NOTE(review): "ku"/"ten" presumably denote the row/cell indices of a
 * double-byte code; confirm against the struct utf8_eucparam declaration.
 */

#ifdef GBTOUNICODE		/* PRC simplified Chinese */
static const struct utf8_eucparam gb_param = {
  BASE_GB2312_KU,BASE_GB2312_TEN,MAX_GB2312_KU,MAX_GB2312_TEN,
  (void *) gb2312tab};
#endif

#ifdef GB12345TOUNICODE		/* PRC traditional Chinese */
static const struct utf8_eucparam gbt_param = {
  BASE_GB12345_KU,BASE_GB12345_TEN,MAX_GB12345_KU,MAX_GB12345_TEN,
  (void *) gb12345tab};
#endif

#ifdef BIG5TOUNICODE		/* ROC traditional Chinese */
				/* two entries: same ku range, two ten ranges;
				 * only the first carries a table pointer
				 */
static const struct utf8_eucparam big5_param[] = {
  {BASE_BIG5_KU,BASE_BIG5_TEN_0,MAX_BIG5_KU,MAX_BIG5_TEN_0,(void *) big5tab},
  {BASE_BIG5_KU,BASE_BIG5_TEN_1,MAX_BIG5_KU,MAX_BIG5_TEN_1,NIL}
};
#endif

#ifdef JISTOUNICODE		/* Japanese */
				/* always three entries: JIS 0208, 8-bit kana,
				 * then JIS 0212 or an all-zero placeholder
				 */
static const struct utf8_eucparam jis_param[] = {
  {BASE_JIS0208_KU,BASE_JIS0208_TEN,MAX_JIS0208_KU,MAX_JIS0208_TEN,
     (void *) jis0208tab},
  {MIN_KANA_8,0,MAX_KANA_8,0,(void *) KANA_8},
#ifdef JIS0212TOUNICODE		/* Japanese extended */
  {BASE_JIS0212_KU,BASE_JIS0212_TEN,MAX_JIS0212_KU,MAX_JIS0212_TEN,
     (void *) jis0212tab}
#else
  {0,0,0,0,NIL}
#endif
};
#endif

#ifdef KSCTOUNICODE		/* Korean */
static const struct utf8_eucparam ksc_param = {
  BASE_KSC5601_KU,BASE_KSC5601_TEN,MAX_KSC5601_KU,MAX_KSC5601_TEN,
  (void *) ksc5601tab};
#endif

/* List of supported charsets
 *
 * Each entry supplies six values, in order:
 *   1. charset name string
 *   2. a CT_xxx type code
 *   3. a mask of CF_xxx flags
 *   4. a pointer to a conversion table or EUC parameter block, or NIL
 *   5. a mask of SC_xxx script bits, or NIL
 *   6. another charset name string, or NIL
 * NOTE(review): the sixth value looks like the preferred charset to use in
 * place of this one; confirm against the CHARSET declaration.
 *
 * Double-byte and ISO-2022 entries are present only when the corresponding
 * xxxTOUNICODE macros are defined.  The table ends with a NIL entry,
 * presumably the end-of-table sentinel for lookups.
 */

static const CHARSET utf8_csvalid[] = {
  {"US-ASCII",CT_ASCII,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   NIL,NIL,NIL},
  {"UTF-8",CT_UTF8,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   NIL,SC_UNICODE,NIL},
  {"UTF-7",CT_UTF7,CF_PRIMARY | CF_POSTING | CF_UNSUPRT,
   NIL,SC_UNICODE,"UTF-8"},
  {"ISO-8859-1",CT_1BYTE0,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   NIL,SC_LATIN_1,NIL},
  {"ISO-8859-2",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_2tab,SC_LATIN_2,NIL},
  {"ISO-8859-3",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_3tab,SC_LATIN_3,NIL},
  {"ISO-8859-4",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_4tab,SC_LATIN_4,NIL},
  {"ISO-8859-5",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_5tab,SC_CYRILLIC,"KOI8-R"},
  {"ISO-8859-6",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_6tab,SC_ARABIC,NIL},
  {"ISO-8859-7",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_7tab,SC_GREEK,NIL},
  {"ISO-8859-8",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_8tab,SC_HEBREW,NIL},
  {"ISO-8859-9",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_9tab,SC_LATIN_5,NIL},
  {"ISO-8859-10",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_10tab,SC_LATIN_6,NIL},
  {"ISO-8859-11",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_11tab,SC_THAI,NIL},
#if 0				/* ISO 8859-12 reserved for ISCII(?) */
  {"ISO-8859-12",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_12tab,NIL,NIL},
#endif
  {"ISO-8859-13",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_13tab,SC_LATIN_7,NIL},
  {"ISO-8859-14",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_14tab,SC_LATIN_8,NIL},
  {"ISO-8859-15",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_15tab,SC_LATIN_9,NIL},
  {"ISO-8859-16",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) iso8859_16tab,SC_LATIN_10,NIL},
  {"KOI8-R",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) koi8rtab,SC_CYRILLIC,NIL},
  {"KOI8-U",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) koi8utab,SC_CYRILLIC | SC_UKRANIAN,NIL},
				/* KOI8-RU shares the KOI8-U table */
  {"KOI8-RU",CT_1BYTE,CF_DISPLAY,
   (void *) koi8utab,SC_CYRILLIC | SC_UKRANIAN,"KOI8-U"},
  {"TIS-620",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) tis620tab,SC_THAI,"ISO-8859-11"},
  {"VISCII",CT_1BYTE8,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) visciitab,SC_VIETNAMESE,NIL},
#ifdef GBTOUNICODE
				/* GBK, GB2312 and CN-GB all use gb_param */
  {"GBK",CT_DBYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
     (void *) &gb_param,SC_CHINESE_SIMPLIFIED,NIL},
  {"GB2312",CT_DBYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
   (void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
  {"CN-GB",CT_DBYTE,CF_DISPLAY,
     (void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
#ifdef CNS1TOUNICODE
  {"ISO-2022-CN",CT_2022,CF_PRIMARY | CF_UNSUPRT,
     NIL,SC_CHINESE_SIMPLIFIED | SC_CHINESE_TRADITIONAL,
   NIL},
#endif
#endif
#ifdef GB12345TOUNICODE
  {"CN-GB-12345",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) &gbt_param,SC_CHINESE_TRADITIONAL,"BIG5"},
#endif
#ifdef BIG5TOUNICODE
  {"BIG5",CT_DBYTE2,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
     (void *) big5_param,SC_CHINESE_TRADITIONAL,NIL},
  {"CN-BIG5",CT_DBYTE2,CF_DISPLAY,
     (void *) big5_param,SC_CHINESE_TRADITIONAL,"BIG5"},
  {"BIG-5",CT_DBYTE2,CF_DISPLAY,
     (void *) big5_param,SC_CHINESE_TRADITIONAL,"BIG5"},
#endif
#ifdef JISTOUNICODE
  {"ISO-2022-JP",CT_2022,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
     NIL,SC_JAPANESE,NIL},
  {"EUC-JP",CT_EUC,CF_PRIMARY | CF_DISPLAY,
     (void *) jis_param,SC_JAPANESE,"ISO-2022-JP"},
  {"SHIFT_JIS",CT_SJIS,CF_PRIMARY | CF_DISPLAY,
     NIL,SC_JAPANESE,"ISO-2022-JP"},
  {"SHIFT-JIS",CT_SJIS,CF_PRIMARY | CF_DISPLAY,
     NIL,SC_JAPANESE,"ISO-2022-JP"},
#ifdef JIS0212TOUNICODE
  {"ISO-2022-JP-1",CT_2022,CF_UNSUPRT,
     NIL,SC_JAPANESE,"ISO-2022-JP"},
				/* ISO-2022-JP-2 is listed only when the JIS,
				 * JIS 0212, GB and KSC tables are all present
				 */
#ifdef GBTOUNICODE
#ifdef KSCTOUNICODE
  {"ISO-2022-JP-2",CT_2022,CF_UNSUPRT,
     NIL,
     SC_LATIN_1 | SC_LATIN_2 | SC_LATIN_3 | SC_LATIN_4 | SC_LATIN_5 |
       SC_LATIN_6 | SC_LATIN_7 | SC_LATIN_8 | SC_LATIN_9 | SC_LATIN_10 |
	 SC_ARABIC | SC_CYRILLIC | SC_GREEK | SC_HEBREW | SC_THAI |
	   SC_VIETNAMESE | SC_CHINESE_SIMPLIFIED | SC_JAPANESE | SC_KOREAN
#ifdef CNS1TOUNICODE
	     | SC_CHINESE_TRADITIONAL
#endif
	       ,"UTF-8"},
#endif
#endif
#endif
#endif
#ifdef KSCTOUNICODE
  {"ISO-2022-KR",CT_2022,CF_PRIMARY | CF_DISPLAY | CF_UNSUPRT,
     NIL,SC_KOREAN,"EUC-KR"},
  {"EUC-KR",CT_DBYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
     (void *) &ksc_param,SC_KOREAN,NIL},
  {"KSC5601",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
  {"KSC_5601",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
  {"KS_C_5601-1987",CT_DBYTE,CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
  {"KS_C_5601-1989",CT_DBYTE,CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
  {"KS_C_5601-1992",CT_DBYTE,CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
  {"KS_C_5601-1997",CT_DBYTE,CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
#endif
				/* deep sigh */
  {"WINDOWS-874",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_874tab,SC_THAI,"ISO-8859-11"},
  {"CP874",CT_1BYTE,CF_DISPLAY,
     (void *) windows_874tab,SC_THAI,"ISO-8859-11"},
#ifdef GBTOUNICODE
  {"WINDOWS-936",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
  {"CP936",CT_DBYTE,CF_DISPLAY,
     (void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
#endif
#ifdef KSCTOUNICODE
  {"WINDOWS-949",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
  {"CP949",CT_DBYTE,CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
  {"X-WINDOWS-949",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
#endif
  {"WINDOWS-1250",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1250tab,SC_LATIN_2,"ISO-8859-2"},
  {"CP1250",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1250tab,SC_LATIN_2,"ISO-8859-2"},
  {"WINDOWS-1251",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
     (void *) windows_1251tab,SC_CYRILLIC,"KOI8-R"},
  {"CP1251",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1251tab,SC_CYRILLIC,"KOI8-R"},
  {"WINDOWS-1252",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1252tab,SC_LATIN_1,"ISO-8859-1"},
  {"CP1252",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1252tab,SC_LATIN_1,"ISO-8859-1"},
  {"WINDOWS-1253",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1253tab,SC_GREEK,"ISO-8859-7"},
  {"CP1253",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1253tab,SC_GREEK,"ISO-8859-7"},
  {"WINDOWS-1254",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1254tab,SC_LATIN_5,"ISO-8859-9"},
  {"CP1254",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1254tab,SC_LATIN_5,"ISO-8859-9"},
  {"WINDOWS-1255",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1255tab,SC_HEBREW,"ISO-8859-8"},
  {"CP1255",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1255tab,SC_HEBREW,"ISO-8859-8"},
  {"WINDOWS-1256",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1256tab,SC_ARABIC,"ISO-8859-6"},
  {"CP1256",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1256tab,SC_ARABIC,"ISO-8859-6"},
  {"WINDOWS-1257",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1257tab,SC_LATIN_7,"ISO-8859-13"},
  {"CP1257",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1257tab,SC_LATIN_7,"ISO-8859-13"},
  {"WINDOWS-1258",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) windows_1258tab,SC_VIETNAMESE,"VISCII"},
  {"CP1258",CT_1BYTE,CF_DISPLAY,
     (void *) windows_1258tab,SC_VIETNAMESE,"VISCII"},
				/* deeper sigh */
  {"IBM367",CT_ASCII,CF_PRIMARY | CF_DISPLAY,
     NIL,NIL,"US-ASCII"},
  {"IBM437",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_437tab,SC_LATIN_1,"ISO-8859-1"},
  {"IBM737",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_737tab,SC_GREEK,"ISO-8859-7"},
  {"IBM775",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_775tab,SC_LATIN_7,"ISO-8859-13"},
  {"IBM850",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_850tab,SC_LATIN_1,"ISO-8859-1"},
  {"IBM852",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_852tab,SC_LATIN_2,"ISO-8859-2"},
  {"IBM855",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_855tab,SC_CYRILLIC,"ISO-8859-5"},
  {"IBM857",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_857tab,SC_LATIN_5,"ISO-8859-9"},
  {"IBM860",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_860tab,SC_LATIN_1,"ISO-8859-1"},
  {"IBM861",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_861tab,SC_LATIN_6,"ISO-8859-10"},
  {"IBM862",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_862tab,SC_HEBREW,"ISO-8859-8"},
  {"IBM863",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_863tab,SC_LATIN_1,"ISO-8859-1"},
  {"IBM864",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_864tab,SC_ARABIC,"ISO-8859-6"},
  {"IBM865",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_865tab,SC_LATIN_6,"ISO-8859-10"},
  {"IBM866",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_866tab,SC_CYRILLIC,"KOI8-R"},
  {"IBM869",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_869tab,SC_GREEK,"ISO-8859-7"},
  {"IBM874",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
     (void *) ibm_874tab,SC_THAI,"ISO-8859-11"},
				/* deepest sigh */
  {"ANSI_X3.4-1968",CT_ASCII,CF_DISPLAY,
     NIL,NIL,"US-ASCII"},
  {"UNICODE-1-1-UTF-7",CT_UTF7,CF_UNSUPRT,
     NIL,SC_UNICODE,"UTF-8"},
				/* these should never appear in email */
  {"UCS-2",CT_UCS2,CF_PRIMARY | CF_DISPLAY | CF_NOEMAIL,
     NIL,SC_UNICODE,"UTF-8"},
  {"UCS-4",CT_UCS4,CF_PRIMARY | CF_DISPLAY | CF_NOEMAIL,
     NIL,SC_UNICODE,"UTF-8"},
  {"UTF-16",CT_UTF16,CF_PRIMARY | CF_DISPLAY | CF_NOEMAIL,
     NIL,SC_UNICODE,"UTF-8"},
  NIL
};

/* Non-Unicode Script table
 *
 * Each entry supplies a script name string, an optional description string
 * (NIL when there is none), and the matching SC_xxx bit.  Entries are listed
 * in alphabetical order by name and the table ends with a NIL entry.
 * NOTE(review): "Ukranian" is spelled this way both in the name string, which
 * is runtime data, and in the SC_UKRANIAN identifier; changing either would
 * alter behavior or break references, so both are left as they are.
 */

static const SCRIPT utf8_scvalid[] = {
  {"Arabic",NIL,SC_ARABIC},
  {"Chinese Simplified","China, Singapore",SC_CHINESE_SIMPLIFIED},
  {"Chinese Traditional","Taiwan, Hong Kong, Macao",SC_CHINESE_TRADITIONAL},
  {"Cyrillic",NIL,SC_CYRILLIC},
  {"Cyrillic Ukranian",NIL,SC_UKRANIAN},
  {"Greek",NIL,SC_GREEK},
  {"Hebrew",NIL,SC_HEBREW},
  {"Japanese",NIL,SC_JAPANESE},
  {"Korean",NIL,SC_KOREAN},
  {"Latin-1","Western Europe",SC_LATIN_1},
  {"Latin-2","Eastern Europe",SC_LATIN_2},
  {"Latin-3","Southern Europe",SC_LATIN_3},
  {"Latin-4","Northern Europe",SC_LATIN_4},
  {"Latin-5","Turkish",SC_LATIN_5},
  {"Latin-6","Nordic",SC_LATIN_6},
  {"Latin-7","Baltic",SC_LATIN_7},
  {"Latin-8","Celtic",SC_LATIN_8},
  {"Latin-9","Euro",SC_LATIN_9},
  {"Latin-10","Balkan",SC_LATIN_10},
  {"Thai",NIL,SC_THAI},
  {"Vietnamese",NIL,SC_VIETNAMESE},
  NIL
};

/* Look up script name or return entire table
 * Accepts: script name or NIL
 * Returns: pointer to script table entry or NIL if unknown
 */

SCRIPT *utf8_script (char *script)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -