📄 charset.c

📁 AnyQ服务端源代码(2004/10/28)源码
💻 C
字号:
/* --------------------------------------------------------------------------
 *
 * License
 *
 * The contents of this file are subject to the Jabber Open Source License
 * Version 1.0 (the "License").  You may not copy or use this file, in either
 * source code or executable form, except in compliance with the License.  You
 * may obtain a copy of the License at http://www.jabber.com/license/ or at
 * http://www.opensource.org/.
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied.  See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * Copyright (c) 1999-2000 Schuyler Heath <sheath@jabber.org>
 *
 * Acknowledgements
 *
 * Special thanks to the Jabber Open Source Contributors for their
 * suggestions and support of Jabber.
 *
 * --------------------------------------------------------------------------*/

#include "aimtrans.h"

/* Constants for character set conversion. */
#define BYTEMASK_3   0xFFFFF800UL   /* mask to test for 3-byte UTF-8 representation required */
#define BYTEMASK_2   0xFFFFFF80UL   /* mask to test for 2-byte UTF-8 representation required */
#define BYTEU8_LEN6  0xFC           /* length prefix for 6-byte UTF-8 sequence */
#define BYTEU8_LEN5  0xF8           /* length prefix for 5-byte UTF-8 sequence */
#define BYTEU8_LEN4  0xF0           /* length prefix for 4-byte UTF-8 sequence */
#define BYTEU8_LEN3  0xE0           /* length prefix for 3-byte UTF-8 sequence */
#define BYTEU8_LEN2  0xC0           /* length prefix for 2-byte UTF-8 sequence */
#define BYTEU8_PFIX  0x80           /* prefix for supplemental bytes of a multibyte sequence */
#define BYTETEST_1   0x80           /* test mask for 1-byte UTF-8 sequence */
#define BYTETEST_PF  0xC0           /* test mask for second/subsequent bytes of multibyte */
#define BYTETEST_2   0xE0           /* test mask for 2-byte UTF-8 sequence */
#define BYTETEST_3   0xF0           /* test mask for 3-byte UTF-8 sequence */
#define BYTETEST_4   0xF8           /* test mask for 4-byte UTF-8 sequence */
#define BYTETEST_5   0xFC           /* test mask for 5-byte UTF-8 sequence */
#define BYTETEST_6   0xFE           /* test mask for 6-byte UTF-8 sequence */
#define LOBITS_1     0x7F           /* low-bit mask for 1-byte UTF-8 sequence */
#define LOBITS_PFX   0x3F           /* low-bit mask for supp. bytes of multibyte sequence */
#define LOBITS_2     0x1F           /* low-bit mask for 2-byte UTF-8 sequence */
#define LOBITS_3     0x0F           /* low-bit mask for 3-byte UTF-8 sequence */
#define LOBITS_4     0x07           /* low-bit mask for 4-byte UTF-8 sequence */
#define LOBITS_5     0x03           /* low-bit mask for 5-byte UTF-8 sequence */
#define LOBITS_6     0x01           /* low-bit mask for 6-byte UTF-8 sequence */
#define SAFE2_MASK   0xFE           /* test for safe encoding of 2-byte sequences (mask) */
#define SAFE2_TEST   0xC0           /* test for safe encoding of 2-byte sequences (value) */
#define SAFE3A_TEST  0xE0           /* test for safe encoding of 3-byte sequences (value 1) */
#define SAFE3B_MASK  0xE0           /* test for safe encoding of 3-byte sequences (mask 2) */
#define SAFE3B_TEST  0x80           /* test for safe encoding of 3-byte sequences (value 2) */
#define SAFE4A_TEST  0xF0           /* test for safe encoding of 4-byte sequences (value 1) */
#define SAFE4B_MASK  0xF0           /* test for safe encoding of 4-byte sequences (mask 2) */
#define SAFE4B_TEST  0x80           /* test for safe encoding of 4-byte sequences (value 2) */
#define SAFE5A_TEST  0xF8           /* test for safe encoding of 5-byte sequences (value 1) */
#define SAFE5B_MASK  0xF8           /* test for safe encoding of 5-byte sequences (mask 2) */
#define SAFE5B_TEST  0x80           /* test for safe encoding of 5-byte sequences (value 2) */
#define SAFE6A_TEST  0xFC           /* test for safe encoding of 6-byte sequences (value 1) */
#define SAFE6B_MASK  0xFC           /* test for safe encoding of 6-byte sequences (mask 2) */
#define SAFE6B_TEST  0x80           /* test for safe encoding of 6-byte sequences (value 2) */
#define BITOFS       6              /* bit offset for fragments of UTF-8 data */
#define BITSOFS(x)   ((x) * BITOFS) /* multiples of the bit offset */
#define UTF8SUB      '?'            /* UCS-4 substitute character */
#define WINSUB       '?'            /* Windows substitute character */
#define NULCHR       '\0'           /* null character for termination */

/* Conversion descriptors used by the two functions below.
   NOTE(review): they are not opened in this file; presumably the transport
   calls iconv_open() for both during start-up -- confirm against the caller. */
iconv_t fromutf8, toutf8;

/*
 * Converts a NUL-terminated string from the eight-bit character set behind
 * the `toutf8` descriptor into UTF-8.
 *
 * p            pool the result buffer is allocated from (via pmalloc)
 * windows_str  input string; may be NULL
 *
 * Returns a NUL-terminated string allocated from `p`, or NULL when
 * `windows_str` is NULL or the allocation yields NULL.  Every input byte that
 * iconv() rejects with EILSEQ or EINVAL is replaced by UTF8SUB ('?').  Any
 * other iconv() failure (including E2BIG) ends the conversion and whatever
 * has been produced so far is returned.
 */
char *it_convert_windows2utf8(pool p, const char *windows_str)
{
    size_t srclen;
    size_t inbytesleft, outbytesleft;
    char *result;
    char *inbuf, *outbuf;
    int more = 1;

    log_notice( ZONE, "it_convert_windows2utf8");

    if (windows_str == NULL)
        return NULL;

    srclen = strlen(windows_str);

    /* for now, allocate more than enough space: four output bytes per input
       byte, plus one for the terminator */
    result = (char *)pmalloc(p, srclen * 4 + 1);
    if (result == NULL)
        return NULL;

    /* iconv() takes a non-const pointer but only reads through it */
    inbuf = (char *)windows_str;
    outbuf = result;
    inbytesleft = srclen;
    outbytesleft = srclen * 4;

    while (more)
    {
        if (iconv(toutf8, &inbuf, &inbytesleft, &outbuf, &outbytesleft) != (size_t)(-1))
            break;      /* everything that was left has been converted */

        switch (errno)
        {
            case EILSEQ:
            case EINVAL:
                /* never step past either buffer: the counters are unsigned
                   and would wrap around instead of going negative */
                if (inbytesleft == 0 || outbytesleft == 0)
                {
                    more = 0;
                    break;
                }
                /* drop the offending byte and emit the substitute */
                inbytesleft--;
                inbuf++;
                outbytesleft--;
                *outbuf++ = UTF8SUB;
                break;

            case E2BIG:
            default:
                more = 0;
                break;
        }
    }

    /* the extra byte allocated above is reserved for this terminator */
    *outbuf = NULCHR;
    return result;
}

/*
 * Converts a NUL-terminated UTF-8 string into the eight-bit character set
 * behind the `fromutf8` descriptor.
 *
 * p         pool the result buffer is allocated from (via pmalloc)
 * utf8_str  input string; may be NULL
 *
 * Returns a NUL-terminated string allocated from `p`, or NULL when `utf8_str`
 * is NULL or the allocation yields NULL.  Every sequence that iconv() rejects
 * with EILSEQ or EINVAL is replaced by a single WINSUB ('?'): the byte at the
 * failure position is skipped together with all UTF-8 continuation bytes
 * (10xxxxxx) that directly follow it.  Any other iconv() failure (including
 * E2BIG) ends the conversion and whatever has been produced so far is
 * returned.
 */
char *it_convert_utf82windows(pool p, const char *utf8_str)
{
    size_t srclen;
    size_t inbytesleft, outbytesleft;
    char *result;
    char *inbuf, *outbuf;
    int more = 1;

    log_notice( ZONE, "it_convert_utf82windows");

    if (utf8_str == NULL)
        return NULL;

    srclen = strlen(utf8_str);

    /* the output is given as many bytes as the input, plus the terminator */
    result = (char *)pmalloc(p, srclen + 1);
    if (result == NULL)
        return NULL;

    /* iconv() takes a non-const pointer but only reads through it */
    inbuf = (char *)utf8_str;
    outbuf = result;
    inbytesleft = outbytesleft = srclen;

    while (more)
    {
        if (iconv(fromutf8, &inbuf, &inbytesleft, &outbuf, &outbytesleft) != (size_t)(-1))
            break;      /* everything that was left has been converted */

        switch (errno)
        {
            case EILSEQ:
            case EINVAL:
                /* never step past either buffer: the counters are unsigned
                   and would wrap around instead of going negative */
                if (inbytesleft == 0 || outbytesleft == 0)
                {
                    more = 0;
                    break;
                }
                /* drop the lead byte and emit one substitute for the whole
                   sequence */
                inbytesleft--;
                inbuf++;
                outbytesleft--;
                *outbuf++ = WINSUB;

                /* swallow the continuation bytes that belonged to it */
                while (inbytesleft > 0 &&
                       ((unsigned char)*inbuf & BYTETEST_PF) == BYTEU8_PFIX)
                {
                    inbytesleft--;
                    inbuf++;
                }
                break;

            case E2BIG:
            default:
                more = 0;
                break;
        }
    }

    /* the extra byte allocated above is reserved for this terminator */
    *outbuf = NULCHR;
    return result;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -