⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utf-8.c

📁 Newlib 嵌入式 C库 标准实现代码
💻 C
字号:
/*
 * Copyright (c) 2003-2004, Artem B. Bityuckiy
 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "cesbi.h"

#if defined (ICONV_TO_UCS_CES_UTF_8) \
 || defined (ICONV_FROM_UCS_CES_UTF_8)

#include <_ansi.h>
#include <reent.h>
#include <sys/types.h>
#include "../lib/local.h"
#include "../lib/ucsconv.h"

/* Longest UTF-8 sequence this converter produces or accepts, in bytes. */
#define UTF8_MB_CUR_MAX 6

/*
 * UTF-8 CES converter doesn't interpret BOM. Reject overlong sequences,
 * U'FFFF, U'FFFE codes, UTF-16 surrogate codes and all codes > 0x7FFFFFFF.
 */

#if defined (ICONV_FROM_UCS_CES_UTF_8)
/*
 * Encode one UCS-4 code as a UTF-8 sequence of 1 to 6 bytes.
 *
 *   data         - unused by this converter.
 *   in           - code to encode.
 *   outbuf       - in/out: address of the output cursor; advanced past the
 *                  bytes written on success.
 *   outbytesleft - in/out: room left at *outbuf; decreased by the number
 *                  of bytes written on success.
 *
 * Returns the number of bytes written.  Returns
 * (size_t)ICONV_CES_INVALID_CHARACTER when `in' is a UTF-16 surrogate
 * (0xD800..0xDFFF), 0xFFFE, 0xFFFF or above 0x7FFFFFFF, and
 * (size_t)ICONV_CES_NOSPACE when the sequence does not fit in
 * *outbytesleft.  Nothing is written and neither cursor is moved on error.
 */
static size_t
_DEFUN(convert_from_ucs, (data, in, outbuf, outbytesleft),
                         _VOID_PTR data         _AND
                         register ucs4_t in     _AND
                         unsigned char **outbuf _AND
                         size_t *outbytesleft)
{
  /* Length marker OR-ed into the first byte, indexed by sequence length
     (index 0 is unused; a 1-byte sequence carries no marker). */
  static _CONST unsigned char lead_marker[UTF8_MB_CUR_MAX + 1] =
    { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

  register unsigned char *cp;
  register size_t bytes;
  register size_t i;

  if ((in >= 0x0000D800 && in <= 0x0000DFFF)
      || in > 0x7FFFFFFF || in == 0x0000FFFF || in == 0x0000FFFE)
    return (size_t)ICONV_CES_INVALID_CHARACTER;

  /* Shortest sequence able to hold the code. */
  if (in < 0x80)
    bytes = 1;
  else if (in < 0x800)
    bytes = 2;
  else if (in < 0x10000)
    bytes = 3;
  else if (in < 0x200000)
    bytes = 4;
  else if (in < 0x4000000)
    bytes = 5;
  else
    bytes = 6;

  if (*outbytesleft < bytes)
    return (size_t)ICONV_CES_NOSPACE;

  cp = *outbuf;

  /* Fill the continuation bytes from the tail, six payload bits each;
     whatever remains in `in' afterwards belongs in the lead byte. */
  for (i = bytes - 1; i > 0; i--)
    {
      cp[i] = (unsigned char)((in & 0x0000003F) | 0x00000080);
      in >>= 6;
    }
  cp[0] = (unsigned char)(in | lead_marker[bytes]);

  *outbytesleft -= bytes;
  *outbuf += bytes;

  return bytes;
}
#endif /* ICONV_FROM_UCS_CES_UTF_8 */

#if defined (ICONV_TO_UCS_CES_UTF_8)
/*
 * Decode one UTF-8 sequence (1 to 6 bytes) into a UCS-4 code.
 *
 *   data        - unused by this converter.
 *   inbuf       - in/out: address of the input cursor; advanced past the
 *                 consumed sequence on success.
 *   inbytesleft - in/out: bytes available at *inbuf; decreased by the
 *                 number of bytes consumed on success.
 *
 * Returns the decoded code.  Returns (ucs4_t)ICONV_CES_BAD_SEQUENCE when
 * fewer bytes are available than the lead byte calls for (including no
 * bytes at all), and (ucs4_t)ICONV_CES_INVALID_CHARACTER for a stray
 * continuation byte, a 0xFE/0xFF lead byte, a malformed continuation byte,
 * an overlong encoding, a UTF-16 surrogate, 0xFFFE, 0xFFFF or a value above
 * 0x7FFFFFFF.  Neither cursor is moved on error.
 */
static ucs4_t
_DEFUN(convert_to_ucs, (data, inbuf, inbytesleft),
                       _VOID_PTR data               _AND
                       _CONST unsigned char **inbuf _AND
                       size_t *inbytesleft)
{
  register _CONST unsigned char *in = *inbuf;
  register size_t bytes;
  register size_t i;
  ucs4_t res;
  ucs4_t lowest;  /* smallest code that genuinely needs `bytes' bytes */

  /* Never look at in[0] when the caller has nothing left to give. */
  if (*inbytesleft == 0)
    return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

  /* Classify the lead byte: sequence length, payload bits, overlong bound. */
  if (in[0] < 0x80)
    {
      bytes = 1;
      res = (ucs4_t)in[0];
      lowest = 0x00000000;
    }
  else if (in[0] < 0xC0) /* Continuation byte in lead position */
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
  else if (in[0] < 0xE0)
    {
      bytes = 2;
      res = (ucs4_t)(in[0] & 0x1F);
      lowest = 0x00000080;
    }
  else if (in[0] < 0xF0)
    {
      bytes = 3;
      res = (ucs4_t)(in[0] & 0x0F);
      lowest = 0x00000800;
    }
  else if (in[0] < 0xF8)
    {
      bytes = 4;
      res = (ucs4_t)(in[0] & 0x07);
      lowest = 0x00010000;
    }
  else if (in[0] < 0xFC)
    {
      bytes = 5;
      res = (ucs4_t)(in[0] & 0x03);
      lowest = 0x00200000;
    }
  else if (in[0] <= 0xFD)
    {
      bytes = 6;
      res = (ucs4_t)(in[0] & 0x01);
      lowest = 0x04000000;
    }
  else /* 0xFE and 0xFF never start a sequence */
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

  /* Truncated input is reported before any continuation byte is read. */
  if (*inbytesleft < bytes)
    return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

  /* Every continuation byte must look like 10xxxxxx. */
  for (i = 1; i < bytes; i++)
    {
      if ((in[i] & 0xC0) != 0x80)
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
      res = (res << 6) | (ucs4_t)(in[i] & 0x3F);
    }

  if (res < lowest) /* Overlong sequence */
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

  if ((res >= 0x0000D800 && res <= 0x0000DFFF)
      || res > 0x7FFFFFFF || res == 0x0000FFFF || res == 0x0000FFFE)
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

  *inbytesleft -= bytes;
  *inbuf += bytes;

  return res;
}
#endif /* ICONV_TO_UCS_CES_UTF_8 */

/*
 * Report the maximum number of bytes in one encoded character,
 * UTF8_MB_CUR_MAX.  `data' is unused.
 */
static int
_DEFUN(get_mb_cur_max, (data),
                       _VOID_PTR data)
{
  return UTF8_MB_CUR_MAX;
}

#if defined (ICONV_TO_UCS_CES_UTF_8)
/*
 * UTF-8 -> UCS handler table.  Entries are positional; the slot layout
 * comes from iconv_to_ucs_ces_handlers_t, which is declared outside this
 * file.  Only the max-length query and the decoder are supplied here.
 */
_CONST iconv_to_ucs_ces_handlers_t
_iconv_to_ucs_ces_handlers_utf_8 =
{
  NULL,
  NULL,
  get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  convert_to_ucs
};
#endif

#if defined (ICONV_FROM_UCS_CES_UTF_8)
/*
 * UCS -> UTF-8 handler table.  Entries are positional; the slot layout
 * comes from iconv_from_ucs_ces_handlers_t, which is declared outside this
 * file.  Only the max-length query and the encoder are supplied here.
 */
_CONST iconv_from_ucs_ces_handlers_t
_iconv_from_ucs_ces_handlers_utf_8 =
{
  NULL,
  NULL,
  get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  convert_from_ucs
};
#endif

#endif /* ICONV_TO_UCS_CES_UTF_8 || ICONV_FROM_UCS_CES_UTF_8 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -