⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utf-16.c

📁 KPIT GNU Tools is a set of GNU development tools for Renesas microcontrollers.
💻 C
字号:
/*
 * Copyright (c) 2003-2004, Artem B. Bityuckiy
 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "cesbi.h"

#if defined (ICONV_TO_UCS_CES_UTF_16) \
 || defined (ICONV_FROM_UCS_CES_UTF_16)

#include <_ansi.h>
#include <reent.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "../lib/local.h"
#include "../lib/ucsconv.h"
#include "../lib/endian.h"

/*
 * On input, the UTF-16 converter interprets a leading BOM and falls back to
 * big-endian byte order when no BOM is present.  On output, the UTF-16
 * converter writes in system (host) byte order and emits the corresponding
 * BOM as the first code unit.  The UTF-16LE and UTF-16BE converters do not
 * interpret a BOM on input and do not emit one on output.
 */

/* Converter state values; the state is a single int allocated per converter. */
#define UTF16_UNDEFINED     0x00  /* to-UCS: byte order not detected yet     */
#define UTF16_BIG_ENDIAN    0x01
#define UTF16_LITTLE_ENDIAN 0x02
#define UTF16_SYSTEM_ENDIAN 0x04  /* from-UCS: host order, BOM still pending */
#define UTF16_BOM_WRITTEN   0x08  /* or'ed into UTF16_SYSTEM_ENDIAN          */

#define UTF16_BOM 0xFEFF

/* Encoding names recognised by the init functions. */
#define UTF_16   "utf_16"
#define UTF_16BE "utf_16be"
#define UTF_16LE "utf_16le"

/*
 * utf_16_close - release the converter state.
 *
 * rptr - reentrancy structure handed to _free_r.
 * data - state pointer returned by one of the init functions.
 *
 * Always returns 0.
 */
static size_t
_DEFUN(utf_16_close, (rptr, data),
                     struct _reent *rptr _AND
                     _VOID_PTR data)
{
  _free_r (rptr, data);
  return 0;
}

#if defined (ICONV_FROM_UCS_CES_UTF_16)
/*
 * utf_16_init_from_ucs - allocate and initialise the UCS -> UTF-16 state.
 *
 * The state is UTF16_LITTLE_ENDIAN for "utf_16le", UTF16_BIG_ENDIAN for
 * "utf_16be" and UTF16_SYSTEM_ENDIAN for any other name.
 *
 * Returns the state pointer, or NULL if the allocation fails.
 */
static _VOID_PTR
_DEFUN(utf_16_init_from_ucs, (rptr, encoding),
                             struct _reent *rptr _AND
                             _CONST char *encoding)
{
  int *data;

  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
    return (_VOID_PTR)NULL;

  if (strcmp (encoding, UTF_16LE) == 0)
    *data = UTF16_LITTLE_ENDIAN;
  else if (strcmp (encoding, UTF_16BE) == 0)
    *data = UTF16_BIG_ENDIAN;
  else
    *data = UTF16_SYSTEM_ENDIAN;

  return (_VOID_PTR)data;
}

/*
 * utf_16_to_output_order - put one host-order code unit into the byte order
 * selected by the converter state.  Any state other than the explicit
 * little/big-endian ones leaves the unit in host order.
 */
static ucs2_t
_DEFUN(utf_16_to_output_order, (state, unit),
                               int state _AND
                               ucs2_t unit)
{
  if (state == UTF16_LITTLE_ENDIAN)
    return (ucs2_t)ICONV_HTOLES (unit);
  if (state == UTF16_BIG_ENDIAN)
    return (ucs2_t)ICONV_HTOBES (unit);
  return unit;
}

/*
 * utf_16_convert_from_ucs - encode one UCS-4 character as UTF-16.
 *
 * data         - converter state (int) created by utf_16_init_from_ucs.
 * in           - character to encode.
 * outbuf       - in/out: output position, advanced by the bytes written.
 * outbytesleft - in/out: space left in the output, reduced accordingly.
 *
 * Returns the number of bytes written, ICONV_CES_INVALID_CHARACTER when
 * `in' is above 0x10FFFF, is a surrogate, or is 0xFFFE/0xFFFF, and
 * ICONV_CES_NOSPACE when the output buffer is too small.  On either error
 * neither the buffer nor the state is modified.
 */
static size_t
_DEFUN(utf_16_convert_from_ucs, (data, in, outbuf, outbytesleft),
                                _VOID_PTR data         _AND
                                register ucs4_t in     _AND
                                unsigned char **outbuf _AND
                                size_t *outbytesleft)
{
  ucs2_t units[3];   /* optional BOM + up to two code units */
  size_t count = 0;
  size_t bytes;
  int *state;

  if (in > 0x0010FFFF || (in >= 0x0000D800 && in <= 0x0000DFFF)
      || in == 0x0000FFFF || in == 0x0000FFFE)
    return (size_t)ICONV_CES_INVALID_CHARACTER;

  state = (int *)data;

  /* The very first character in system-endian mode is preceded by a BOM. */
  bytes = (*state == UTF16_SYSTEM_ENDIAN) ? sizeof (ucs2_t) * 2
                                          : sizeof (ucs2_t);

  if (in > 0x0000FFFF)
    bytes += sizeof (ucs2_t);

  if (*outbytesleft < bytes)
    return (size_t)ICONV_CES_NOSPACE;

  if (*state == UTF16_SYSTEM_ENDIAN)
    {
      units[count++] = UTF16_BOM;
      *state |= UTF16_BOM_WRITTEN;
    }

  if (in < 0x00010000)
    {
      units[count++] = utf_16_to_output_order (*state, (ucs2_t)in);
    }
  else
    {
      ucs2_t w1, w2;

      /* Process surrogate pair */
      in -= 0x00010000;
      w1 = ((ucs2_t)((in >> 10)) & 0x03FF) | 0xD800;
      w2 = (ucs2_t)(in & 0x000003FF) | 0xDC00;

      units[count++] = utf_16_to_output_order (*state, w1);
      units[count++] = utf_16_to_output_order (*state, w2);
    }

  /* *outbuf is a byte pointer with no alignment guarantee visible here, so
     copy the code units instead of storing through a ucs2_t pointer.
     count * sizeof (ucs2_t) equals `bytes' on every path.  */
  memcpy (*outbuf, units, count * sizeof (ucs2_t));

  *outbuf += bytes;
  *outbytesleft -= bytes;

  return bytes;
}
#endif /* ICONV_FROM_UCS_CES_UTF_16 */

#if defined (ICONV_TO_UCS_CES_UTF_16)
/*
 * utf_16_init_to_ucs - allocate and initialise the UTF-16 -> UCS state.
 *
 * The state is UTF16_BIG_ENDIAN for "utf_16be", UTF16_LITTLE_ENDIAN for
 * "utf_16le" and UTF16_UNDEFINED (byte order detected from the input) for
 * any other name.
 *
 * Returns the state pointer, or NULL if the allocation fails.
 */
static _VOID_PTR
_DEFUN(utf_16_init_to_ucs, (rptr, encoding),
                           struct _reent *rptr _AND
                           _CONST char *encoding)
{
  int *data;

  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
    return (_VOID_PTR)NULL;

  if (strcmp (encoding, UTF_16BE) == 0)
    *data = UTF16_BIG_ENDIAN;
  else if (strcmp (encoding, UTF_16LE) == 0)
    *data = UTF16_LITTLE_ENDIAN;
  else
    *data = UTF16_UNDEFINED;

  return (_VOID_PTR)data;
}

/*
 * utf_16_read_unit - fetch the code unit stored at `p' and convert it to
 * host order according to the converter state (little-endian when the state
 * is UTF16_LITTLE_ENDIAN, big-endian otherwise).  Uses memcpy because `p'
 * is a byte pointer that need not be aligned for ucs2_t.
 */
static ucs2_t
_DEFUN(utf_16_read_unit, (state, p),
                         int state _AND
                         _CONST unsigned char *p)
{
  ucs2_t raw;

  memcpy (&raw, p, sizeof (raw));

  if (state == UTF16_LITTLE_ENDIAN)
    return (ucs2_t)ICONV_LETOHS (raw);
  return (ucs2_t)ICONV_BETOHS (raw);
}

/*
 * utf_16_convert_to_ucs - decode one character from UTF-16 input.
 *
 * data        - converter state (int) created by utf_16_init_to_ucs.
 * inbuf       - in/out: input position, advanced past the consumed bytes
 *               (including a leading BOM when one was interpreted).
 * inbytesleft - in/out: bytes left in the input, reduced accordingly.
 *
 * Returns the decoded character, ICONV_CES_BAD_SEQUENCE when the input ends
 * before a complete character, or ICONV_CES_INVALID_CHARACTER for
 * 0xFFFE/0xFFFF and for broken surrogates.  On error the input position is
 * not advanced.  On ICONV_CES_BAD_SEQUENCE the state is also left as it was
 * on entry, so that a retry from the same position sees the BOM again
 * instead of decoding it as a character.
 */
static ucs4_t
_DEFUN(utf_16_convert_to_ucs, (data, inbuf, inbytesleft),
                              _VOID_PTR data               _AND
                              _CONST unsigned char **inbuf _AND
                              size_t *inbytesleft)
{
  _CONST unsigned char *p;
  ucs2_t w1;
  ucs2_t w2;
  int *state;
  int entry_state;
  ucs4_t res;
  size_t bytes = sizeof (ucs2_t);

  if (*inbytesleft < bytes)
    return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

  state = (int *)data;
  entry_state = *state;
  p = *inbuf;

  if (*state == UTF16_UNDEFINED)
    {
      ucs2_t first;

      /* Raw (unconverted) first code unit, read without assuming alignment. */
      memcpy (&first, p, sizeof (first));

      if (first == ICONV_HTOLES (UTF16_BOM))
        *state = UTF16_LITTLE_ENDIAN;
      else
        *state = UTF16_BIG_ENDIAN;

      if (   first == ICONV_HTOBES (UTF16_BOM)
          || first == ICONV_HTOLES (UTF16_BOM))
        {
          /* A character must follow the BOM; the BOM itself yields none. */
          bytes += sizeof (ucs2_t);
          if (*inbytesleft < bytes)
            {
              *state = entry_state;
              return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
            }
          p += sizeof (ucs2_t);
        }
    }

  w1 = utf_16_read_unit (*state, p);

  if (w1 < 0xD800 || w1 > 0xDFFF)
    {
      if (w1 == 0xFFFF || w1 == 0xFFFE)
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
      res = (ucs4_t)w1;
    }
  else
    {
      /* Process surrogate pair */

      /* A leading low surrogate can never become valid by supplying more
         input, so report it as invalid before asking for a second unit.  */
      if (w1 > 0xDBFF)
        /* Broken surrogate character */
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

      bytes += sizeof (ucs2_t);
      if (*inbytesleft < bytes)
        {
          *state = entry_state;
          return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
        }

      w2 = utf_16_read_unit (*state, p + sizeof (ucs2_t));

      if (w2 < 0xDC00 || w2 > 0xDFFF)
        /* Broken surrogate character */
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

      res = (ucs4_t)(w2 & 0x03FF) | ((ucs4_t)(w1 & 0x03FF) << 10);
      res += 0x00010000;
    }

  *inbuf += bytes;
  *inbytesleft -= bytes;

  return res;
}
#endif /* ICONV_TO_UCS_CES_UTF_16 */

/*
 * utf_16_get_mb_cur_max - report the maximum number of bytes per character.
 *
 * Returns 6, which matches the largest output utf_16_convert_from_ucs can
 * produce in one call: a BOM plus a surrogate pair.  `data' is unused.
 */
static int
_DEFUN(utf_16_get_mb_cur_max, (data),
                              _VOID_PTR data)
{
  return 6;
}

#if defined (ICONV_TO_UCS_CES_UTF_16)
/* Handler table for the UTF-16 -> UCS direction.  The three NULL entries
   are slots this converter does not implement.  */
_CONST iconv_to_ucs_ces_handlers_t
_iconv_to_ucs_ces_handlers_utf_16 =
{
  utf_16_init_to_ucs,
  utf_16_close,
  utf_16_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  utf_16_convert_to_ucs
};
#endif

#if defined (ICONV_FROM_UCS_CES_UTF_16)
/* Handler table for the UCS -> UTF-16 direction.  The three NULL entries
   are slots this converter does not implement.  */
_CONST iconv_from_ucs_ces_handlers_t
_iconv_from_ucs_ces_handlers_utf_16 =
{
  utf_16_init_from_ucs,
  utf_16_close,
  utf_16_get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  utf_16_convert_from_ucs
};
#endif

#endif /* ICONV_TO_UCS_CES_UTF_16 || ICONV_FROM_UCS_CES_UTF_16 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -