⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wbxml_charset.c

📁 WAP Binary XML 简单地说
💻 C
字号:
/*
 * libwbxml, the WBXML Library.
 * Copyright (C) 2002-2005 Aymerick Jehanne <aymerick@jehanne.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * LGPL v2.1: http://www.gnu.org/copyleft/lesser.txt
 *
 * Contact: libwbxml@aymerick.com
 * Home: http://libwbxml.aymerick.com
 */

/**
 * @file wbxml_charset.c
 * @ingroup wbxml_charset
 *
 * @author Aymerick Jehanne <libwbxml@aymerick.com>
 * @date 04/03/24
 *
 * @brief Charset Functions
 */

#include "wbxml.h"


/* Structures */

/** WBXML Charset */
typedef struct WBXMLCharsetEntry_s {
    const WB_TINY       *name;     /**< Charset Name */
    WBXMLCharsetMIBEnum  mib_enum; /**< Charset MIBEnum Value */
} WBXMLCharsetEntry;


/* Globals */

/**
 * @brief Charset table
 * @note  From http://www.iana.org/assignments/character-sets
 */
static const WBXMLCharsetEntry wbxml_charset_entries[] =
{
    { "US-ASCII",        WBXML_CHARSET_US_ASCII        },
    { "ISO-8859-1",      WBXML_CHARSET_ISO_8859_1      },
    { "ISO-8859-2",      WBXML_CHARSET_ISO_8859_2      },
    { "ISO-8859-3",      WBXML_CHARSET_ISO_8859_3      },
    { "ISO-8859-4",      WBXML_CHARSET_ISO_8859_4      },
    { "ISO-8859-5",      WBXML_CHARSET_ISO_8859_5      },
    { "ISO-8859-6",      WBXML_CHARSET_ISO_8859_6      },
    { "ISO-8859-7",      WBXML_CHARSET_ISO_8859_7      },
    { "ISO-8859-8",      WBXML_CHARSET_ISO_8859_8      },
    { "ISO-8859-9",      WBXML_CHARSET_ISO_8859_9      },
    { "Shift_JIS",       WBXML_CHARSET_SHIFT_JIS       },
    { "UTF-8",           WBXML_CHARSET_UTF_8           },
    { "ISO-10646-UCS-2", WBXML_CHARSET_ISO_10646_UCS_2 },
    { "UTF-16",          WBXML_CHARSET_UTF_16          },
    { "Big5",            WBXML_CHARSET_BIG5            }
};


/* Private Functions Prototypes */
static WB_BOOL binary_search(const WB_TINY *in_buf,
                             WB_ULONG       in_buf_len,
                             const WB_TINY *in_seq,
                             WB_ULONG       in_seq_len,
                             WB_ULONG      *out_pos);


/***************************************************
 *    Public Functions
 */

/**
 * @brief Look up the MIBEnum value of a charset from its name
 *
 * The name is compared case-insensitively against the charset table.
 *
 * @param name     Charset name to look for (a NULL name is never found)
 * @param mib_enum [out] Receives the MIBEnum value when found; may be NULL
 *                 if the caller only wants to know whether the name is known
 * @return TRUE if the charset name is in the table, FALSE otherwise
 */
WBXML_DECLARE(WB_BOOL) wbxml_charset_get_mib(const WB_TINY       *name,
                                             WBXMLCharsetMIBEnum *mib_enum)
{
    WB_ULONG i = 0;

    /* Do not hand a NULL pointer to the string comparison */
    if (name == NULL) {
        return FALSE;
    }

    for (i = 0; i < WBXML_TABLE_SIZE(wbxml_charset_entries); i++) {
        if (WBXML_STRCASECMP(name, wbxml_charset_entries[i].name) == 0) {
            if (mib_enum != NULL) {
                *mib_enum = wbxml_charset_entries[i].mib_enum;
            }

            return TRUE;
        }
    }

    return FALSE;
}


/**
 * @brief Look up the name of a charset from its MIBEnum value
 *
 * @param mib_enum MIBEnum value to look for
 * @param name     [out] Receives a pointer to the charset name stored in the
 *                 charset table when found; may be NULL
 * @return TRUE if the MIBEnum value is in the table, FALSE otherwise
 */
WBXML_DECLARE(WB_BOOL) wbxml_charset_get_name(WBXMLCharsetMIBEnum   mib_enum,
                                              const WB_TINY       **name)
{
    WB_ULONG i = 0;

    for (i = 0; i < WBXML_TABLE_SIZE(wbxml_charset_entries); i++) {
        if (mib_enum == wbxml_charset_entries[i].mib_enum) {
            if (name != NULL) {
                *name = wbxml_charset_entries[i].name;
            }

            return TRUE;
        }
    }

    return FALSE;
}


/**
 * @brief Convert a buffer from one charset to another
 *
 * When both charsets are US-ASCII or UTF-8, no conversion is done: the
 * result is a static buffer created from the NULL terminated input string.
 * Otherwise the conversion is delegated to iconv, when HAVE_ICONV is defined.
 *
 * @param in_buf      Buffer to convert. For the US-ASCII / UTF-8 case it must
 *                    be NULL terminated.
 * @param io_bytes    [in]  Number of bytes to convert from in_buf
 *                    [out] US-ASCII / UTF-8 case: decreased by the string
 *                          length plus its terminating NULL char.
 *                          iconv case: number of input bytes left unconverted.
 * @param in_charset  Charset of in_buf
 * @param out_buf     [out] Resulting buffer
 * @param out_charset Charset wanted for out_buf
 * @return WBXML_OK on success, an error code otherwise
 */
WBXML_DECLARE(WBXMLError) wbxml_charset_conv(const WB_TINY        *in_buf,
                                             WB_ULONG             *io_bytes,
                                             WBXMLCharsetMIBEnum   in_charset,
                                             WBXMLBuffer         **out_buf,
                                             WBXMLCharsetMIBEnum   out_charset)
{
    /**************************************************
     * First, check for simple US-ASCII / UTF-8 cases
     */

    /* Are we dealing with US-ASCII or UTF-8 ? */
    if (((in_charset  == WBXML_CHARSET_US_ASCII) || (in_charset  == WBXML_CHARSET_UTF_8)) &&
        ((out_charset == WBXML_CHARSET_US_ASCII) || (out_charset == WBXML_CHARSET_UTF_8)))
    {
        /* Create a static buffer */
        if ((*out_buf = wbxml_buffer_sta_create_from_cstr(in_buf)) == NULL) {
            return WBXML_ERROR_NOT_ENOUGH_MEMORY;
        }

        /* US-ASCII and UTF-8 are NULL terminated */
        *io_bytes -= WBXML_STRLEN(in_buf) + 1;

        return WBXML_OK;
    }

    /**************************************
     * Ok guys, we really have to convert
     */

#if defined( HAVE_ICONV )

    {
        /**********************
         * The iconv way
         */

        const WB_TINY *charset_to   = NULL;
        const WB_TINY *charset_from = NULL;
        WB_TINY       *tmp_buf      = NULL;
        WB_TINY       *tmp_ptr      = NULL;
        WB_ULONG       tmp_buf_len  = 0;
        size_t         in_left      = 0;
        size_t         out_left     = 0;
        WBXMLError     ret          = WBXML_OK;
        iconv_t        cd           = (iconv_t)(-1);

        /* Get Charsets names */
        if (!wbxml_charset_get_name(in_charset, &charset_from)) {
            return WBXML_ERROR_CHARSET_UNKNOWN;
        }

        if (!wbxml_charset_get_name(out_charset, &charset_to)) {
            return WBXML_ERROR_CHARSET_UNKNOWN;
        }

        /* Refuse sizes whose "4 bytes per input byte" result buffer would
         * overflow the length type */
        if (*io_bytes > ((WB_ULONG)(-1)) / (4 * sizeof(WB_TINY))) {
            return WBXML_ERROR_NOT_ENOUGH_MEMORY;
        }

        /* Init iconv */
        if ((cd = iconv_open(charset_to, charset_from)) == (iconv_t)(-1))
        {
            /* Init failed */
            return WBXML_ERROR_CHARSET_CONV_INIT;
        }

        /* Allocate maximum result buffer (4 bytes unicode) */
        tmp_buf_len = (WB_ULONG)(4 * (sizeof(WB_TINY) * (*io_bytes)));

        if ((tmp_buf = (WB_TINY *) wbxml_malloc(tmp_buf_len)) == NULL) {
            iconv_close(cd);
            return WBXML_ERROR_NOT_ENOUGH_MEMORY;
        }

        /* iconv advances tmp_buf: remember the start of the allocation */
        tmp_ptr = tmp_buf;

        /* iconv works on size_t counters: use real size_t objects rather than
         * casting pointers to WB_ULONG, which may have a different size */
        in_left  = (size_t) *io_bytes;
        out_left = (size_t) tmp_buf_len;

        /* Convert !
         * The input pointer goes through 'void *' because iconv
         * implementations declare it either as 'char **' or 'const char **'. */
        (void) iconv(cd,
                     (void *) &in_buf,
                     &in_left,
                     &tmp_buf,
                     &out_left);

        /* Report input bytes left unconverted */
        *io_bytes = (WB_ULONG) in_left;

        /** @todo Check errno (but it doesn't seems to work on windows) */

        if ((size_t) tmp_buf_len > out_left) {
            WB_ULONG converted_len = tmp_buf_len - (WB_ULONG) out_left;

            /* Create result buffer */
            if ((*out_buf = wbxml_buffer_create(tmp_ptr,
                                                converted_len,
                                                converted_len)) == NULL)
            {
                /* Not enough memory */
                ret = WBXML_ERROR_NOT_ENOUGH_MEMORY;
            }
            else
            {
                /* Remove trailing NULL char (only on a valid buffer) */
                wbxml_buffer_remove_trailing_zeros(out_buf);
            }
        }
        else
        {
            /* Not converted */
            ret = WBXML_ERROR_CHARSET_CONV;
        }

        /* Shutdown iconv */
        iconv_close(cd);

        /* Clean-up */
        wbxml_free(tmp_ptr);

        return ret;
    }

#else

    {
        /***************************************************
         * Add your own charset conversion function here !
         */

        return WBXML_ERROR_NO_CHARSET_CONV;
    }

#endif /* HAVE_ICONV */
}


/**
 * @brief Convert a terminated string from one charset to another
 *
 * The length of the input string is found by looking for its terminator
 * inside the first *io_bytes bytes of in_buf: two NULL bytes for
 * ISO-10646-UCS-2 and UTF-16, one NULL byte for every other charset.
 * The string, including its terminator, is then given to wbxml_charset_conv().
 *
 * @param in_buf      Buffer containing the terminated string to convert
 * @param io_bytes    [in]  Number of bytes available in in_buf
 *                    [out] Length in bytes of the input string, terminator
 *                          included (left untouched when the terminator is
 *                          not found)
 * @param in_charset  Charset of in_buf
 * @param out_buf     [out] Resulting buffer
 * @param out_charset Charset wanted for out_buf
 * @return WBXML_OK on success, WBXML_ERROR_CHARSET_STR_LEN if no terminator
 *         is found in the available bytes, or the error returned by
 *         wbxml_charset_conv()
 */
WBXML_DECLARE(WBXMLError) wbxml_charset_conv_term(const WB_TINY        *in_buf,
                                                  WB_ULONG             *io_bytes,
                                                  WBXMLCharsetMIBEnum   in_charset,
                                                  WBXMLBuffer         **out_buf,
                                                  WBXMLCharsetMIBEnum   out_charset)
{
    WB_ULONG   buf_len  = 0;
    WB_ULONG   new_len  = 0;
    WB_ULONG   term_len = 0;
    WB_ULONG   offset   = 0;
    WB_ULONG   pos      = 0;
    WBXMLError ret      = WBXML_OK;

    /* Find length of input buffer */
    switch (in_charset)
    {
    case WBXML_CHARSET_ISO_10646_UCS_2 :
    case WBXML_CHARSET_UTF_16 :
        /* Terminated by two NULL char ("\0\0") */
        term_len = 2;

        /* The terminator is a whole 16 bits code unit: a "\0\0" found at an
         * odd offset straddles two non-NULL code units, so keep searching.
         * NOTE(review): assumes in_buf starts on a code unit boundary. */
        for (;;) {
            if (!binary_search(in_buf + offset, *io_bytes - offset, "\0\0", 2, &pos)) {
                return WBXML_ERROR_CHARSET_STR_LEN;
            }

            pos += offset;

            if ((pos % 2) == 0) {
                break;
            }

            offset = pos + 1;
        }

        /* Add termination bytes length */
        buf_len = pos + term_len;
        break;

    default :
        /* Terminated by a simple NULL char ('\0') */
        term_len = 1;

        /* Bounded scan: never read past the bytes the caller gave us */
        while ((buf_len < *io_bytes) && (in_buf[buf_len] != '\0')) {
            buf_len++;
        }

        if (buf_len == *io_bytes) {
            /* No terminator in the available bytes */
            return WBXML_ERROR_CHARSET_STR_LEN;
        }

        /* Add termination byte length */
        buf_len += term_len;
        break;
    }

    /* Check length found */
    if (buf_len > *io_bytes) {
        return WBXML_ERROR_CHARSET_STR_LEN;
    }

    /* Use a temporary length var (because it is decreased) */
    new_len = buf_len;

    /* Convert ! */
    ret = wbxml_charset_conv(in_buf,
                             &new_len,
                             in_charset,
                             out_buf,
                             out_charset);

    /* Set input buffer length */
    *io_bytes = buf_len;

    return ret;
}


/***************************************************
 *    Private Functions
 */

/**
 * Binary search of a sequence of bytes into a buffer
 *
 * Bytes are compared as raw data: NULL bytes are not terminators here, so
 * sequences containing them (like "\0\0") can be searched. The first
 * occurrence is reported.
 *
 * @param in_buf     Buffer to search in
 * @param in_buf_len Length of input buffer
 * @param in_seq     Sequence to search
 * @param in_seq_len Length of sequence
 * @param out_pos    Index of Sequence into Buffer
 * @return TRUE if found, FALSE otherwise
 */
static WB_BOOL binary_search(const WB_TINY *in_buf,
                             WB_ULONG       in_buf_len,
                             const WB_TINY *in_seq,
                             WB_ULONG       in_seq_len,
                             WB_ULONG      *out_pos)
{
    WB_ULONG pos = 0;
    WB_ULONG i   = 0;

    /* Nothing can be found with missing arguments, an empty sequence, or a
     * sequence longer than the buffer (this also keeps the subtraction in
     * the loop bound from wrapping around) */
    if ((in_buf == NULL) || (in_seq == NULL) || (out_pos == NULL) ||
        (in_seq_len == 0) || (in_seq_len > in_buf_len))
    {
        return FALSE;
    }

    for (pos = 0; pos <= in_buf_len - in_seq_len; pos++) {
        for (i = 0; i < in_seq_len; i++) {
            if (in_buf[pos + i] != in_seq[i]) {
                break;
            }
        }

        if (i == in_seq_len) {
            /* Whole sequence matched at this position */
            *out_pos = pos;
            return TRUE;
        }
    }

    return FALSE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -