iconvgnutransservice.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,336 行 · 第 1/3 页

CPP
1,336
字号
/*
 * Copyright 2002,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Log: IconvGNUTransService.cpp,v $
 * Revision 1.15  2004/09/08 13:56:45  peiyongz
 * Apache License Version 2.0
 *
 * Revision 1.14  2004/07/23 15:29:09  amassari
 * transcode was badly terminating the converted string (jira#1206)
 *
 * Revision 1.13  2004/07/23 14:35:03  amassari
 * A global mutex was not cleaned up
 *
 * Revision 1.12  2004/02/25 14:53:24  peiyongz
 * Bug#27209: Xerces 2.5.0 does not build with option -t IconvGNU because of syntax errors!
 *
 * Revision 1.11  2003/12/24 15:24:15  cargilld
 * More updates to memory management so that the static memory manager.
 *
 * Revision 1.10  2003/08/19 14:01:41  neilg
 * fix for bug 22537
 *
 * Revision 1.9  2003/05/17 16:32:18  knoaman
 * Memory manager implementation : transcoder update.
 *
 * Revision 1.8  2003/05/16 21:37:00  knoaman
 * Memory manager implementation: Modify constructors to pass in the memory manager.
 *
 * Revision 1.7  2003/05/15 18:47:05  knoaman
 * Partial implementation of the configurable memory manager.
 *
 * Revision 1.6  2003/04/07 16:52:13  peiyongz
 * Bug# 18672: IconvGNUTranscoder can't be build when namespaces is on.
 *                       Patch from Bacek@yandex-team.ru (Vasily Tchekalkin)
 *
 * Revision 1.5  2003/03/09 17:03:25  peiyongz
 * PanicHandler
 *
 * Revision 1.4  2002/12/31 18:42:54  tng
 * [Bug 15608] IconvLCPTranscoder::transcode() is wrong at wcstombs() usage.
* * Revision 1.3  2002/11/04 15:14:34  tng * C++ Namespace Support. * * Revision 1.2  2002/09/27 13:33:43  tng * [Bug 12547] Xerces C++ 2.1 fails to build on Linux 64 bits arch with -tlinux.  Patch from Guillaume Morin. * * Revision 1.1  2002/08/19 19:38:18  tng * [Bug 11771] Linux specific IconvGNU transcoder.  Patch from Vasily Tchekalkin. * */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <ctype.h>#include <locale.h>#include <iconv.h>#include <errno.h>#include <endian.h>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/XMLUniDefs.hpp>#include <xercesc/util/XMLUni.hpp>#include <xercesc/util/PlatformUtils.hpp>#include <xercesc/util/TranscodingException.hpp>#include "IconvGNUTransService.hpp"#if !defined(APP_NO_THREADS)#include <xercesc/util/Mutexes.hpp>#include <xercesc/util/XMLRegisterCleanup.hpp>#endif /* !APP_NO_THREADS */XERCES_CPP_NAMESPACE_BEGIN#if !defined(APP_NO_THREADS)// Iconv() access syncronization pointstatic XMLMutex    *gIconvMutex = NULL;static XMLRegisterCleanup IconvGNUMutexCleanup;#  define ICONV_LOCK    XMLMutexLock lockConverter(gIconvMutex);#else /* APP_NO_THREADS */# define ICONV_LOCK#endif /* !APP_NO_THREADS */// ---------------------------------------------------------------------------// Description of encoding schemas, supported by iconv()// ---------------------------------------------------------------------------typedef struct __IconvGNUEncoding {    const char*    fSchema;    // schema name    size_t    fUChSize;    // size of the character    unsigned int fUBO;        // byte order, relative to the host} IconvGNUEncoding;static const IconvGNUEncoding    gIconvGNUEncodings[] = {    { "UCS-2LE",        2,    LITTLE_ENDIAN },    { "ucs-2-internal",        2,    LITTLE_ENDIAN },    { NULL, 0,    0 }};//--------------------------------------------------// Macro-definitions to translate "native 
unicode"// characters <-> XMLCh with different host byte order// and encoding schemas.# if BYTE_ORDER == LITTLE_ENDIAN#  define IXMLCh2WC16(x,w)            \    *(w) = ((*(x)) >> 8) & 0xFF;        \    *((w)+1) = (*(x)) & 0xFF#  define IWC162XMLCh(w,x)    *(x) = ((*(w)) << 8) | (*((w)+1))#  define XMLCh2WC16(x,w)            \    *(w) = (*(x)) & 0xFF;            \    *((w)+1) = ((*(x)) >> 8) & 0xFF#  define WC162XMLCh(w,x)    *(x) = ((*((w)+1)) << 8) | (*(w))#  define IXMLCh2WC32(x,w)            \    *(w) = ((*(x)) >> 24) & 0xFF;        \    *((w)+1) = ((*(x)) >> 16) & 0xFF;    \    *((w)+2) = ((*(x)) >> 8) & 0xFF;    \    *((w)+3) = (*(x)) & 0xFF#  define IWC322XMLCh(w,x)                \      *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) |    \          ((*((w)+2)) << 8) | (*((w)+3))#  define XMLCh2WC32(x,w)            \    *((w)+3) = ((*(x)) >> 24) & 0xFF;    \    *((w)+2) = ((*(x)) >> 16) & 0xFF;    \    *((w)+1) = ((*(x)) >> 8) & 0xFF;    \    *(w) = (*(x)) & 0xFF#  define WC322XMLCh(w,x)                    \      *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) |    \        ((*((w)+1)) << 8) | (*(w))# else /* BYTE_ORDER != LITTLE_ENDIAN */#  define XMLCh2WC16(x,w)            \    *(w) = ((*(x)) >> 8) & 0xFF;        \    *((w)+1) = (*(x)) & 0xFF#  define WC162XMLCh(w,x)    *(x) = ((*(w)) << 8) | (*((w)+1))#  define IXMLCh2WC16(x,w)            \    *(w) = (*(x)) & 0xFF;            \    *((w)+1) = ((*(x)) >> 8) & 0xFF#  define IWC162XMLCh(w,x)    *(x) = ((*((w)+1)) << 8) | (*(w))#  define XMLCh2WC32(x,w)            \    *(w) = ((*(x)) >> 24) & 0xFF;        \    *((w)+1) = ((*(x)) >> 16) & 0xFF;    \    *((w)+2) = ((*(x)) >> 8) & 0xFF;    \    *((w)+3) = (*(x)) & 0xFF#  define WC322XMLCh(w,x)                \      *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) |    \          ((*((w)+2)) << 8) | (*((w)+3))#  define IXMLCh2WC32(x,w)            \    *((w)+3) = ((*(x)) >> 24) & 0xFF;    \    *((w)+2) = ((*(x)) >> 16) & 0xFF;    \    *((w)+1) = ((*(x)) >> 8) & 0xFF;    \    
*(w) = (*(x)) & 0xFF#  define IWC322XMLCh(w,x)                    \      *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) |    \        ((*((w)+1)) << 8) | (*(w))# endif /* BYTE_ORDER == LITTLE_ENDIAN */#include <wchar.h>#include <string.h>#include <stdlib.h>#include <stdio.h>// ---------------------------------------------------------------------------//  Local, const data// ---------------------------------------------------------------------------static const unsigned int    gTempBuffArraySize = 4096;static const XMLCh        gMyServiceId[] ={    chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull};// ---------------------------------------------------------------------------//  Local methods// ---------------------------------------------------------------------------static unsigned int getWideCharLength(const XMLCh* const src){    if (!src)        return 0;    unsigned int len = 0;    const XMLCh* pTmp = src;    while (*pTmp++)        len++;    return len;}//----------------------------------------------------------------------------// There is implementation of the libiconv for FreeBSD (available through the// ports collection). 
The following is a wrapper around the iconv().//----------------------------------------------------------------------------IconvGNUWrapper::IconvGNUWrapper ()    : fUChSize(0), fUBO(LITTLE_ENDIAN),      fCDTo((iconv_t)-1), fCDFrom((iconv_t)-1){}IconvGNUWrapper::IconvGNUWrapper ( iconv_t    cd_from,               iconv_t    cd_to,               size_t    uchsize,               unsigned int    ubo )    : fUChSize(uchsize), fUBO(ubo),      fCDTo(cd_to), fCDFrom(cd_from){    if (fCDFrom == (iconv_t) -1 || fCDTo == (iconv_t) -1) {    XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService);    }}IconvGNUWrapper::~IconvGNUWrapper(){}// Convert "native unicode" character into XMLChvoid    IconvGNUWrapper::mbcToXMLCh (const char *mbc, XMLCh *toRet) const{    if (fUBO == LITTLE_ENDIAN) {        if (fUChSize == sizeof(XMLCh))            *toRet = *((XMLCh*) mbc);        else if (fUChSize == 2) {            WC162XMLCh( mbc, toRet );        } else {            WC322XMLCh( mbc, toRet );        }    } else {        if (fUChSize == 2) {            IWC162XMLCh( mbc, toRet );        } else {            IWC322XMLCh( mbc, toRet );        }    }}// Convert XMLCh into "native unicode" charactervoid    IconvGNUWrapper::xmlChToMbc (XMLCh xch, char *mbc) const{    if (fUBO == LITTLE_ENDIAN) {        if (fUChSize == sizeof(XMLCh)) {            memcpy (mbc, &xch, fUChSize);            return;        }        if (fUChSize == 2) {            XMLCh2WC16( &xch, mbc );        } else {            XMLCh2WC32( &xch, mbc );        }    } else {        if (fUChSize == 2) {            IXMLCh2WC16( &xch, mbc );        } else {            IXMLCh2WC32( &xch, mbc );        }    }}// Return uppercase equivalent for XMLChXMLCh    IconvGNUWrapper::toUpper (const XMLCh ch) const{    if (ch <= 0x7F)        return toupper(ch);    char    wcbuf[fUChSize * 2];    xmlChToMbc (ch, wcbuf);    char    tmpArr[4];    char*    ptr = wcbuf;    size_t    len = fUChSize;    char    *pTmpArr = tmpArr;    size_t    bLen 
= 2;    ICONV_LOCK;    if (::iconv (fCDTo, &ptr, &len,         &pTmpArr, &bLen) == (size_t) -1)    return 0;    tmpArr[1] = toupper (*((unsigned char *)tmpArr));    *tmpArr = tmpArr[1];    len = 1;    pTmpArr = wcbuf;    bLen = fUChSize;    ptr = tmpArr;    if (::iconv (fCDFrom, &ptr, &len,         &pTmpArr, &bLen) == (size_t) -1)    return 0;    mbcToXMLCh (wcbuf, (XMLCh*) &ch);    return ch;}// Return lowercase equivalent for XMLChXMLCh    IconvGNUWrapper::toLower (const XMLCh ch) const{    if (ch <= 0x7F)        return tolower(ch);    char    wcbuf[fUChSize * 2];    xmlChToMbc (ch, wcbuf);    char    tmpArr[4];    char*    ptr = wcbuf;    size_t    len = fUChSize;    char    *pTmpArr = tmpArr;    size_t    bLen = 2;    ICONV_LOCK;    if (::iconv (fCDTo, &ptr, &len,         &pTmpArr, &bLen) == (size_t) -1)    return 0;    tmpArr[1] = tolower (*((unsigned char*)tmpArr));    *tmpArr = tmpArr[1];    len = 1;    pTmpArr = wcbuf;    bLen = fUChSize;    ptr = tmpArr;    if (::iconv (fCDFrom, &ptr, &len,         &pTmpArr, &bLen) == (size_t) -1)    return 0;    mbcToXMLCh (wcbuf, (XMLCh*) &ch);    return ch;}// Check if passed characters belongs to the :space: classbool    IconvGNUWrapper::isSpace(const XMLCh toCheck) const{    if (toCheck <= 0x7F)        return isspace(toCheck);    char    wcbuf[fUChSize * 2];    char    tmpArr[4];    xmlChToMbc (toCheck, wcbuf);    char*    ptr = wcbuf;    size_t    len = fUChSize;    char    *pTmpArr = tmpArr;    size_t    bLen = 2;    {        ICONV_LOCK;        if (::iconv (fCDTo, &ptr, &len,                 &pTmpArr, &bLen) == (size_t) -1)            return 0;    }    return isspace(*tmpArr);}// Fill array of XMLCh characters with data, supplyed in the array// of "native unicode" characters.XMLCh*    IconvGNUWrapper::mbsToXML(    const char*        mbs_str    ,      size_t    mbs_cnt    ,      XMLCh*    xml_str    ,      size_t    xml_cnt) const{    if (mbs_str == NULL || mbs_cnt == 0 || xml_str == NULL || xml_cnt == 0)        
return NULL;    size_t    cnt = (mbs_cnt < xml_cnt) ? mbs_cnt : xml_cnt;    if (fUBO == LITTLE_ENDIAN) {        if (fUChSize == sizeof(XMLCh)) {            // null-transformation            memcpy (xml_str, mbs_str, fUChSize * cnt);            return xml_str;        }        if (fUChSize == 2)            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {                WC162XMLCh( mbs_str, xml_str + i);            }        else            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {                WC322XMLCh( mbs_str, xml_str + i );            }    } else {        if (fUChSize == 2)            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {                IWC162XMLCh( mbs_str, xml_str + i );            }        else            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {                IWC322XMLCh( mbs_str, xml_str + i );            }    }    return xml_str;}// Fill array of "native unicode" characters with data, supplyed// in the array of XMLCh characters.char*    IconvGNUWrapper::xmlToMbs(    const XMLCh*    xml_str    ,      size_t    xml_cnt    ,      char*        mbs_str    ,      size_t    mbs_cnt) const{    if (mbs_str == NULL || mbs_cnt == 0 || xml_str == NULL || xml_cnt == 0)        return NULL;    size_t    cnt = (mbs_cnt < xml_cnt) ? 
mbs_cnt : xml_cnt;    char    *toReturn = mbs_str;    if (fUBO == LITTLE_ENDIAN) {        if (fUChSize == sizeof(XMLCh)) {            // null-transformation            memcpy (mbs_str, xml_str, fUChSize * cnt);            return toReturn;        }        if (fUChSize == 2)            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {                XMLCh2WC16( xml_str, mbs_str );            }        else            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {                XMLCh2WC32( xml_str, mbs_str );            }    } else {        if (fUChSize == 2)            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {                IXMLCh2WC16( xml_str, mbs_str );            }        else            for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {                IXMLCh2WC32( xml_str, mbs_str );            }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?