iconvfbsdtransservice.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,778 行 · 第 1/4 页

CPP
1,778
字号
/* * Copyright 2001,2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Log: IconvFBSDTransService.cpp,v $ * Revision 1.17  2004/09/08 13:56:45  peiyongz * Apache License Version 2.0 * * Revision 1.16  2004/07/23 16:05:50  amassari * transcode was badly terminating the converted string (jira#1179) * * Revision 1.15  2003/12/24 15:24:15  cargilld * More updates to memory management so that the static memory manager. * * Revision 1.14  2003/05/17 16:32:17  knoaman * Memory manager implementation : transcoder update. * * Revision 1.13  2003/05/16 21:37:00  knoaman * Memory manager implementation: Modify constructors to pass in the memory manager. * * Revision 1.12  2003/05/15 18:47:04  knoaman * Partial implementation of the configurable memory manager. * * Revision 1.11  2003/03/09 17:02:57  peiyongz * PanicHandler * * Revision 1.10  2003/03/07 15:08:57  tng * [Bug 17571] fix building IconvFBSD (namespaces) .  Patch from Bjoern A. Zeeb. * * Revision 1.9  2003/03/07 14:42:45  tng * [Bug 17570] IconvFBSD build on alpha,sparc.  Patch from Bjoern A. Zeeb. * * Revision 1.8  2003/02/25 08:15:42  gareth * Patch to fix compile problem in bug #17358. Patch by Michael Cahill. * * Revision 1.7  2002/12/31 18:42:43  tng * [Bug 15608] IconvLCPTranscoder::transcode() is wrong at wcstombs() usage. * * Revision 1.6  2002/11/04 15:14:34  tng * C++ Namespace Support. * * Revision 1.5  2002/07/04 18:20:18  tng * [Bug 10253] Bugfix for the IconvFBSD transcoder.   Patch from Max Gotlib. * * Revision 1.4  2002/04/11 15:38:05  knoaman * String lower case support for FreeBSD by Max Gotlib * * Revision 1.3  2002/04/09 15:44:00  knoaman * Add lower case string support. * * Revision 1.2  2002/03/18 13:39:11  tng * [Bug 7162 ] IconvFreeBSDTransService.cpp needs an #include statement fixed to use xercesc. * * Revision 1.1.1.1  2002/02/01 22:22:36  peiyongz * sane_include * * Revision 1.4  2002/01/14 19:45:15  tng * Support IconvFBSD in multi-threading environment with all the possible combinations of threading and transcoding options.  By Max Gotlib. * * Revision 1.3  2001/12/12 14:48:16  tng * More bug fixes in IconvFBSD Transcoder.  By Max Gotlib. * * Revision 1.2  2001/12/11 15:10:14  tng * More changes to IconvFBSDTransService.  Allow using "old" TransServece implementation (via '-t native' option to runConfigure) or * to employ libiconv (it is a part of FreeBSD ports-collection) services.  By Max Gotlib. * * Revision 1.1  2001/12/03 14:45:11  tng * FreeBSD native transcoder (IconvFBSD) added by Max Gotlib. * */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <ctype.h>#ifdef XML_USE_LIBICONV# include <locale.h># include <iconv.h># include <errno.h># include <machine/endian.h>// ---------------------------------------------------------------------------// Description of encoding schemas, supported by iconv()// ---------------------------------------------------------------------------typedef struct __IconvFBSDEncoding {    const char*    fSchema;    // schema name    size_t    fUChSize;    // size of the character    unsigned int fUBO;        // byte order, relative to the host} IconvFBSDEncoding;static const IconvFBSDEncoding    gIconvFBSDEncodings[] = {    { "ucs-2-internal",        2,    LITTLE_ENDIAN },    { "ucs2-internal",        2,    LITTLE_ENDIAN },    { "ucs-4-internal",        4,    LITTLE_ENDIAN },    { "ucs4-internal",        4,    LITTLE_ENDIAN },    { "UNICODELITTLE",        2,    LITTLE_ENDIAN },    { "UNICODEBIG",        2,    BIG_ENDIAN },    { "iso-10646-ucs-2",    4,    BIG_ENDIAN },    { "iso-10646-ucs-4",    4,    BIG_ENDIAN },    /* { "iso-10646-utf-16",    2,    BIG_ENDIAN }, */    { NULL, 0,     0 }};//--------------------------------------------------// Macro-definitions to translate "native unicode"// characters <-> XMLCh with different host byte order// and encoding schemas.# if BYTE_ORDER == LITTLE_ENDIAN#  define IXMLCh2WC16(x,w)            \    *(w) = ((*(x)) >> 8) & 0xFF;        \    *((w)+1) = (*(x)) & 0xFF#  define IWC162XMLCh(w,x)    *(x) = ((*(w)) << 8) | (*((w)+1))#  define XMLCh2WC16(x,w)            \    *(w) = (*(x)) & 0xFF;            \    *((w)+1) = ((*(x)) >> 8) & 0xFF#  define WC162XMLCh(w,x)    *(x) = ((*((w)+1)) << 8) | (*(w))#  define IXMLCh2WC32(x,w)            \    *(w) = ((*(x)) >> 24) & 0xFF;        \    *((w)+1) = ((*(x)) >> 16) & 0xFF;    \    *((w)+2) = ((*(x)) >> 8) & 0xFF;    \    *((w)+3) = (*(x)) & 0xFF#  define IWC322XMLCh(w,x)                \      *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) |    \          ((*((w)+2)) << 8) | (*((w)+3))#  define XMLCh2WC32(x,w)            \    *((w)+3) = ((*(x)) >> 24) & 0xFF;    \    *((w)+2) = ((*(x)) >> 16) & 0xFF;    \    *((w)+1) = ((*(x)) >> 8) & 0xFF;    \    *(w) = (*(x)) & 0xFF#  define WC322XMLCh(w,x)                    \      *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) |    \        ((*((w)+1)) << 8) | (*(w))# else /* BYTE_ORDER != LITTLE_ENDIAN */#  define XMLCh2WC16(x,w)            \    *(w) = ((*(x)) >> 8) & 0xFF;        \    *((w)+1) = (*(x)) & 0xFF#  define WC162XMLCh(w,x)    *(x) = ((*(w)) << 8) | (*((w)+1))#  define IXMLCh2WC16(x,w)            \    *(w) = (*(x)) & 0xFF;            \    *((w)+1) = ((*(x)) >> 8) & 0xFF#  define IWC162XMLCh(w,x)    *(x) = ((*((w)+1)) << 8) | (*(w))#  define XMLCh2WC32(x,w)            \    *(w) = ((*(x)) >> 24) & 0xFF;        \    *((w)+1) = ((*(x)) >> 16) & 0xFF;    \    *((w)+2) = ((*(x)) >> 8) & 0xFF;    \    *((w)+3) = (*(x)) & 0xFF#  define WC322XMLCh(w,x)                \      *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) |    \          ((*((w)+2)) << 8) | (*((w)+3))#  define IXMLCh2WC32(x,w)            \    *((w)+3) = ((*(x)) >> 24) & 0xFF;    \    *((w)+2) = ((*(x)) >> 16) & 0xFF;    \    *((w)+1) = ((*(x)) >> 8) & 0xFF;    \    *(w) = (*(x)) & 0xFF#  define IWC322XMLCh(w,x)                    \      *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) |    \        ((*((w)+1)) << 8) | (*(w))# endif /* BYTE_ORDER == LITTLE_ENDIAN */#else /* !XML_USE_LIBICONV */# if __FreeBSD_cc_version > 430000#  include <wchar.h># else#  define wint_t XMLCh# endif#endif /* XML_USE_LIBICONV */#include <xercesc/util/XMLString.hpp>#include <xercesc/util/XMLUniDefs.hpp>#include <xercesc/util/XMLUni.hpp>#include <xercesc/util/PlatformUtils.hpp>#include <xercesc/util/TranscodingException.hpp>#include "IconvFBSDTransService.hpp"#include <string.h>#include <stdlib.h>#include <stdio.h>#if !defined(APP_NO_THREADS)#include <xercesc/util/Mutexes.hpp>#endifXERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------//  Local, const data// ---------------------------------------------------------------------------static const unsigned int    gTempBuffArraySize = 4096;static const XMLCh          gMyServiceId[] ={    chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull};// ---------------------------------------------------------------------------//  Local methods// ---------------------------------------------------------------------------static unsigned int getWideCharLength(const XMLCh* const src){    if (!src)        return 0;    unsigned int len = 0;    const XMLCh* pTmp = src;    while (*pTmp++)        len++;    return len;}#ifndef XML_USE_LIBICONV// ---------------------------------------------------------------------------// FreeBSD got the wide-characters support since 4.0 version. But (at least// up to the 4.4) this support differs from "others" in that the xxstoyys()// does not handle the NULL-dest argument properly. So the custom functions// are provided.// ---------------------------------------------------------------------------#define __TMP_ARRAY_SIZE__      4static size_t fbsd_wcstombs(char *dest, const wchar_t *src, size_t n){    char        tarr[ __TMP_ARRAY_SIZE__ + 1 ];    size_t      len = 0, lent = 0;    char*       ptr;    size_t      slen;    wchar_t*    wptr;    if (dest)        return ::wcstombs(dest, src, n);    if (!src)        return 0;    for (wptr = (wchar_t *) src, slen = 0; *wptr; wptr++, slen++);    if (slen == 0)        return 0;    wptr = (wchar_t *) src;    ptr = dest;    while ( (len = ::wcstombs(tarr, wptr, __TMP_ARRAY_SIZE__)) > 0 ) {        wptr += len;        lent += len;    }    if (len == (unsigned) -1)        return 0;    return lent;}static size_t fbsd_mbstowcs(wchar_t *dest, const char *src, size_t n){    wchar_t     tarr[ __TMP_ARRAY_SIZE__ + 1 ];    size_t      len = 0, lent = 0;    char*       ptr;    if (dest)        return ::mbstowcs(dest, src, n);    ptr = (char*) src;    if (!src || strlen(src) == 0)        return 0;    while ( (len = ::mbstowcs(tarr, ptr, __TMP_ARRAY_SIZE__)) > 0 ) {        ptr += len;        lent += len;    }    if (len == (unsigned) -1)        return 0;    return lent;}static wint_t fbsd_towupper(wint_t ch){    if (ch <= 0x7F)    return toupper(ch);    unsigned char    buf[16];    wchar_t    wc = wchar_t(ch);    wcstombs((char*)buf, &wc, 16);    return toupper(*buf);}static wint_t fbsd_towlower(wint_t ch){    if (ch <= 0x7F)    return tolower(ch);    unsigned char    buf[16];    wchar_t    wc = wchar_t(ch);    wcstombs((char *)buf, &wc, 16);    return tolower(*buf);}#else /* XML_USE_LIBICONV */#if !defined(APP_NO_THREADS)// Iconv() access syncronization pointstatic XMLMutex    *gIconvMutex = NULL;#  define ICONV_LOCK    XMLMutexLock lockConverter(gIconvMutex);#else /* APP_NO_THREADS */# define ICONV_LOCK#endif /* !APP_NO_THREADS *///----------------------------------------------------------------------------// There is implementation of the libiconv for FreeBSD (available through the// ports collection). The following is a wrapper around the iconv().//----------------------------------------------------------------------------IconvFBSDCD::IconvFBSDCD ()    : fUChSize(0), fUBO(LITTLE_ENDIAN),      fCDTo((iconv_t)-1), fCDFrom((iconv_t)-1){}IconvFBSDCD::IconvFBSDCD ( iconv_t    cd_from,               iconv_t    cd_to,               size_t    uchsize,               unsigned int    ubo )    : fUChSize(uchsize), fUBO(ubo),      fCDTo(cd_to), fCDFrom(cd_from){    if (fCDFrom == (iconv_t) -1 || fCDTo == (iconv_t) -1) {    XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService);    }}IconvFBSDCD::~IconvFBSDCD(){}// Convert "native unicode" character into XMLChvoid    IconvFBSDCD::mbcToXMLCh (const char *mbc, XMLCh *toRet) const{    if (fUBO == LITTLE_ENDIAN) {        if (fUChSize == sizeof(XMLCh))            *toRet = *((XMLCh*) mbc);        else if (fUChSize == 2) {            WC162XMLCh( mbc, toRet );        } else {            WC322XMLCh( mbc, toRet );        }    } else {        if (fUChSize == 2) {            IWC162XMLCh( mbc, toRet );        } else {            IWC322XMLCh( mbc, toRet );        }    }}// Convert XMLCh into "native unicode" charactervoid    IconvFBSDCD::xmlChToMbc (XMLCh xch, char *mbc) const{    if (fUBO == LITTLE_ENDIAN) {        if (fUChSize == sizeof(XMLCh)) {            memcpy (mbc, &xch, fUChSize);            return;        }        if (fUChSize == 2) {            XMLCh2WC16( &xch, mbc );        } else {            XMLCh2WC32( &xch, mbc );        }    } else {        if (fUChSize == 2) {            IXMLCh2WC16( &xch, mbc );        } else {            IXMLCh2WC32( &xch, mbc );        }    }}// Return uppercase equivalent for XMLChXMLCh    IconvFBSDCD::toUpper (const XMLCh ch) const{    if (ch <= 0x7F)        return toupper(ch);    char    wcbuf[fUChSize * 2];    xmlChToMbc (ch, wcbuf);    char    tmpArr[4];    char*    ptr = wcbuf;    size_t    len = fUChSize;    char    *pTmpArr = tmpArr;    size_t    bLen = 2;    ICONV_LOCK;    if (::iconv (fCDTo, (const char**) &ptr, &len,         &pTmpArr, &bLen) == (size_t) -1)    return 0;    tmpArr[1] = toupper (*((unsigned char *)tmpArr));    *tmpArr = tmpArr[1];    len = 1;    pTmpArr = wcbuf;    bLen = fUChSize;    ptr = tmpArr;    if (::iconv (fCDFrom, (const char **)&ptr, &len,         &pTmpArr, &bLen) == (size_t) -1)    return 0;    mbcToXMLCh (wcbuf, (XMLCh*) &ch);    return ch;}// Return lowercase equivalent for XMLChXMLCh    IconvFBSDCD::toLower (const XMLCh ch) const{    if (ch <= 0x7F)        return tolower(ch);    char    wcbuf[fUChSize * 2];    xmlChToMbc (ch, wcbuf);    char    tmpArr[4];    char*    ptr = wcbuf;    size_t    len = fUChSize;    char    *pTmpArr = tmpArr;    size_t    bLen = 2;    ICONV_LOCK;    if (::iconv (fCDTo, (const char**) &ptr, &len,         &pTmpArr, &bLen) == (size_t) -1)    return 0;    tmpArr[1] = tolower (*((unsigned char*)tmpArr));    *tmpArr = tmpArr[1];    len = 1;    pTmpArr = wcbuf;    bLen = fUChSize;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?