icutransservice.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,358 行 · 第 1/3 页

CPP
1,358
字号
/* * Copyright 1999-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: ICUTransService.cpp,v 1.16 2004/09/08 13:56:44 peiyongz Exp $ */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <xercesc/util/Janitor.hpp>#include <xercesc/util/TranscodingException.hpp>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/XMLUniDefs.hpp>#include "ICUTransService.hpp"#include <string.h>#include <unicode/uloc.h>#include <unicode/uchar.h>#include <unicode/ucnv.h>#include <unicode/ucnv_err.h>#include <unicode/ustring.h>#include <unicode/udata.h>#if (U_ICU_VERSION_MAJOR_NUM >= 2)    #include <unicode/uclean.h>#endif#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)// Forward reference the symbol which points to the ICU converter data.#if (U_ICU_VERSION_MAJOR_NUM < 2)extern "C" const uint8_t U_IMPORT icudata_dat[];#endif#endifXERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------//  Local, const data// ---------------------------------------------------------------------------static const XMLCh gMyServiceId[] ={    chLatin_I, chLatin_C, chLatin_U, chNull};static const XMLCh gS390Id[] ={    chLatin_S, chDigit_3, chDigit_9, chDigit_0, chNull};static const XMLCh gs390Id[] ={    chLatin_s, chDigit_3, chDigit_9, chDigit_0, chNull};static const XMLCh gswaplfnlId[] ={    chComma, chLatin_s, chLatin_w, chLatin_a, chLatin_p,    chLatin_l, chLatin_f, chLatin_n, chLatin_l, chNull};// ---------------------------------------------------------------------------//  Local functions// ---------------------------------------------------------------------------////  When XMLCh and ICU's UChar are not the same size, we have to do a temp//  conversion of all strings. These local helper methods make that easier.//static UChar* convertToUChar( const   XMLCh* const    toConvert                            , const unsigned int    srcLen = 0                            , MemoryManager* const manager = 0){    const unsigned int actualLen = srcLen                                   ? srcLen : XMLString::stringLen(toConvert);    UChar* tmpBuf = (manager)        ? (UChar*) manager->allocate((actualLen + 1) * sizeof(UChar))		: new UChar[actualLen + 1];    const XMLCh* srcPtr = toConvert;    UChar* outPtr = tmpBuf;    while (*srcPtr)        *outPtr++ = UChar(*srcPtr++);    *outPtr = 0;    return tmpBuf;}static XMLCh* convertToXMLCh( const UChar* const toConvert,                            MemoryManager* const manager = 0){    const unsigned int srcLen = u_strlen(toConvert);    XMLCh* retBuf = (manager)        ? (XMLCh*) manager->allocate((srcLen+1) * sizeof(XMLCh))        : new XMLCh[srcLen + 1];    XMLCh* outPtr = retBuf;    const UChar* srcPtr = toConvert;    while (*srcPtr)        *outPtr++ = XMLCh(*srcPtr++);    *outPtr = 0;    return retBuf;}// ---------------------------------------------------------------------------//  ICUTransService: Constructors and Destructor// ---------------------------------------------------------------------------ICUTransService::ICUTransService(){#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)#if (U_ICU_VERSION_MAJOR_NUM < 2)    // Starting with ICU 2.0, ICU itself includes a static reference to the data    // entrypoint symbol.    //    // ICU 1.8 (and previous) did not include a static reference, but would    // dynamically load the data dll when it was first needed, however this dynamic    // loading proved unreliable in some of the odd environments that Xerces needed    // to run in.  Hence, the static reference.    // Pass the location of the converter data to ICU. By doing so, we are    // forcing the load of ICU converter data DLL, after the Xerces-C DLL is    // loaded. This implies that Xerces-C, now has to explicitly link with the    // ICU converter dll. However, the advantage is that we no longer depend    // on the code which does demand dynamic loading of DLL's. The demand    // loading is highly system dependent and was a constant source of support    // calls.    UErrorCode uerr = U_ZERO_ERROR;    udata_setCommonData((void *) icudata_dat, &uerr);#endif#endif}ICUTransService::~ICUTransService(){    /*     * commented out the following clean up code     * in case users use ICU outside of the parser     * if we clean up here, users' code may crash     *    #if (U_ICU_VERSION_MAJOR_NUM >= 2)        // release all lasily allocated data        u_cleanup();    #endif    */}// ---------------------------------------------------------------------------//  ICUTransService: The virtual transcoding service API// ---------------------------------------------------------------------------int ICUTransService::compareIString(const   XMLCh* const    comp1                                    , const XMLCh* const    comp2){    const XMLCh* psz1 = comp1;    const XMLCh* psz2 = comp2;    unsigned int curCount = 0;    while (true)    {        //        //  If an inequality, then return the difference. Note that the XMLCh        //  might be bigger physically than UChar, but it won't hold anything        //  larger than 0xFFFF, so our cast here will work for both possible        //  sizes of XMLCh.        //        if (u_toupper(UChar(*psz1)) != u_toupper(UChar(*psz2)))            return int(*psz1) - int(*psz2);        // If either has ended, then they both ended, so equal        if (!*psz1 || !*psz2)            break;        // Move upwards for the next round        psz1++;        psz2++;    }    return 0;}int ICUTransService::compareNIString(const  XMLCh* const    comp1                                    , const XMLCh* const    comp2                                    , const unsigned int    maxChars){    const XMLCh* psz1 = comp1;    const XMLCh* psz2 = comp2;    unsigned int curCount = 0;    while (true)    {        //        //  If an inequality, then return the difference. Note that the XMLCh        //  might be bigger physically than UChar, but it won't hold anything        //  larger than 0xFFFF, so our cast here will work for both possible        //  sizes of XMLCh.        //        if (u_toupper(UChar(*psz1)) != u_toupper(UChar(*psz2)))            return int(*psz1) - int(*psz2);        // If either ended, then both ended, so equal        if (!*psz1 || !*psz2)            break;        // Move upwards to next chars        psz1++;        psz2++;        //        //  Bump the count of chars done. If it equals the count then we        //  are equal for the requested count, so break out and return        //  equal.        //        curCount++;        if (maxChars == curCount)            break;    }    return 0;}const XMLCh* ICUTransService::getId() const{    return gMyServiceId;}bool ICUTransService::isSpace(const XMLCh toCheck) const{    //    //  <TBD>    //  For now, we short circuit some of the control chars because ICU    //  is not correctly reporting them as space. Later, when they change    //  this, we can get rid of this special case.    //    if ((toCheck == 0x09)    ||  (toCheck == 0x0A)    ||  (toCheck == 0x0D))    {        return true;    }    return (u_isspace(UChar(toCheck)) != 0);}XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder(){    //    //  Try to create a default converter. If it fails, return a null    //  pointer which will basically cause the system to give up because    //  we really can't do anything without one.    //    UErrorCode uerr = U_ZERO_ERROR;    UConverter* converter = ucnv_open(NULL, &uerr);    if (!converter)        return 0;    // That went ok, so create an ICU LCP transcoder wrapper and return it    return new ICULCPTranscoder(converter);}bool ICUTransService::supportsSrcOfs() const{    // This implementation supports source offset information    return true;}void ICUTransService::upperCase(XMLCh* const toUpperCase) const{    XMLCh* outPtr = toUpperCase;    while (*outPtr)    {        *outPtr = XMLCh(u_toupper(UChar(*outPtr)));        outPtr++;    }}void ICUTransService::lowerCase(XMLCh* const toLowerCase) const{    XMLCh* outPtr = toLowerCase;    while (*outPtr)    {        *outPtr = XMLCh(u_tolower(UChar(*outPtr)));        outPtr++;    }}// ---------------------------------------------------------------------------//  ICUTransService: The protected virtual transcoding service API// ---------------------------------------------------------------------------XMLTranscoder* ICUTransService::makeNewXMLTranscoder(const  XMLCh* const            encodingName                    ,       XMLTransService::Codes& resValue                    , const unsigned int            blockSize                    ,       MemoryManager* const    manager){    //      //  For encodings that end with "s390" we need to strip off the "s390"     //  from the encoding name and add ",swaplfnl" to the encoding name	    //  that we pass into ICU on the ucnv_openU.      //      XMLCh* encodingNameToUse = (XMLCh*) encodingName;    XMLCh* workBuffer = 0;    if ( (XMLString::endsWith(encodingNameToUse, gs390Id)) ||         (XMLString::endsWith(encodingNameToUse, gS390Id)) )    {       int workBufferSize = (XMLString::stringLen(encodingNameToUse) + XMLString::stringLen(gswaplfnlId) - XMLString::stringLen(gS390Id) + 1);       workBuffer = (XMLCh*) manager->allocate(workBufferSize * sizeof(XMLCh));       int moveSize = XMLString::stringLen(encodingNameToUse) - XMLString::stringLen(gS390Id);       XMLString::moveChars(workBuffer, encodingNameToUse, moveSize);       XMLString::moveChars((workBuffer + moveSize), gswaplfnlId, XMLString::stringLen(gswaplfnlId));       encodingNameToUse = workBuffer;    }    //    //  If UChar and XMLCh are not the same size, then we have premassage the    //  encoding name into a UChar type string.    //    const UChar* actualName;    UChar* tmpName = 0;    if (sizeof(UChar) == sizeof(XMLCh))    {        actualName = (const UChar*)encodingNameToUse;    }    else    {        tmpName = convertToUChar(encodingNameToUse, 0, manager);        actualName = tmpName;    }    ArrayJanitor<UChar> janTmp(tmpName, manager);    ArrayJanitor<XMLCh> janTmp1(workBuffer, manager);    UErrorCode uerr = U_ZERO_ERROR;    UConverter* converter = ucnv_openU(actualName, &uerr);    if (!converter)    {        resValue = XMLTransService::UnsupportedEncoding;        return 0;    }    return new (manager) ICUTranscoder(encodingName, converter, blockSize, manager);}// ---------------------------------------------------------------------------//  ICUTranscoder: Constructors and Destructor// ---------------------------------------------------------------------------ICUTranscoder::ICUTranscoder(const  XMLCh* const        encodingName                            ,       UConverter* const   toAdopt                            , const unsigned int        blockSize                            , MemoryManager* const      manager) :    XMLTranscoder(encodingName, blockSize, manager)    , fConverter(toAdopt)    , fFixed(false)    , fSrcOffsets(0){    // If there is a block size, then allocate our source offset array    if (blockSize)        fSrcOffsets = (XMLUInt32*) manager->allocate        (            blockSize * sizeof(XMLUInt32)        );//new XMLUInt32[blockSize];    // Remember if its a fixed size encoding    fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));}ICUTranscoder::~ICUTranscoder(){    getMemoryManager()->deallocate(fSrcOffsets);//delete [] fSrcOffsets;    // If there is a converter, ask ICU to clean it up    if (fConverter)    {        // <TBD> Does this actually delete the structure???        ucnv_close(fConverter);        fConverter = 0;    }}// ---------------------------------------------------------------------------//  ICUTranscoder: The virtual transcoder API// ---------------------------------------------------------------------------unsigned intICUTranscoder::transcodeFrom(const  XMLByte* const          srcData                            , const unsigned int            srcCount                            ,       XMLCh* const            toFill                            , const unsigned int            maxChars                            ,       unsigned int&           bytesEaten                            ,       unsigned char* const    charSizes){    // If debugging, insure the block size is legal    #if defined(XERCES_DEBUG)    checkBlockSize(maxChars);    #endif    // Set up pointers to the start and end of the source buffer    const XMLByte*  startSrc = srcData;    const XMLByte*  endSrc = srcData + srcCount;    //    //  And now do the target buffer. This works differently according to    //  whether XMLCh and UChar are the same size or not.    //    UChar* startTarget;    if (sizeof(XMLCh) == sizeof(UChar))        startTarget = (UChar*)toFill;     else        startTarget = (UChar*) getMemoryManager()->allocate        (            maxChars * sizeof(UChar)        );//new UChar[maxChars];    UChar* orgTarget = startTarget;    //    //  Transoode the buffer.  Buffer overflow errors are normal, occuring    //  when the raw input buffer holds more characters than will fit in    //  the Unicode output buffer.    //    UErrorCode  err = U_ZERO_ERROR;    ucnv_toUnicode    (        fConverter        , &startTarget        , startTarget + maxChars        , (const char**)&startSrc        , (const char*)endSrc        , (fFixed ? 0 : (int32_t*)fSrcOffsets)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?