iconv400transservice.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,042 行 · 第 1/2 页

CPP
1,042
字号
/* * Copyright 1999-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//** * @01A D998714.1 V5R2M0    100301   Swan    :Fix error return flags * @02A           V5R2M0    200419   jrhansen : support lowercase function * $Id: Iconv400TransService.cpp,v 1.13 2004/09/08 13:56:45 peiyongz Exp $ */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <xercesc/util/TranscodingException.hpp>#include "Iconv400TransService.hpp"#include <string.h>#include <qlgcase.h>#include "iconv_cnv.hpp"#include "iconv_util.hpp"#include <qusec.h>#include <xercesc/util/XMLUniDefs.hpp>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/Janitor.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------//  Local functions// ---------------------------------------------------------------------------////  When XMLCh and ICU's UChar are not the same size, we have to do a temp//  conversion of all strings. These local helper methods make that easier.//static UChar* convertToUChar( const   XMLCh* const toConvert                            , const unsigned int   srcLen = 0                            , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager){    const unsigned int actualLen = srcLen                                   ? srcLen : XMLString::stringLen(toConvert);    UChar* tmpBuf = (UChar*) manager->allocate    (        (srcLen + 1) * sizeof(UChar)    );//new UChar[srcLen + 1];    const XMLCh* srcPtr = toConvert;    UChar* outPtr = tmpBuf;    while (*srcPtr)        *outPtr++ = UChar(*srcPtr++);    *outPtr = 0;    return tmpBuf;}// ---------------------------------------------------------------------------//  Local, const data// ---------------------------------------------------------------------------static const XMLCh gMyServiceId[] ={    chLatin_I, chLatin_C, chLatin_O, chLatin_V, chDigit_4, chDigit_0, chDigit_0, chNull};// ---------------------------------------------------------------------------//  IconvTransService: Constructors and Destructor// ---------------------------------------------------------------------------Iconv400TransService::Iconv400TransService(){    memset((char*)&convertCtlblkUpper,'\0',sizeof(convertCtlblkUpper));    convertCtlblkUpper.Type_of_Request = 1;    convertCtlblkUpper.Case_Request = 0;   // upper case    convertCtlblkUpper.CCSID_of_Input_Data = 13488;    memset((char*)&convertCtlblkLower,'\0',sizeof(convertCtlblkLower));    convertCtlblkLower.Type_of_Request = 1;    convertCtlblkLower.Case_Request = 1;    convertCtlblkLower.CCSID_of_Input_Data = 13488;}Iconv400TransService::~Iconv400TransService(){}// ---------------------------------------------------------------------------//  Iconv400TransService: The virtual transcoding service API// ---------------------------------------------------------------------------int Iconv400TransService::compareIString(const   XMLCh* const    comp1                                         , const XMLCh* const    comp2){    const XMLCh* psz1 = comp1;    const XMLCh* psz2 = comp2;    while (true)    {        if (toUnicodeUpper(*psz1) != toUnicodeUpper(*psz2))            return int(*psz1) - int(*psz2);        // If either has ended, then they both ended, so equal        if (!*psz1 || !*psz2)            break;        // Move upwards for the next round        psz1++;        psz2++;    }    return 0;}int Iconv400TransService::compareNIString(const  XMLCh* const    comp1                                          , const XMLCh* const    comp2                                          , const unsigned int    maxChars){    const XMLCh* psz1 = comp1;    const XMLCh* psz2 = comp2;    unsigned int curCount = 0;    while (true)    {        // If an inequality, then return the difference        // If an inequality, then return difference        if (toUnicodeUpper(*psz1) != toUnicodeUpper(*psz2))            return int(*psz1) - int(*psz2);        // If either ended, then both ended, so equal        if (!*psz1 || !*psz2)            break;        // Move upwards to next chars        psz1++;        psz2++;        //        //  Bump the count of chars done. If it equals the count then we        //  are equal for the requested count, so break out and return        //  equal.        //        curCount++;        if (maxChars == curCount)            break;    }    return 0;}const XMLCh* Iconv400TransService::getId() const{    return gMyServiceId;}bool Iconv400TransService::isSpace(const XMLCh toCheck) const{    //   The following are Unicode Space characters    //    if ((toCheck == 0x09)    ||  (toCheck == 0x0A)    ||  (toCheck == 0x0D)    ||  (toCheck == 0x20)    ||  (toCheck == 0xA0)    ||  ((toCheck >= 0x2000) && (toCheck <= 0x200B))    ||  (toCheck == 0x3000)    ||  (toCheck == 0xFEFF))    {        return true;    }    else return false;}XMLLCPTranscoder* Iconv400TransService::makeNewLCPTranscoder(){    //    //  Try to create a default converter. If it fails, return a null pointer    //  which will basically cause the system to give up because we really can't    //  do anything without one.    //    UErrorCode uerr = U_ZERO_ERROR;    UConverter* converter = ucnv_open(NULL, &uerr);    if (!converter)        return 0;    // That went ok, so create an Iconv LCP transcoder wrapper and return it    return new Iconv400LCPTranscoder(converter);}bool Iconv400TransService::supportsSrcOfs() const{    // This implementation supports source offset information    return true;}void Iconv400TransService::upperCase(XMLCh* const toUpperCase) const{    XMLCh* outPtr = toUpperCase;    while (*outPtr)    {        *outPtr = toUnicodeUpper(*outPtr);        outPtr++;    }}void Iconv400TransService::lowerCase(XMLCh* const toLowerCase) const{    XMLCh* outPtr = toLowerCase;    while (*outPtr)    {        *outPtr = toUnicodeLower(*outPtr);        outPtr++;    }}// ---------------------------------------------------------------------------//  Iconv400TransService: The virtual transcoding service API// ---------------------------------------------------------------------------XMLCh Iconv400TransService::toUnicodeUpper(XMLCh comp1) const{    XMLCh chRet;    struct {             int bytes_available;             int bytes_used;             char exception_id[7];             char reserved;             char exception_data[15];            } error_code;     error_code.bytes_available = sizeof(error_code);    long charlen =2;    QlgConvertCase((char*)&convertCtlblkUpper,                       (char*)&comp1,                       (char*)&chRet,                       (long*)&charlen,                       (char*)&error_code);    return chRet;}XMLCh Iconv400TransService::toUnicodeLower(XMLCh comp1) const{    XMLCh chRet;    struct {             int bytes_available;             int bytes_used;             char exception_id[7];             char reserved;             char exception_data[15];            } error_code;     error_code.bytes_available = sizeof(error_code);    long charlen =2;    QlgConvertCase((char*)&convertCtlblkLower,                       (char*)&comp1,                       (char*)&chRet,                       (long*)&charlen,                       (char*)&error_code);    return chRet;}// ---------------------------------------------------------------------------//  Iconv400TransService: The protected virtual transcoding service API// ---------------------------------------------------------------------------XMLTranscoder*Iconv400TransService::makeNewXMLTranscoder(  const   XMLCh* const            encodingName                                        ,       XMLTransService::Codes& resValue                                        , const unsigned int            blockSize                                        ,       MemoryManager* const    manager){    UErrorCode uerr = U_ZERO_ERROR;    UConverter* converter = ucnv_openU(encodingName, &uerr);    if (!converter)    {        resValue = XMLTransService::UnsupportedEncoding;        return 0;    }    return new (manager) Iconv400Transcoder(encodingName, converter, blockSize, manager);}// ---------------------------------------------------------------------------//  IconvTranscoder: Constructors and Destructor// ---------------------------------------------------------------------------Iconv400Transcoder::Iconv400Transcoder( const XMLCh* const         encodingName                                      ,       UConverter* const    toAdopt                                      , const unsigned int         blockSize                                      ,       MemoryManager* const manager) :    XMLTranscoder(encodingName, blockSize, manager)    , fConverter(toAdopt)    , fFixed(false)    , fSrcOffsets(0){    // If there is a block size, then allocate our source offset array    if (blockSize)        fSrcOffsets = (long*) manager->allocate(blockSize * sizeof(long));//new long[blockSize];    // Remember if its a fixed size encoding    fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));}Iconv400Transcoder::~Iconv400Transcoder(){    getMemoryManager()->deallocate(fSrcOffsets);//delete [] fSrcOffsets;    // If there is a converter, ask Iconv400 to clean it up    if (fConverter)    {        // <TBD> Does this actually delete the structure???        ucnv_close(fConverter);        fConverter = 0;    }}// ---------------------------------------------------------------------------//  Iconv400Transcoder: The virtual transcoder API// ---------------------------------------------------------------------------unsigned intIconv400Transcoder::transcodeFrom(const  XMLByte* const          srcData                            , const unsigned int            srcCount                            ,       XMLCh* const            toFill                            , const unsigned int            maxChars                            ,       unsigned int&           bytesEaten                            ,       unsigned char* const    charSizes){    // If debugging, insure the block size is legal    // Set up pointers to the start and end of the source buffer    const XMLByte*  startSrc = srcData;    const XMLByte*  endSrc = srcData + srcCount;    //    //  And now do the target buffer. This works differently according to    //  whether XMLCh and UChar are the same size or not.    //    UChar* startTarget;    if (sizeof(XMLCh) == sizeof(UChar))        startTarget = (UChar*)toFill;     else        startTarget = (UChar*) getMemoryManager()->allocate        (            maxChars * sizeof(UChar)        );//new UChar[maxChars];    UChar* orgTarget = startTarget;    //    //  Transoode the buffer.  Buffer overflow errors are normal, occuring    //  when the raw input buffer holds more characters than will fit in    //  the Unicode output buffer.    //    UErrorCode  err = U_ZERO_ERROR;    ucnv_toUnicode    (        fConverter        , &startTarget        , startTarget + maxChars        , (const char**)&startSrc        , (const char*)endSrc        , (fFixed ? 0 : (int32_t*)fSrcOffsets)        , false        , &err    );    if ((err != U_ZERO_ERROR) && (err != U_INDEX_OUTOFBOUNDS_ERROR))    {        if (orgTarget != (UChar*)toFill)            getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;        if (fFixed)        {            XMLCh tmpBuf[17];            XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager());            ThrowXMLwithMemMgr2            (                TranscodingException                , XMLExcepts::Trans_BadSrcCP                , tmpBuf                , getEncodingName()                , getMemoryManager()            );        }         else        {            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());        }    }    // Calculate the bytes eaten and store in caller's param    bytesEaten = startSrc - srcData;    // And the characters decoded    const unsigned int charsDecoded = startTarget - orgTarget;    //    //  Translate the array of char offsets into an array of character    //  sizes, which is what the transcoder interface semantics requires.    //  If its fixed, then we can optimize it.    //    if (fFixed)    {        const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);        memset(charSizes, fillSize, maxChars);    }     else    {        //        //  We have to convert the series of offsets into a series of        //  sizes. If just one char was decoded, then its the total bytes        //  eaten. Otherwise, do a loop and subtract out each element from        //  its previous element.        //        if (charsDecoded == 1)        {            charSizes[0] = (unsigned char)bytesEaten;        }         else        {            //  ICU does not return an extra element to allow us to figure            //  out the last char size, so we have to compute it from the            //  total bytes used.            unsigned int index;            for (index = 0; index < charsDecoded - 1; index++)            {                charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]                                                    - fSrcOffsets[index]);            }            if( charsDecoded > 0 ) {                charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten                                              - fSrcOffsets[charsDecoded - 1]);            }        }    }    //    //  If XMLCh and UChar are not the same size, then we need to copy over    //  the temp buffer to the new one.    //    if (sizeof(UChar) != sizeof(XMLCh))    {        XMLCh* outPtr = toFill;        startTarget = orgTarget;        for (unsigned int index = 0; index < charsDecoded; index++)            *outPtr++ = XMLCh(*startTarget++);        // And delete the temp buffer        getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;    }    // Return the chars we put into the target buffer    return charsDecoded;}unsigned intIconv400Transcoder::transcodeTo( const   XMLCh* const    srcData                            , const unsigned int    srcCount                            ,       XMLByte* const  toFill                            , const unsigned int    maxBytes                            ,       unsigned int&   charsEaten                            , const UnRepOpts       options){    //    //  Get a pointer to the buffer to transcode. If UChar and XMLCh are    //  the same size here, then use the original. Else, create a temp    //  one and put a janitor on it.    //    const UChar* srcPtr;    UChar* tmpBufPtr = 0;    if (sizeof(XMLCh) == sizeof(UChar))    {        srcPtr = (const UChar*)srcData;    }     else    {        tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager());        srcPtr = tmpBufPtr;    }    ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager());    //    //  Set the appropriate callback so that it will either fail or use    //  the rep char. Remember the old one so we can put it back.    //    UErrorCode  err = U_ZERO_ERROR;    //    //  Ok, lets transcode as many chars as we we can in one shot. The    //  ICU API gives enough info not to have to do this one char by char.    //    XMLByte*        startTarget = toFill;    const UChar*    startSrc = srcPtr;    err = U_ZERO_ERROR;    ucnv_fromUnicode    (        fConverter        , (char**)&startTarget        , (char*)(startTarget + maxBytes)        , &startSrc        , srcPtr + srcCount        , 0

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?