icutransservice.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,358 行 · 第 1/3 页

CPP
1,358
字号
        , false        , &err    );    if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR))    {        if (orgTarget != (UChar*)toFill)            getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;        if (fFixed)        {            XMLCh tmpBuf[17];            XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager());            ThrowXMLwithMemMgr2            (                TranscodingException                , XMLExcepts::Trans_BadSrcCP                , tmpBuf                , getEncodingName()                , getMemoryManager()            );        }        else        {            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());        }    }    // Calculate the bytes eaten and store in caller's param    bytesEaten = startSrc - srcData;    // And the characters decoded    const unsigned int charsDecoded = startTarget - orgTarget;    //    //  Translate the array of char offsets into an array of character    //  sizes, which is what the transcoder interface semantics requires.    //  If its fixed, then we can optimize it.    //    if (fFixed)    {        const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);        memset(charSizes, fillSize, maxChars);    }     else    {        //        //  We have to convert the series of offsets into a series of        //  sizes. If just one char was decoded, then its the total bytes        //  eaten. Otherwise, do a loop and subtract out each element from        //  its previous element.        //        if (charsDecoded == 1)        {            charSizes[0] = (unsigned char)bytesEaten;        }         else        {            //  ICU does not return an extra element to allow us to figure            //  out the last char size, so we have to compute it from the            //  total bytes used.            unsigned int index;            for (index = 0; index < charsDecoded - 1; index++)            {                charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]                                                    - fSrcOffsets[index]);            }            if( charsDecoded > 0 ) {                charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten                                              - fSrcOffsets[charsDecoded - 1]);            }        }    }    //    //  If XMLCh and UChar are not the same size, then we need to copy over    //  the temp buffer to the new one.    //    if (sizeof(UChar) != sizeof(XMLCh))    {        XMLCh* outPtr = toFill;        startTarget = orgTarget;        for (unsigned int index = 0; index < charsDecoded; index++)            *outPtr++ = XMLCh(*startTarget++);        // And delete the temp buffer        getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;    }    // Return the chars we put into the target buffer    return charsDecoded;}unsigned intICUTranscoder::transcodeTo( const   XMLCh* const    srcData                            , const unsigned int    srcCount                            ,       XMLByte* const  toFill                            , const unsigned int    maxBytes                            ,       unsigned int&   charsEaten                            , const UnRepOpts       options){    //    //  Get a pointer to the buffer to transcode. If UChar and XMLCh are    //  the same size here, then use the original. Else, create a temp    //  one and put a janitor on it.    //    const UChar* srcPtr;    UChar* tmpBufPtr = 0;    if (sizeof(XMLCh) == sizeof(UChar))    {        srcPtr = (const UChar*)srcData;    }    else    {        tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager());        srcPtr = tmpBufPtr;    }    ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager());    //    //  Set the appropriate callback so that it will either fail or use    //  the rep char. Remember the old one so we can put it back.    //    UErrorCode  err = U_ZERO_ERROR;    UConverterFromUCallback oldCB = NULL;    #if (U_ICU_VERSION_MAJOR_NUM < 2)    void* orgContent;    #else    const void* orgContent;    #endif    ucnv_setFromUCallBack    (        fConverter        , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP                                   : UCNV_FROM_U_CALLBACK_SUBSTITUTE        , NULL        , &oldCB        , &orgContent        , &err    );    //    //  Ok, lets transcode as many chars as we we can in one shot. The    //  ICU API gives enough info not to have to do this one char by char.    //    XMLByte*        startTarget = toFill;    const UChar*    startSrc = srcPtr;    err = U_ZERO_ERROR;    ucnv_fromUnicode    (        fConverter        , (char**)&startTarget        , (char*)(startTarget + maxBytes)        , &startSrc        , srcPtr + srcCount        , 0        , false        , &err    );    // Rememember the status before we possibly overite the error code    const bool res = (err == U_ZERO_ERROR);    // Put the old handler back    err = U_ZERO_ERROR;    UConverterFromUCallback orgAction = NULL;    ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);    if (!res)    {        XMLCh tmpBuf[17];        XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager());        ThrowXMLwithMemMgr2        (            TranscodingException            , XMLExcepts::Trans_Unrepresentable            , tmpBuf            , getEncodingName()            , getMemoryManager()        );    }    // Fill in the chars we ate from the input    charsEaten = startSrc - srcPtr;    // Return the chars we stored    return startTarget - toFill;}bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) const{    //    //  If the passed value is really a surrogate embedded together, then    //  we need to break it out into its two chars. Else just one. While    //  we are ate it, convert them to UChar format if required.    //    UChar           srcBuf[2];    unsigned int    srcCount = 1;    if (toCheck & 0xFFFF0000)    {        srcBuf[0] = UChar((toCheck >> 10) + 0xD800);        srcBuf[1] = UChar(toCheck & 0x3FF) + 0xDC00;        srcCount++;    }     else    {        srcBuf[0] = UChar(toCheck);    }    //    //  Set the callback so that it will fail instead of using the rep char.    //  Remember the old one so we can put it back.    //     UErrorCode  err = U_ZERO_ERROR;     UConverterFromUCallback oldCB = NULL;     #if (U_ICU_VERSION_MAJOR_NUM < 2)     void* orgContent;     #else     const void* orgContent;     #endif     ucnv_setFromUCallBack         (         fConverter         , UCNV_FROM_U_CALLBACK_STOP         , NULL         , &oldCB         , &orgContent         , &err         );    // Set upa temp buffer to format into. Make it more than big enough    char            tmpBuf[64];    char*           startTarget = tmpBuf;    const UChar*    startSrc = srcBuf;    err = U_ZERO_ERROR;    ucnv_fromUnicode    (        fConverter        , &startTarget        , startTarget + 64        , &startSrc        , srcBuf + srcCount        , 0        , false        , &err    );    // Save the result before we overight the error code    const bool res = (err == U_ZERO_ERROR);    // Put the old handler back    err = U_ZERO_ERROR;    UConverterFromUCallback orgAction = NULL;    ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);    return res;}// ---------------------------------------------------------------------------//  ICULCPTranscoder: Constructors and Destructor// ---------------------------------------------------------------------------ICULCPTranscoder::ICULCPTranscoder(UConverter* const toAdopt) :    fConverter(toAdopt){}ICULCPTranscoder::~ICULCPTranscoder(){    // If there is a converter, ask ICU to clean it up    if (fConverter)    {        // <TBD> Does this actually delete the structure???        ucnv_close(fConverter);        fConverter = 0;    }}// ---------------------------------------------------------------------------//  ICULCPTranscoder: Constructors and Destructor// ---------------------------------------------------------------------------unsigned int ICULCPTranscoder::calcRequiredSize(const XMLCh* const srcText                                                , MemoryManager* const manager){    if (!srcText)        return 0;    //    //  We do two different versions of this, according to whether XMLCh    //  is the same size as UChar or not.    //    UErrorCode err = U_ZERO_ERROR;    int32_t targetCap;    if (sizeof(XMLCh) == sizeof(UChar))    {        // Use a faux scope to synchronize while we do this        {            XMLMutexLock lockConverter(&fMutex);            targetCap = ucnv_fromUChars            (                fConverter                , 0                , 0                , (const UChar*)srcText                , -1                , &err            );        }    }    else    {        // Copy the source to a local temp        UChar* tmpBuf = convertToUChar(srcText, 0, manager);        ArrayJanitor<UChar> janTmp(tmpBuf, manager);        // Use a faux scope to synchronize while we do this        {            XMLMutexLock lockConverter(&fMutex);            targetCap = ucnv_fromUChars            (                fConverter                , 0                , 0                , tmpBuf                , -1                , &err            );        }    }    if (err != U_BUFFER_OVERFLOW_ERROR)        return 0;    return (unsigned int)targetCap;}unsigned int ICULCPTranscoder::calcRequiredSize(const char* const srcText                                                , MemoryManager* const manager){    if (!srcText)        return 0;    int32_t targetCap;    UErrorCode err = U_ZERO_ERROR;    // Use a faux scope to synchronize while we do this    {        XMLMutexLock lockConverter(&fMutex);        targetCap = ucnv_toUChars        (            fConverter            , 0            , 0            , srcText            , strlen(srcText)            , &err        );    }    if (err != U_BUFFER_OVERFLOW_ERROR)        return 0;#if (U_ICU_VERSION_MAJOR_NUM < 2)    // Subtract one since it includes the terminator space    return (unsigned int)(targetCap - 1);#else    // Starting ICU 2.0, this is fixed and all ICU String functions have consistent NUL-termination behavior.    // The returned length is always the number of output UChar's, not counting an additional, terminating NUL.    return (unsigned int)(targetCap);#endif}char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode){    char* retBuf = 0;    // Check for a couple of special cases    if (!toTranscode)        return retBuf;    if (!*toTranscode)    {        retBuf = new char[1];        retBuf[0] = 0;        return retBuf;    }    //    //  Get the length of the source string since we'll have to use it in    //  a couple places below.    //    const unsigned int srcLen = XMLString::stringLen(toTranscode);    //    //  If XMLCh and UChar are not the same size, then we have to make a    //  temp copy of the text to pass to ICU.    //    const UChar* actualSrc;    UChar* ncActual = 0;    if (sizeof(XMLCh) == sizeof(UChar))    {        actualSrc = (const UChar*)toTranscode;    }     else    {        // Allocate a non-const temp buf, but store it also in the actual        ncActual = convertToUChar(toTranscode, 0, XMLPlatformUtils::fgMemoryManager);        actualSrc = ncActual;    }    // Insure that the temp buffer, if any, gets cleaned up via the nc pointer    ArrayJanitor<UChar> janTmp(ncActual, XMLPlatformUtils::fgMemoryManager);    // Caculate a return buffer size not too big, but less likely to overflow    int32_t targetLen = (int32_t)(srcLen * 1.25);    // Allocate the return buffer    retBuf = new char[targetLen + 1];    //    //  Lock now while we call the converter. Use a faux block to do the    //  lock so that it unlocks immediately afterwards.    //    UErrorCode err = U_ZERO_ERROR;    int32_t targetCap;    {        XMLMutexLock lockConverter(&fMutex);        targetCap = ucnv_fromUChars        (            fConverter            , retBuf            , targetLen + 1            , actualSrc            , -1            , &err        );    }    // If targetLen is not enough then buffer overflow might occur    if ((err == U_BUFFER_OVERFLOW_ERROR) || (err == U_STRING_NOT_TERMINATED_WARNING))

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?