iconv400transservice.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,042 行 · 第 1/2 页
CPP
1,042 行
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//** * @01A D998714.1 V5R2M0 100301 Swan :Fix error return flags * @02A V5R2M0 200419 jrhansen : support lowercase function * $Id: Iconv400TransService.cpp,v 1.13 2004/09/08 13:56:45 peiyongz Exp $ */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/util/TranscodingException.hpp>#include "Iconv400TransService.hpp"#include <string.h>#include <qlgcase.h>#include "iconv_cnv.hpp"#include "iconv_util.hpp"#include <qusec.h>#include <xercesc/util/XMLUniDefs.hpp>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/Janitor.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// Local functions// ---------------------------------------------------------------------------//// When XMLCh and ICU's UChar are not the same size, we have to do a temp// conversion of all strings. These local helper methods make that easier.//static UChar* convertToUChar( const XMLCh* const toConvert , const unsigned int srcLen = 0 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager){ const unsigned int actualLen = srcLen ? srcLen : XMLString::stringLen(toConvert); UChar* tmpBuf = (UChar*) manager->allocate ( (srcLen + 1) * sizeof(UChar) );//new UChar[srcLen + 1]; const XMLCh* srcPtr = toConvert; UChar* outPtr = tmpBuf; while (*srcPtr) *outPtr++ = UChar(*srcPtr++); *outPtr = 0; return tmpBuf;}// ---------------------------------------------------------------------------// Local, const data// ---------------------------------------------------------------------------static const XMLCh gMyServiceId[] ={ chLatin_I, chLatin_C, chLatin_O, chLatin_V, chDigit_4, chDigit_0, chDigit_0, chNull};// ---------------------------------------------------------------------------// IconvTransService: Constructors and Destructor// ---------------------------------------------------------------------------Iconv400TransService::Iconv400TransService(){ memset((char*)&convertCtlblkUpper,'\0',sizeof(convertCtlblkUpper)); convertCtlblkUpper.Type_of_Request = 1; convertCtlblkUpper.Case_Request = 0; // upper case convertCtlblkUpper.CCSID_of_Input_Data = 13488; memset((char*)&convertCtlblkLower,'\0',sizeof(convertCtlblkLower)); convertCtlblkLower.Type_of_Request = 1; convertCtlblkLower.Case_Request = 1; convertCtlblkLower.CCSID_of_Input_Data = 13488;}Iconv400TransService::~Iconv400TransService(){}// ---------------------------------------------------------------------------// Iconv400TransService: The virtual transcoding service API// ---------------------------------------------------------------------------int Iconv400TransService::compareIString(const XMLCh* const comp1 , const XMLCh* const comp2){ const XMLCh* psz1 = comp1; const XMLCh* psz2 = comp2; while (true) { if (toUnicodeUpper(*psz1) != toUnicodeUpper(*psz2)) return int(*psz1) - int(*psz2); // If either has ended, then they both ended, so equal if (!*psz1 || !*psz2) break; // Move upwards for the next round psz1++; psz2++; } return 0;}int Iconv400TransService::compareNIString(const XMLCh* const comp1 , const XMLCh* const comp2 , const unsigned int maxChars){ const XMLCh* psz1 = comp1; const XMLCh* psz2 = comp2; unsigned int curCount = 0; while (true) { // If an inequality, then return the difference // If an inequality, then return difference if (toUnicodeUpper(*psz1) != toUnicodeUpper(*psz2)) return int(*psz1) - int(*psz2); // If either ended, then both ended, so equal if (!*psz1 || !*psz2) break; // Move upwards to next chars psz1++; psz2++; // // Bump the count of chars done. If it equals the count then we // are equal for the requested count, so break out and return // equal. // curCount++; if (maxChars == curCount) break; } return 0;}const XMLCh* Iconv400TransService::getId() const{ return gMyServiceId;}bool Iconv400TransService::isSpace(const XMLCh toCheck) const{ // The following are Unicode Space characters // if ((toCheck == 0x09) || (toCheck == 0x0A) || (toCheck == 0x0D) || (toCheck == 0x20) || (toCheck == 0xA0) || ((toCheck >= 0x2000) && (toCheck <= 0x200B)) || (toCheck == 0x3000) || (toCheck == 0xFEFF)) { return true; } else return false;}XMLLCPTranscoder* Iconv400TransService::makeNewLCPTranscoder(){ // // Try to create a default converter. If it fails, return a null pointer // which will basically cause the system to give up because we really can't // do anything without one. // UErrorCode uerr = U_ZERO_ERROR; UConverter* converter = ucnv_open(NULL, &uerr); if (!converter) return 0; // That went ok, so create an Iconv LCP transcoder wrapper and return it return new Iconv400LCPTranscoder(converter);}bool Iconv400TransService::supportsSrcOfs() const{ // This implementation supports source offset information return true;}void Iconv400TransService::upperCase(XMLCh* const toUpperCase) const{ XMLCh* outPtr = toUpperCase; while (*outPtr) { *outPtr = toUnicodeUpper(*outPtr); outPtr++; }}void Iconv400TransService::lowerCase(XMLCh* const toLowerCase) const{ XMLCh* outPtr = toLowerCase; while (*outPtr) { *outPtr = toUnicodeLower(*outPtr); outPtr++; }}// ---------------------------------------------------------------------------// Iconv400TransService: The virtual transcoding service API// ---------------------------------------------------------------------------XMLCh Iconv400TransService::toUnicodeUpper(XMLCh comp1) const{ XMLCh chRet; struct { int bytes_available; int bytes_used; char exception_id[7]; char reserved; char exception_data[15]; } error_code; error_code.bytes_available = sizeof(error_code); long charlen =2; QlgConvertCase((char*)&convertCtlblkUpper, (char*)&comp1, (char*)&chRet, (long*)&charlen, (char*)&error_code); return chRet;}XMLCh Iconv400TransService::toUnicodeLower(XMLCh comp1) const{ XMLCh chRet; struct { int bytes_available; int bytes_used; char exception_id[7]; char reserved; char exception_data[15]; } error_code; error_code.bytes_available = sizeof(error_code); long charlen =2; QlgConvertCase((char*)&convertCtlblkLower, (char*)&comp1, (char*)&chRet, (long*)&charlen, (char*)&error_code); return chRet;}// ---------------------------------------------------------------------------// Iconv400TransService: The protected virtual transcoding service API// ---------------------------------------------------------------------------XMLTranscoder*Iconv400TransService::makeNewXMLTranscoder( const XMLCh* const encodingName , XMLTransService::Codes& resValue , const unsigned int blockSize , MemoryManager* const manager){ UErrorCode uerr = U_ZERO_ERROR; UConverter* converter = ucnv_openU(encodingName, &uerr); if (!converter) { resValue = XMLTransService::UnsupportedEncoding; return 0; } return new (manager) Iconv400Transcoder(encodingName, converter, blockSize, manager);}// ---------------------------------------------------------------------------// IconvTranscoder: Constructors and Destructor// ---------------------------------------------------------------------------Iconv400Transcoder::Iconv400Transcoder( const XMLCh* const encodingName , UConverter* const toAdopt , const unsigned int blockSize , MemoryManager* const manager) : XMLTranscoder(encodingName, blockSize, manager) , fConverter(toAdopt) , fFixed(false) , fSrcOffsets(0){ // If there is a block size, then allocate our source offset array if (blockSize) fSrcOffsets = (long*) manager->allocate(blockSize * sizeof(long));//new long[blockSize]; // Remember if its a fixed size encoding fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));}Iconv400Transcoder::~Iconv400Transcoder(){ getMemoryManager()->deallocate(fSrcOffsets);//delete [] fSrcOffsets; // If there is a converter, ask Iconv400 to clean it up if (fConverter) { // <TBD> Does this actually delete the structure??? ucnv_close(fConverter); fConverter = 0; }}// ---------------------------------------------------------------------------// Iconv400Transcoder: The virtual transcoder API// ---------------------------------------------------------------------------unsigned intIconv400Transcoder::transcodeFrom(const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes){ // If debugging, insure the block size is legal // Set up pointers to the start and end of the source buffer const XMLByte* startSrc = srcData; const XMLByte* endSrc = srcData + srcCount; // // And now do the target buffer. This works differently according to // whether XMLCh and UChar are the same size or not. // UChar* startTarget; if (sizeof(XMLCh) == sizeof(UChar)) startTarget = (UChar*)toFill; else startTarget = (UChar*) getMemoryManager()->allocate ( maxChars * sizeof(UChar) );//new UChar[maxChars]; UChar* orgTarget = startTarget; // // Transoode the buffer. Buffer overflow errors are normal, occuring // when the raw input buffer holds more characters than will fit in // the Unicode output buffer. // UErrorCode err = U_ZERO_ERROR; ucnv_toUnicode ( fConverter , &startTarget , startTarget + maxChars , (const char**)&startSrc , (const char*)endSrc , (fFixed ? 0 : (int32_t*)fSrcOffsets) , false , &err ); if ((err != U_ZERO_ERROR) && (err != U_INDEX_OUTOFBOUNDS_ERROR)) { if (orgTarget != (UChar*)toFill) getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; if (fFixed) { XMLCh tmpBuf[17]; XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager()); ThrowXMLwithMemMgr2 ( TranscodingException , XMLExcepts::Trans_BadSrcCP , tmpBuf , getEncodingName() , getMemoryManager() ); } else { ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); } } // Calculate the bytes eaten and store in caller's param bytesEaten = startSrc - srcData; // And the characters decoded const unsigned int charsDecoded = startTarget - orgTarget; // // Translate the array of char offsets into an array of character // sizes, which is what the transcoder interface semantics requires. // If its fixed, then we can optimize it. // if (fFixed) { const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter); memset(charSizes, fillSize, maxChars); } else { // // We have to convert the series of offsets into a series of // sizes. If just one char was decoded, then its the total bytes // eaten. Otherwise, do a loop and subtract out each element from // its previous element. // if (charsDecoded == 1) { charSizes[0] = (unsigned char)bytesEaten; } else { // ICU does not return an extra element to allow us to figure // out the last char size, so we have to compute it from the // total bytes used. unsigned int index; for (index = 0; index < charsDecoded - 1; index++) { charSizes[index] = (unsigned char)(fSrcOffsets[index + 1] - fSrcOffsets[index]); } if( charsDecoded > 0 ) { charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten - fSrcOffsets[charsDecoded - 1]); } } } // // If XMLCh and UChar are not the same size, then we need to copy over // the temp buffer to the new one. // if (sizeof(UChar) != sizeof(XMLCh)) { XMLCh* outPtr = toFill; startTarget = orgTarget; for (unsigned int index = 0; index < charsDecoded; index++) *outPtr++ = XMLCh(*startTarget++); // And delete the temp buffer getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; } // Return the chars we put into the target buffer return charsDecoded;}unsigned intIconv400Transcoder::transcodeTo( const XMLCh* const srcData , const unsigned int srcCount , XMLByte* const toFill , const unsigned int maxBytes , unsigned int& charsEaten , const UnRepOpts options){ // // Get a pointer to the buffer to transcode. If UChar and XMLCh are // the same size here, then use the original. Else, create a temp // one and put a janitor on it. // const UChar* srcPtr; UChar* tmpBufPtr = 0; if (sizeof(XMLCh) == sizeof(UChar)) { srcPtr = (const UChar*)srcData; } else { tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager()); srcPtr = tmpBufPtr; } ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager()); // // Set the appropriate callback so that it will either fail or use // the rep char. Remember the old one so we can put it back. // UErrorCode err = U_ZERO_ERROR; // // Ok, lets transcode as many chars as we we can in one shot. The // ICU API gives enough info not to have to do this one char by char. // XMLByte* startTarget = toFill; const UChar* startSrc = srcPtr; err = U_ZERO_ERROR; ucnv_fromUnicode ( fConverter , (char**)&startTarget , (char*)(startTarget + maxBytes) , &startSrc , srcPtr + srcCount , 0
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?