iconv_cnv.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 753 行 · 第 1/2 页
CPP
753 行
} /*Rigs targetCapacity to have at least one cell for zero termination */ /*Updates targetCapacity to contain the number of bytes written to target */ targetCapacity = 1; targetCapacity += myTarget - target; if (targetSize == 0) { *err = U_BUFFER_OVERFLOW_ERROR; } /* If the output buffer is exhausted, we need to stop writing * to it but if the input buffer is not exhausted, * we need to continue the conversion in order to store in targetSize * the number of bytes that was required */ if (*err == U_BUFFER_OVERFLOW_ERROR) { UChar target2[CHUNK_SIZE]; UChar *target2_alias = target2; const UChar *target2_limit = target2 + CHUNK_SIZE; /*We use a stack allocated buffer around which we loop (in case the output is greater than CHUNK_SIZE) */ while (*err == U_BUFFER_OVERFLOW_ERROR) { *err = U_ZERO_ERROR; target2_alias = target2; ucnv_toUnicode (&myConverter, &target2_alias, target2_limit, &mySource, mySource_limit, NULL, TRUE, err); /*updates the output parameter to contain the number of char required */ targetCapacity += target2_alias - target2 + 1; } (targetCapacity)--; /*adjust for last one */ if (U_SUCCESS (*err)) *err = U_BUFFER_OVERFLOW_ERROR; } return targetCapacity;}UChar ucnv_getNextUChar (UConverter * converter, const char **source, const char *sourceLimit, UErrorCode * err){ /*calls the specific conversion routines */ /*as dictated in a code review, avoids a switch statement */ return getNextUChar(converter,source,sourceLimit,err);}/*************************** Will convert a sequence of bytes from one codepage to another.* @param toConverterName: The name of the converter that will be used to encode the output buffer* @param fromConverterName: The name of the converter that will be used to decode the input buffer* @param target: Pointer to the output buffer* written* @param targetLength: on input contains the capacity of target, on output the number of bytes copied to target* @param source: Pointer to the input buffer* @param sourceLength: on input contains the capacity of source, on output the number of bytes processed in "source"* @param internal: used internally to store store state data across calls* @param err: fills in an error status*/voidT_UConverter_fromCodepageToCodepage (UConverter * outConverter, UConverter * inConverter, char **target, const char *targetLimit, const char **source, const char *sourceLimit, int32_t* offsets, int flush, UErrorCode * err){ UChar out_chunk[CHUNK_SIZE]; const UChar *out_chunk_limit = out_chunk + CHUNK_SIZE; UChar *out_chunk_alias; UChar const *out_chunk_alias2; if (U_FAILURE (*err)) return; /*loops until the input buffer is completely consumed *or if an error has be encountered *first we convert from inConverter codepage to Unicode *then from Unicode to outConverter codepage */ while ((*source != sourceLimit) && U_SUCCESS (*err)) { out_chunk_alias = out_chunk; ucnv_toUnicode (inConverter, &out_chunk_alias, out_chunk_limit, source, sourceLimit, NULL, flush, err); /*BUFFER_OVERFLOW_ERROR means that the output "CHUNK" is full *we will require at least another loop (it's a recoverable error) */ if (U_SUCCESS (*err) || (*err == U_BUFFER_OVERFLOW_ERROR)) { *err = U_ZERO_ERROR; out_chunk_alias2 = out_chunk; while ((out_chunk_alias2 != out_chunk_alias) && U_SUCCESS (*err)) { ucnv_fromUnicode (outConverter, target, targetLimit, &out_chunk_alias2, out_chunk_alias, NULL, TRUE, err); } } else break; } return;}int32_t ucnv_convert(const char *toConverterName, const char *fromConverterName, char *target, int32_t targetSize, const char *source, int32_t sourceSize, UErrorCode * err){ const char *mySource = source; const char *mySource_limit = source + sourceSize; int32_t mySourceLength = 0; UConverter *inConverter; UConverter *outConverter; char *myTarget = target; int32_t targetCapacity = 0; if (U_FAILURE (*err)) return 0; if ((targetSize < 0) || (sourceSize < 0)) { *err = U_ILLEGAL_ARGUMENT_ERROR; return 0; } /*if there is no input data, we're done */ if (sourceSize == 0) { /*in case the caller passed an output ptr *we update it */ return 0; } /*create the converters */ inConverter = ucnv_open (fromConverterName, err); if (U_FAILURE (*err)) return 0; outConverter = ucnv_open (toConverterName, err); if (U_FAILURE (*err)) { ucnv_close (inConverter); return 0; } if (targetSize > 0) { T_UConverter_fromCodepageToCodepage (outConverter, inConverter, &myTarget, target + targetSize, &mySource, mySource_limit, NULL, TRUE, err); } /*Updates targetCapacity to contain the number of bytes written to target */ targetCapacity = myTarget - target; if (targetSize == 0) { *err = U_BUFFER_OVERFLOW_ERROR; } /* If the output buffer is exhausted, we need to stop writing * to it but continue the conversion in order to store in targetSize * the number of bytes that was required*/ if (*err == U_BUFFER_OVERFLOW_ERROR) { char target2[CHUNK_SIZE]; char *target2_alias = target2; const char *target2_limit = target2 + CHUNK_SIZE; /*We use a stack allocated buffer around which we loop *(in case the output is greater than CHUNK_SIZE) */ while (*err == U_BUFFER_OVERFLOW_ERROR) { *err = U_ZERO_ERROR; target2_alias = target2; T_UConverter_fromCodepageToCodepage (outConverter, inConverter, &target2_alias, target2_limit, &mySource, mySource_limit, NULL, TRUE, err); /*updates the output parameter to contain the number of char required */ targetCapacity += (target2_alias - target2) + 1; } /*We will set the erro code to BUFFER_OVERFLOW_ERROR only if *nothing graver happened in the previous loop*/ (targetCapacity)--; if (U_SUCCESS (*err)) *err = U_BUFFER_OVERFLOW_ERROR; } ucnv_close (inConverter); ucnv_close (outConverter); return targetCapacity;}void Converter_fromUnicode(UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, int flush, UErrorCode * err){ int chardone; const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = (sourceLimit - mySource) * 2; unsigned char targetChar = 0x00; /* pick up the iconv handle and perform the conversion */ errno = 0; chardone =iconv(_this->sharedData->fromiconv_handle,(char**)source, (size_t*) &sourceLength,target,(size_t *)&targetLength); if (errno!=0) if (errno == E2BIG) { *err = U_BUFFER_OVERFLOW_ERROR; return; } else if ((errno ==EBADDATA)|| (errno ==ECONVERT)) { char errno_id[7]; send_message(NULL,ICONV_CONVERT_PROBLEM,'d'); convert_errno(errno_id,errno); send_message(NULL,errno_id,'d'); *err = U_INVALID_CHAR_FOUND; return; } return; }void Convert_toUnicode(UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, int flush, UErrorCode * err){ char *mySource = (char *) *source; UChar *myTarget = *target; int32_t targetLength = (targetLimit - myTarget)*2; /* multiply by 2 */ int32_t sourceLength = (sourceLimit - (char *) mySource); int chardone; /* pick up the iconv handle */ errno = 0; chardone =iconv(_this->sharedData->toiconv_handle,(char**)source, (size_t*) &sourceLength,(char **)target,(size_t *)&targetLength); if (errno!=0) { if (errno == E2BIG) { *err = U_BUFFER_OVERFLOW_ERROR; return; } else if ((errno ==EBADDATA)|| (errno ==ECONVERT)) { char errno_id[7]; send_message(NULL,ICONV_CONVERT_PROBLEM,'d'); convert_errno(errno_id,errno); send_message(NULL,errno_id,'d'); *err = U_INVALID_CHAR_FOUND; return; }} return;}UChar getNextUChar(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err){ UChar myUChar; UChar* myUCharptr; size_t numberibytes=sizeof(UChar); size_t numberobytes=sizeof(UChar); int chardone; if ((*source)+1 > sourceLimit) { *err = U_INDEX_OUTOFBOUNDS_ERROR; return 0xFFFD; } /*pick up the iconv handle */ /* convert the requested character - need to cache characters 6 will do - XMLReader is using this function to get header to process*/ myUCharptr = &myUChar; chardone =iconv(converter->sharedData->toiconv_handle,(char**)source, (size_t*) &numberibytes,(char **)&myUCharptr,(size_t *)&numberobytes); if (myUChar != 0xFFFD) return myUChar; else { UChar* myUCharPtr = &myUChar; const char* sourceFinal = *source; *err = U_INVALID_CHAR_FOUND; /*makes the internal caching transparent to the user*/ if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; return myUChar; }}XERCES_CPP_NAMESPACE_END
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?