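/* File: trnscode.c
   (This header and a "font size" control label were left behind by the
   online code viewer; they are not part of the original source.) */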
case IANA_CHARSET_USASCII:
case IANA_CHARSET_LATIN1:
case IANA_CHARSET_UTF8:
case IANA_CHARSET_UCS2:
return TRUE;
default:
if ((g_pCanConvert != NULL) && ((g_pCanConvert)(iIANAcharset)) )
{
return TRUE;
}
else
{
return FALSE;
}
}
}
/*===========================================
Iana2Unicode_calcLen
---------------------------------------------
Returns the number of characters pbIanaStr contains and reports the
byte length of pbIanaStr through piIanaStrResultBytelLen.

Parameters:
-----------
pbIanaStr:          The string to be measured. NULL yields 0 characters
                    and a byte length of 0.
iIANAcharset:       The MIBenum IANA code that identifies the character
                    encoding of pbIanaStr.
fIsNullTerminated:  If TRUE, the string ends at the first terminating
                    character and iIanaStrByteLen is only an upper
                    limit for the search (0 == no limit).
                    If FALSE, iIanaStrByteLen is the exact size.
iIanaStrByteLen:    Upper limit or exact size in bytes, see above.
piIanaStrResultBytelLen:
                    Receives the size of pbIanaStr in bytes (without
                    terminator). It is set to 0 whenever the byte length
                    cannot be decided. May be NULL if the caller is not
                    interested in the value.

Notes:
------
* UCS2: a leading byte order mark (FE FF or FF FE) is a signal
  character only; it is included in the byte length but not in the
  returned character count. The mark is only looked for when at least
  one complete 16-bit unit is known to be present, so no byte outside
  the string is ever read.
* UCS2, not null terminated: an odd byte length is an error (-1).
* Charsets not handled here are forwarded to the external function
  registered in g_pCalcLen, if any.
=============================================
Returns:
>= 0: the number of characters in pbIanaStr
-1:   something went wrong (piIanaStrResultBytelLen is still valid)
============================================*/
INT32 Iana2Unicode_calcLen( BYTE *pbIanaStr, INT16 iIANAcharset, BOOL fIsNullTerminated,
                            UINT32 iIanaStrByteLen, UINT32 *piIanaStrResultBytelLen )
{
    UINT32 len = 0;
    BOOL fCompensateForBOM = 0;
    UINT32 iIanaStrWchLen = 0;
    UINT32 iIgnoredByteLen = 0;
    BYTE *pbScan = pbIanaStr;

    /* Let callers pass NULL when they do not need the byte length. */
    if (piIanaStrResultBytelLen == NULL)
    {
        piIanaStrResultBytelLen = &iIgnoredByteLen;
    }

    if (iIANAcharset < 0)
    {
        *piIanaStrResultBytelLen = 0;
        return -1;
    }

    if (pbIanaStr == NULL)
    {
        *piIanaStrResultBytelLen = 0;
        return 0;
    }

    switch (iIANAcharset)
    {
    case IANA_CHARSET_INVALID:
        *piIanaStrResultBytelLen = 0;
        return -1;

    /************************/
    case IANA_CHARSET_USASCII:
    case IANA_CHARSET_LATIN1:
        /* One byte per character: byte length == character count. */
        if (! fIsNullTerminated)
        {
            len = iIanaStrByteLen;
        }
        else if (iIanaStrByteLen == 0)
        {
            /* no upper limit => strlen */
            len = (UINT32) strlen( (const char *) pbIanaStr );
        }
        else
        {
            /* Bounded search for the terminating zero byte. */
            while ((len < iIanaStrByteLen) && (*pbScan != 0))
            {
                len++;
                pbScan++;
            }
        }
        *piIanaStrResultBytelLen = len;
        return (INT32) len;

    /************************/
    case IANA_CHARSET_UCS2:
        if (! fIsNullTerminated)
        {
            *piIanaStrResultBytelLen = iIanaStrByteLen;
            if ((iIanaStrByteLen % 2) != 0)
            {
                /* A UCS2 string cannot end in the middle of a character. */
                return -1;
            }
            len = iIanaStrByteLen / 2; /* 2 == sizeof(WCHAR) */
        }
        else
        {
            /* Count 16-bit units up to the 16-bit zero terminator.
               iIanaStrByteLen == 0 means "no upper limit"; otherwise at
               most iIanaStrByteLen / 2 complete units are examined.
               The byte length and the character count are both derived
               from this single scan so that they always agree. */
            iIanaStrWchLen = iIanaStrByteLen / 2;
            while (((iIanaStrByteLen == 0) || (len < iIanaStrWchLen)) &&
                   ((pbScan[0] != 0) || (pbScan[1] != 0)))
            {
                len++;
                pbScan += 2;
            }
            *piIanaStrResultBytelLen = len * 2;
        }

        /* The Byte Order Mark (BOM) character is a signal character
           only and is not a part of the actual string. It can only be
           present if the string holds at least one complete unit. */
        fCompensateForBOM = (len > 0) &&
            ( ((pbIanaStr[0] == 0xFE) && (pbIanaStr[1] == 0xFF)) ||   /* Normal BOM */
              ((pbIanaStr[0] == 0xFF) && (pbIanaStr[1] == 0xFE)) );   /* Reverse byte order BOM */
        if (fCompensateForBOM)
        {
            len--;
        }
        return (INT32) len;

    /************************/
    case IANA_CHARSET_UTF8:
        /* UTF8Len is handed the byte limit through the out-parameter;
           presumably it replaces it with the actual byte length
           (NOTE(review): confirm against UTF8Len). */
        *piIanaStrResultBytelLen = iIanaStrByteLen;
        return UTF8Len( pbIanaStr, fIsNullTerminated, piIanaStrResultBytelLen );

    /************************/
    default:
        if (g_pCalcLen)
        {
            return (g_pCalcLen)( pbIanaStr, iIANAcharset, fIsNullTerminated,
                                 iIanaStrByteLen, piIanaStrResultBytelLen );
        }
        /* Unknown charset and no external helper registered. */
        *piIanaStrResultBytelLen = 0;
        return -1;
    /************************/
    }
}
/*===========================================
Iana2Unicode_convert
---------------------------------------------
Converts pbIanaStr into a unicode string.

Parameters:
-----------
pbIanaStr:        The string to be transcoded.
iIANAcharset:     The MIBenum IANA code that identifies the character
                  encoding of pbIanaStr.
iIanaStrByteLen:  The actual size (number of bytes) of pbIanaStr.
pwchResultBuffer: A pointer to the buffer where the result will be placed.
iResultBufferLen: The length (in number of characters) of pwchResultBuffer.

Notes:
------
* An empty input (pbIanaStr == NULL or iIanaStrByteLen == 0) is
  reported as success without touching the result buffer; the charset
  is not examined in that case.
* No terminating character is appended by the built-in conversions.
* UCS2: the byte length must be even, otherwise FALSE is returned
  (an odd length would make the copy exceed the capacity that was
  checked in whole characters). A leading byte order mark (FE FF or
  FF FE) is not copied; for FF FE the payload is copied with
  newReverseMemcpy, otherwise with newmemcpy.
* For the built-in charsets a NULL result buffer is rejected when
  there is something to write.
* Charsets not handled here are forwarded to the external function
  registered in g_pConvert, if any.
=============================================
Returns:
TRUE:  The conversion went ok.
FALSE: Something went wrong.
============================================*/
BOOL Iana2Unicode_convert( BYTE *pbIanaStr, INT16 iIANAcharset, UINT32 iIanaStrByteLen,
                           WCHAR *pwchResultBuffer, UINT32 iResultBufferLen )
{
    UINT32 i = 0;
    UINT32 iPayloadByteLen = 0;
    BYTE *pbSrc = NULL;
    WCHAR *pwchDst = NULL;
    BOOL fCompensateForBOM = 0;
    BOOL fReverseByteOrder = 0;

    /* Nothing to convert. */
    if ((pbIanaStr == NULL) || (iIanaStrByteLen == 0))
    {
        return TRUE;
    }

    if (iIANAcharset < 0)
    {
        return FALSE;
    }

    switch (iIANAcharset)
    {
    case IANA_CHARSET_INVALID:
        return FALSE;

    case IANA_CHARSET_USASCII:
    case IANA_CHARSET_LATIN1:
        /* One input byte becomes one output character. */
        if ((pwchResultBuffer == NULL) || (iResultBufferLen < iIanaStrByteLen))
        {
            return FALSE;
        }
        for (i = 0; i < iIanaStrByteLen; i++)
        {
            pwchResultBuffer[i] = (WCHAR) pbIanaStr[i];
        }
        return TRUE;

    case IANA_CHARSET_UCS2:
        /* Reject strings that end in the middle of a character. Because
           the length is non-zero and even it is at least 2, so the two
           bytes examined for the BOM below are inside the string. */
        if ((iIanaStrByteLen % 2) != 0)
        {
            return FALSE;
        }

        fReverseByteOrder = ((pbIanaStr[0] == 0xFF) && (pbIanaStr[1] == 0xFE)); /* Reverse byte order BOM */
        fCompensateForBOM = fReverseByteOrder ||
                            ((pbIanaStr[0] == 0xFE) && (pbIanaStr[1] == 0xFF)); /* Normal BOM */

        /* The Byte Order Mark (BOM) character is a signal character
           only and is not a part of the actual string, so it is
           skipped and does not need room in the result buffer. */
        pbSrc = pbIanaStr;
        iPayloadByteLen = iIanaStrByteLen;
        if (fCompensateForBOM)
        {
            pbSrc += 2;
            iPayloadByteLen -= 2;
        }

        if (iResultBufferLen < (iPayloadByteLen / 2)) /* 2 == sizeof(WCHAR) */
        {
            return FALSE;
        }
        if (iPayloadByteLen == 0)
        {
            /* The string consisted of the BOM only. */
            return TRUE;
        }
        if (pwchResultBuffer == NULL)
        {
            return FALSE;
        }

        if (fReverseByteOrder)
        { /* little endian UCS2 */
            newReverseMemcpy( pwchResultBuffer, pbSrc, iPayloadByteLen );
        }
        else
        { /* normal big endian UCS2, with or without BOM */
            newmemcpy( pwchResultBuffer, pbSrc, iPayloadByteLen );
        }
        return TRUE;

    case IANA_CHARSET_UTF8:
        if (pwchResultBuffer == NULL)
        {
            return FALSE;
        }
        /* The converter advances the pointers it is given, so work on
           copies and pass one-past-the-end pointers as limits. */
        pbSrc = pbIanaStr;
        pwchDst = pwchResultBuffer;
        return (ok == ConvertUTF8toUTF16( (UTF8**)(&pbSrc), (UTF8*)(pbSrc + iIanaStrByteLen),
                                          (UTF16**)(&pwchDst), (UTF16*)(pwchDst + iResultBufferLen) ) );

    default:
        if (g_pConvert != NULL)
        {
            return (g_pConvert)( pbIanaStr, iIANAcharset, iIanaStrByteLen,
                                 pwchResultBuffer, iResultBufferLen );
        }
        return FALSE;
    }
}
/*===========================================
Iana2Unicode_getNullTermByteLen
---------------------------------------------
Tells how many bytes the string terminating character occupies in a
string encoded with iIANAcharset.

Parameters:
-----------
iIANAcharset: The MIBenum IANA code that identifies the character
              encoding.

Notes:
------
Charsets not handled here are forwarded to the external function
registered in g_pNullLen, if any.
=============================================
Returns:
0:  Unknown IANA code.
>0: The number of bytes that a string terminating character occupies.
============================================*/
UINT8 Iana2Unicode_getNullTermByteLen( INT16 iIANAcharset )
{
    UINT8 nTerminatorBytes = 0; /* 0 == unknown charset */

    switch (iIANAcharset)
    {
    case IANA_CHARSET_UCS2:
        /* 16-bit terminator */
        nTerminatorBytes = 2;
        break;

    case IANA_CHARSET_USASCII:
    case IANA_CHARSET_LATIN1:
    case IANA_CHARSET_UTF8:
        /* single zero byte */
        nTerminatorBytes = 1;
        break;

    case IANA_CHARSET_INVALID:
        break;

    default:
        /* Ask the externally supplied helper, when one is registered. */
        if (g_pNullLen != NULL)
        {
            nTerminatorBytes = (g_pNullLen)(iIANAcharset);
        }
        break;
    }

    return nTerminatorBytes;
}
/*===========================================
Iana2Unicode_setFuncPtrs
---------------------------------------------
Registers external transcoding functions. The Iana2Unicode_* functions
fall back on them for charsets they do not handle themselves.

The registration is all-or-nothing: unless all four pointers are
non-NULL, nothing is changed and the previously registered functions
(if any) stay in place.

Parameters:
-----------
pCanConvert: a pointer to an external version of Iana2Unicode_canConvert.
pCalcLen:    a pointer to an external version of Iana2Unicode_calcLen.
pConvert:    a pointer to an external version of Iana2Unicode_convert.
pNullLen:    a pointer to an external version of Iana2Unicode_getNullTermByteLen.
=============================================
Returns:
-
============================================*/
VOID Iana2Unicode_setFuncPtrs( fPtr_Iana2Unicode_canConvert pCanConvert,
                               fPtr_Iana2Unicode_calcLen pCalcLen,
                               fPtr_Iana2Unicode_convert pConvert,
                               fPtr_Iana2Unicode_getNullTermByteLen pNullLen )
{
    /* An incomplete set is ignored as a whole. */
    if (!pCanConvert || !pCalcLen || !pConvert || !pNullLen)
    {
        return;
    }

    g_pCanConvert = pCanConvert;
    g_pCalcLen    = pCalcLen;
    g_pConvert    = pConvert;
    g_pNullLen    = pNullLen;
}
/*===========================================
Unicode2UTF8_calcLen
---------------------------------------------
Calculates the number of bytes needed to store pwchStr in UTF8 format.

Every WCHAR is sized on its own:
  value <  0x80   -> 1 byte
  value <  0x800  -> 2 bytes
  otherwise       -> 3 bytes
Neighbouring elements are never combined, so no character is counted
as more than 3 bytes. No terminating character is added to the count.

Parameters:
-----------
pwchStr:     The unicode string. NULL gives the result 0.
nbrOfWChars: The number of unicode characters that should be included
             in the calculation.
=============================================
Returns:
The number of bytes it takes to store pwchStr in UTF8 format.
============================================*/
UINT32 Unicode2UTF8_calcLen( WCHAR* pwchStr, UINT32 nbrOfWChars )
{
    UINT32 nUtf8Bytes = 0;
    UINT32 nRemaining = nbrOfWChars;
    WCHAR *pwchCur = pwchStr;

    if (pwchStr == NULL)
    {
        return 0;
    }

    for ( ; nRemaining > 0; nRemaining--, pwchCur++)
    {
        if (*pwchCur < 0x80)
        {
            nUtf8Bytes += 1;
        }
        else
        {
            nUtf8Bytes += (*pwchCur < 0x800) ? 2 : 3;
        }
    }

    return nUtf8Bytes;
}
/*===========================================
Unicode2UTF8_convert
---------------------------------------------
Encodes pwchSrc (unicode) in a UTF8 string format by means of
ConvertUTF16toUTF8.

Parameters:
-----------
pwchSrc:         The unicode string to be encoded. May only be NULL
                 when iSrcLen is 0.
iSrcLen:         The length (in number of characters) of pwchSrc.
pbDst:           The destination buffer. May only be NULL when iDstLen
                 is 0.
iDstLen:         The length (in bytes) of pbDst.
ppbResultDstEnd: If the function succeeds, this parameter will hold a
                 pointer to the first byte after the UTF8 result string
                 in the destination buffer. It is left untouched on
                 failure. May be NULL if the caller does not need the
                 end position.
=============================================
Returns:
TRUE:  The encoding went ok.
FALSE: Something went wrong (bad arguments, or the converter did not
       report ok).
============================================*/
BOOL Unicode2UTF8_convert( WCHAR* pwchSrc, UINT32 iSrcLen,
                           BYTE* pbDst, UINT32 iDstLen, BYTE** ppbResultDstEnd )
{
    /* A non-empty range must be backed by a real buffer. */
    if ( ((iSrcLen != 0) && (pwchSrc == NULL)) ||
         ((iDstLen != 0) && (pbDst == NULL)) )
    {
        return FALSE;
    }

    /* The converter is given the addresses of the local parameter
       copies; pbDst is used afterwards as the end of the output. */
    if (ok != ConvertUTF16toUTF8( &pwchSrc, pwchSrc + iSrcLen,
                                  &pbDst, pbDst + iDstLen ))
    {
        return FALSE;
    }

    /* The end position is optional; do not write through NULL. */
    if (ppbResultDstEnd != NULL)
    {
        *ppbResultDstEnd = pbDst;
    }
    return TRUE;
}
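/* Keyboard shortcut help left behind by the online code viewer
   (not part of trnscode.c):
     Copy code:            Ctrl + C
     Search code:          Ctrl + F
     Full-screen mode:     F11
     Toggle theme:         Ctrl + Shift + D
     Show shortcuts:       ?
     Increase font size:   Ctrl + =
     Decrease font size:   Ctrl + -
*/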