📄 plx_charset.c
字号:
}
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> lpGB2312Str = %p; chInGB2312 = %d; lpUTF8CharStr = %p; bufLenUTF8 = %d",
lpGB2312Str, chInGB2312, lpUTF8CharStr, bufLenUTF8);
#endif
if (chInGB2312 > 0)
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> source buffer length (chInGB2312) > 0 ...");
#endif
srcLen = plx_strlen(lpGB2312Str);
if (srcLen <= 0 )
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> source string's length (srcLen) < 0 ...");
#endif
return 0;
}
if (srcLen > chInGB2312)
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> srcLen > chInGB2312 ...");
#endif
srcBuf = plx_malloc(chInGB2312 + 1);
if (NULL == srcBuf)
{
return 0;
}
plx_memcpy(srcBuf, lpGB2312Str, chInGB2312);
srcBuf[chInGB2312] = 0;
}
else
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> srcLen <= chInGB2312 ...");
#endif
srcBuf = (char *)lpGB2312Str;
}
}
else
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> srource length (chInGB2312) <= 0 ...");
#endif
srcBuf = (char *)lpGB2312Str;
}
tmpBuf = plx_malloc(srcLen * 2 + 2);
if (NULL == tmpBuf)
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> tmpBuf malloc failed, return 0 ...");
#endif
if (srcLen > chInGB2312 && chInGB2312 > 0)
{
plx_free(srcBuf);
}
return 0;
}
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> from gb to ucs2, working ...");
WAP_DataOut((const unsigned char *)srcBuf, (unsigned short)srcLen, (const unsigned char *)"GB source content");
#endif
ret = mmi_chset_convert(MMI_CHSET_GB2312, MMI_CHSET_UCS2, srcBuf, tmpBuf, srcLen * 2 + 2);
if (ret <= 0)
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> srcLen > gb to ucs2, ret <= 0 ...");
#endif
if (srcLen > chInGB2312 && chInGB2312 > 0)
{
plx_free(srcBuf);
}
plx_free(tmpBuf);
return ret;
}
if (NULL == lpUTF8CharStr || bufLenUTF8 <= 0)
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> destination is null, caculate the converion length ...");
#endif
dstLen = srcLen * 3 + 1;
dstBuf = plx_malloc(dstLen);
if (NULL == dstBuf)
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> srcLen > destination buffer malloc failed, return 0 ...");
#endif
if (srcLen > chInGB2312 && chInGB2312 > 0)
{
plx_free(srcBuf);
}
return 0;
}
}
else
{
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> destination is valid ...");
#endif
dstLen = bufLenUTF8;
dstBuf = lpUTF8CharStr;
}
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> from ucs2 to utf8, working ...");
WAP_DataOut((const unsigned char *)tmpBuf, (unsigned short)(srcLen * 2 + 2), (const unsigned char *)"UCS2 tmpBuf content");
#endif
ret = mmi_chset_convert(MMI_CHSET_UCS2, MMI_CHSET_UTF8, tmpBuf, dstBuf, dstLen);
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> conversion result: ret = %d ...", ret);
WAP_DataOut((const unsigned char *)dstBuf, (unsigned short)dstLen, (const unsigned char *)"UTF8 result content");
#endif
if (srcLen > chInGB2312 && chInGB2312 > 0)
{
plx_free(srcBuf);
}
if (NULL == lpUTF8CharStr || bufLenUTF8 <= 0)
{
plx_free(dstBuf);
}
plx_free(tmpBuf);
#ifdef PLX_CHSET_DEBUG
PlxTrace("<PlxGB2312ToUTF8> finished ...");
#endif
return ret;
}
/****************************************************************************
* Function	PlxUTF8ToGB2312
* Purpose	Convert a NUL-terminated UTF-8 string to GB2312 by way of an
*			intermediate UCS2 buffer.
* Params	lpUTF8CharStr	[in]  source UTF-8 string; NULL makes the
*							function return 0.
*			lpGB2312Str		[out] destination buffer, or NULL to only
*							obtain the converted length.
*			bufLenGB2312	[in]  size of lpGB2312Str in bytes; a value
*							<= 0 also selects the length-only mode.
* Return	converted length as reported by mmi_chset_convert(); the
*			non-positive result of the first conversion if it fails;
*			0 if the source is NULL or an allocation fails.
* Remarks	In length-only mode the result is written to a scratch buffer
*			that is released before returning.
\****************************************************************************/
int PlxUTF8ToGB2312(const char */*IN*/lpUTF8CharStr, char */*OUT*/lpGB2312Str, int /*IN*/bufLenGB2312)
{
	char *tmpBuf;
	char *dstBuf;
	int tmpLen;
	int dstLen;
	int ret;
	int bLengthOnly;

	if (NULL == lpUTF8CharStr)
	{
		return 0;
	}

	bLengthOnly = (NULL == lpGB2312Str || bufLenGB2312 <= 0);

	if (bLengthOnly)
	{
		/* There is no usable destination size in this mode, so size the UCS2
		 * scratch buffer from the source: every UTF-8 byte produces at most
		 * one 16-bit unit, plus a 2-byte terminator. The previous code used
		 * bufLenGB2312 here, which is 0 or negative in this mode. */
		tmpLen = (int)plx_strlen(lpUTF8CharStr) * 2 + 2;
	}
	else
	{
		/* Unchanged: at most bufLenGB2312 UCS2 units plus terminator. */
		tmpLen = bufLenGB2312 * 2 + 2;
	}

	tmpBuf = plx_malloc(tmpLen);
	if (NULL == tmpBuf)
	{
		return 0;
	}

	ret = mmi_chset_convert(MMI_CHSET_UTF8, MMI_CHSET_UCS2, (char *)lpUTF8CharStr, tmpBuf, tmpLen);
	if (ret <= 0)
	{
		plx_free(tmpBuf);
		return ret;
	}

	if (bLengthOnly)
	{
		/* A 16-bit unit needs at most two GB2312 bytes, so `ret` bytes of
		 * UCS2 never expand; one extra byte covers the terminator whether or
		 * not `ret` already counts the UCS2 terminator.
		 * NOTE(review): assumes `ret` is a byte count, as the former
		 * `ret / 2` implied -- confirm against mmi_chset_convert(). */
		dstLen = ret + 1;
		dstBuf = plx_malloc(dstLen);
		if (NULL == dstBuf)
		{
			plx_free(tmpBuf);
			return 0;
		}
	}
	else
	{
		dstLen = bufLenGB2312;
		dstBuf = lpGB2312Str;
	}

	ret = mmi_chset_convert(MMI_CHSET_UCS2, MMI_CHSET_GB2312, tmpBuf, dstBuf, dstLen);

	if (bLengthOnly)
	{
		/* The scratch destination was only needed to measure the result. */
		plx_free(dstBuf);
	}
	plx_free(tmpBuf);
	return ret;
}
#else/**********************************watershed*******************************/
/* Platform abstraction header for this build variant. */
#include "plxosal.h"
/* Drop any earlier definitions so the local versions below take effect. */
#undef ASSERT
#undef MemAlloc
#undef MemFree
/* ASSERT expands to nothing: assertions are compiled out in this file. */
#define ASSERT(expr)
/* Memory helpers map directly onto malloc()/free(). */
#define MemAlloc(size) malloc(size)
#define MemFree(p) free(p)
/* Read two bytes at p as one WORD; the byte at p is the low byte and the
 * byte at p + 1 is the high byte. */
#define GET16(p) (WORD) (((WORD)(*((BYTE*)(p) + 1)) << 8) + *((BYTE*)(p)))
/* Absolute value; note that the argument is evaluated more than once. */
#define ABS(val) ((val) > 0 ? (val) : (-(val)))
/* NOTE(review): meaning not evident from this part of the file -- confirm
 * where NUM_IN_FIELD_ZERO is used. */
#define NUM_IN_FIELD_ZERO 21
/* Silence the listed MSVC warnings when building with that compiler. */
#if defined(_MSC_VER)
#pragma warning( disable : 4100 4127 4244 4701 )
#endif // _MSC_VER
/* _HUGE expands to nothing here, so the "huge" pointer types below are
 * ordinary pointers. */
#define _HUGE
typedef _HUGE void* PVOIDHUGE;
typedef _HUGE unsigned char* PBYTEHUGE; /* by */
/* Conversion table data; presumably defines pFileGb2Uni, pFileUni2Gb and
 * pFileOtherGb, whose sizes InitConvChars() records -- confirm in the .dat. */
#include "charcode.dat"
/* Table sizes, filled in by InitConvChars(). */
static DWORD dwGb2Uni, dwOtherGb, dwUni2Gb;
/* Local wide-character types built on unsigned short. */
typedef unsigned short WCHAR;
typedef WCHAR* LPWSTR;
typedef const unsigned short* LPCWSTR;
/**************************************************************************\
*
* Char code convert routines:
*
\**************************************************************************/
/* Forward declarations of two file-local (static) WCHAR string helpers.
 * Their definitions are not in this part of the file; judging by the names
 * they presumably mirror wcsncpy() and wcslen() -- confirm at the definitions. */
static WCHAR *hp_wcsncpy(WCHAR *strDestination, const WCHAR *strSource, int count);
static int hp_wcslen(const WCHAR *string);
/*
 * One-time setup for the conversion routines: stores sizeof(pFileGb2Uni),
 * sizeof(pFileUni2Gb) and sizeof(pFileOtherGb) in dwGb2Uni, dwUni2Gb and
 * dwOtherGb. Calls after the first leave those values untouched.
 * Always returns TRUE.
 */
BOOL InitConvChars(void)
{
	static BOOL s_bReady = FALSE;

	if (!s_bReady)
	{
		dwGb2Uni = sizeof(pFileGb2Uni);
		dwUni2Gb = sizeof(pFileUni2Gb);
		dwOtherGb = sizeof(pFileOtherGb);
		s_bReady = TRUE;
	}

	return TRUE;
}
/* Intentionally a no-op: this function performs no work. */
void ExitConvChars(void)
{
	return;
}
/**************************************************************************\
*
* structures and tables for supporting GB18030:
*
\**************************************************************************/
/* REMOVE_GB18030_SUPPORT is defined unconditionally on the next line, so the
 * typedef guarded below is always compiled out in this file. */
#define REMOVE_GB18030_SUPPORT
#if !defined(REMOVE_GB18030_SUPPORT)
/* Index record for the GB18030 tables.
 * NOTE(review): field meanings are inferred from the names only (a begin/end
 * byte range plus two offsets) -- confirm against the code that reads them. */
typedef struct {
BYTE tblbegin;
BYTE tblend;
WORD tbloffset;
WORD algoffset;
} indextbl_t;
#endif // REMOVE_GB18030_SUPPORT
/*
 * 80 bytes forming 40 two-byte entries, low byte first (the byte order that
 * GET16() reads).
 * NOTE(review): despite the name, the values look like Unicode code points
 * (0x00A3, 0x0394, ...) rather than GSM codes. Entry i appears to belong to
 * the two-byte code 0xAAA1 + i used in gsm2multibyte and to the GSM code in
 * multibyte2gsm[i] -- confirm against the lookup code, which is not visible
 * in this part of the file.
 */
static const unsigned char gb2gsm[] =
{
0xA3, 0x00, 0xA5, 0x00, 0xE8, 0x00, 0xE9, 0x00,
0xF9, 0x00, 0xEC, 0x00, 0xF2, 0x00, 0xE7, 0x00,
0xD8, 0x00, 0xF8, 0x00, 0xC5, 0x00, 0xE5, 0x00,
0x94, 0x03, 0xA6, 0x03, 0x93, 0x03, 0x9B, 0x03,
0xA9, 0x03, 0xA0, 0x03, 0xA8, 0x03, 0xA3, 0x03,
0x98, 0x03, 0x9E, 0x03, 0x20, 0x00, 0xC6, 0x00,
/* 1) */
0xE6, 0x00, 0xDF, 0x00, 0xC9, 0x00, 0xA4, 0x00,
0xA1, 0x00, 0xC4, 0x00, 0xD6, 0x00, 0xD1, 0x00,
0xDC, 0x00, 0xA7, 0x00, 0xBF, 0x00, 0xE4, 0x00,
0xF6, 0x00, 0xF1, 0x00, 0xFC, 0x00, 0xE0, 0x00,
};
/*
 * Mapping for the 128 values 0x80..0xFF, two bytes per entry (256 bytes);
 * the entry for value v starts at offset (v - 0x80) * 2.
 * An entry whose second byte is 0x00 passes the value through unchanged as
 * its first byte; any other entry is a two-byte code (lead byte, trail byte).
 * NOTE(review): the index is presumably the Unicode code point, going by the
 * table name -- confirm against the lookup code.
 *
 * Fix: the entry for 0x94 used to read 0x84, 0x00, the only pass-through
 * entry that was not the identity; it now reads 0x94, 0x00.
 */
static const unsigned char sbuni2gb[] =
{
	/* 0x80 - 0x9F: passed through unchanged */
	0x80, 0x00, 0x81, 0x00, 0x82, 0x00, 0x83, 0x00,
	0x84, 0x00, 0x85, 0x00, 0x86, 0x00, 0x87, 0x00,
	0x88, 0x00, 0x89, 0x00, 0x8A, 0x00, 0x8B, 0x00,
	0x8C, 0x00, 0x8D, 0x00, 0x8E, 0x00, 0x8F, 0x00,
	0x90, 0x00, 0x91, 0x00, 0x92, 0x00, 0x93, 0x00,
	0x94, 0x00, 0x95, 0x00, 0x96, 0x00, 0x97, 0x00,
	0x98, 0x00, 0x99, 0x00, 0x9A, 0x00, 0x9B, 0x00,
	0x9C, 0x00, 0x9D, 0x00, 0x9E, 0x00, 0x9F, 0x00,
	/* A4, A7, E0, E8, E9, EC, F2, F9, FC are characters
	 * coded in both areas (GB2312 and user-defined). The GB2312 code is
	 * the one stored; the user-defined alternative is kept in the comment
	 * that follows the row. */
	0xA0, 0x00, 0xAA, 0xBD, 0xA2, 0x00, 0xAA, 0xA1,
	0xA1, 0xE8, 0xAA, 0xA2, 0xA6, 0x00, 0xA1, 0xEC,
	/* 0xAA, 0xBC, 0xAA, 0xC2, */
	0xA1, 0xA7, 0xA9, 0x00, 0xAA, 0x00, 0xAB, 0x00,
	0xAC, 0x00, 0xAD, 0x00, 0xAE, 0x00, 0xAF, 0x00,
	0xA1, 0xE3, 0xA1, 0xC0, 0xB2, 0x00, 0xB3, 0x00,
	0xB4, 0x00, 0xB5, 0x00, 0xB6, 0x00, 0xA1, 0xA4,
	0xB8, 0x00, 0xB9, 0x00, 0xBA, 0x00, 0xBB, 0x00,
	0xBC, 0x00, 0xBD, 0x00, 0xBE, 0x00, 0xAA, 0xC3,
	0xC0, 0x00, 0xC1, 0x00, 0xC2, 0x00, 0xC3, 0x00,
	0xAA, 0xBE, 0xAA, 0xAB, 0xAA, 0xB8, 0xC7, 0x00,
	0xC8, 0x00, 0xAA, 0xBB, 0xCA, 0x00, 0xCB, 0x00,
	0xCC, 0x00, 0xCD, 0x00, 0xCE, 0x00, 0xCF, 0x00,
	0xD0, 0x00, 0xAA, 0xC0, 0xD2, 0x00, 0xD3, 0x00,
	0xD4, 0x00, 0xD5, 0x00, 0xAA, 0xBF, 0xA1, 0xC1,
	0xAA, 0xA9, 0xD9, 0x00, 0xDA, 0x00, 0xDB, 0x00,
	0xAA, 0xC1, 0xDD, 0x00, 0xDE, 0x00, 0xAA, 0xBA,
	0xA8, 0xA4, 0xA8, 0xA2, 0xE2, 0x00, 0xE3, 0x00,
	/* 0xAA, 0xC8, */
	0xAA, 0xC4, 0xAA, 0xAC, 0xAA, 0xB9, 0xAA, 0xA8,
	0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0xEB, 0x00,
	/* 0xAA, 0xA3, 0xAA, 0xA4, */
	0xA8, 0xAC, 0xA8, 0xAA, 0xEE, 0x00, 0xEF, 0x00,
	/* 0xAA, 0xA6, */
	0xF0, 0x00, 0xAA, 0xC6, 0xA8, 0xB0, 0xA8, 0xAE,
	/* 0xAA, 0xA7, */
	0xF4, 0x00, 0xF5, 0x00, 0xAA, 0xC5, 0xA1, 0xC2,
	0xAA, 0xAA, 0xA8, 0xB4, 0xA8, 0xB2, 0xFB, 0x00,
	/* 0xAA, 0xA5, */
	0xA8, 0xB9, 0xFD, 0x00, 0xFE, 0x00, 0xFF, 0x00
	/* 0xAA, 0xC7, */
};
/* Marker stored in ascii2gsm for a character code that has no mapping. */
#define defch (-1)
/*
 * 129 entries, indexed by character code 0x00..0x80.
 * defch means "no mapping". Values below 0x100 are single codes; values of
 * the form 0x1Bxx appear to be the GSM escape code 0x1B followed by an
 * extension-table code xx -- NOTE(review): confirm against the code that
 * consumes this table, which is not visible in this part of the file.
 */
static const short ascii2gsm[] =
{
defch , defch , defch , defch , defch , defch , defch , defch ,
defch , defch , 0x000A, defch , 0x1B0A, 0x000D, defch , defch ,
defch , defch , defch , defch , defch , defch , defch , defch ,
defch , defch , defch , defch , defch , defch , defch , defch ,
0x0020, 0x0021, 0x0022, 0x0023, 0x0002, 0x0025, 0x0026, 0x0027,
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
0x0000, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
0x0058, 0x0059, 0x005A, 0x1B3C, 0x1B2F, 0x1B3E, 0x1B14, 0x0011,
defch , 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
0x0078, 0x0079, 0x007A, 0x1B28, 0x1B40, 0x1B29, 0x1B3D, defch ,
0x1B65
};
/*
 * 128 entries, indexed by a 7-bit code 0x00..0x7F (presumably the GSM
 * default-alphabet code, going by the table name -- confirm with callers).
 * Entries up to 0x007F are single-byte character values; entries
 * 0xAAA1..0xAAC8 are two-byte codes with lead byte 0xAA, each of which
 * occurs exactly once. multibyte2gsm is the inverse for those 40 codes.
 */
static const unsigned short gsm2multibyte[] =
{
0x0040, 0xAAA1, 0x0024, 0xAAA2, 0xAAA3, 0xAAA4, 0xAAA5, 0xAAA6,
0xAAA7, 0xAAA8, 0x000A, 0xAAA9, 0xAAAA, 0x000D, 0xAAAB, 0xAAAC,
0xAAAD, 0x005F, 0xAAAE, 0xAAAF, 0xAAB0, 0xAAB1, 0xAAB2, 0xAAB3,
0xAAB4, 0xAAB5, 0xAAB6, 0xAAB7, 0xAAB8, 0xAAB9, 0xAABA, 0xAABB,
0x0020, 0x0021, 0x0022, 0x0023, 0xAABC, 0x0025, 0x0026, 0x0027,
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
0xAABD, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
0x0058, 0x0059, 0x005A, 0xAABE, 0xAABF, 0xAAC0, 0xAAC1, 0xAAC2,
0xAAC3, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
0x0078, 0x0079, 0x007A, 0xAAC4, 0xAAC5, 0xAAC6, 0xAAC7, 0xAAC8,
};
/*
 * 40 entries: entry i is the index in gsm2multibyte at which the two-byte
 * code 0xAAA1 + i is stored, i.e. gsm2multibyte[multibyte2gsm[i]] equals
 * 0xAAA1 + i for every i in 0..39.
 */
static const unsigned char multibyte2gsm[] =
{
0x01, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
0x0B, 0x0C, 0x0E, 0x0F, 0x10, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C,
0x1D, 0x1E, 0x1F, 0x24, 0x40, 0x5B, 0x5C, 0x5D,
0x5E, 0x5F, 0x60, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
};
/*
 * Classify the character that starts at lpCode.
 *
 * lpCode     : first byte of the character; it is always read.
 * length     : number of bytes available at lpCode.
 * bIsGb18030 : non-zero selects the GB18030 rules (2- and 4-byte forms),
 *              zero selects the plain two-byte rules.
 * ch1..ch4   : receive the bytes that were examined; only as many of them
 *              as the function had to look at are written.
 *
 * Returns 1 for a single-byte character, 2 or 4 for a multi-byte character,
 * and -1 when the input is too short or a byte is out of range.
 *
 * Fix: in GB18030 mode a second byte of 0x3A..0x3F used to fall through to
 * the four-byte path; the second byte of a four-byte code must be
 * 0x30..0x39, so that range is now rejected.
 */
static int CheckIsGbChar(char *lpCode, int length, BOOL bIsGb18030,
						 BYTE *ch1, BYTE *ch2, BYTE *ch3, BYTE *ch4)
{
	*ch1 = (BYTE)*lpCode;

	/* Bytes outside the lead-byte range 0x81..0xFE (including 0x80 and
	 * 0xFF) are treated as single-byte characters. */
	if (*ch1 < 0x81 || *ch1 > 0xfe)
	{
		return 1;
	}

	if (length < 2)		// not enough bytes for a trail byte
	{
		return -1;
	}
	*ch2 = (BYTE)*(lpCode + 1);

	if (!bIsGb18030)
	{
		if (*ch2 < 0x40 || *ch2 > 0xfe)
		{
			return -1;
		}
		return 2;
	}

	if (*ch2 < 0x30 || *ch2 == 0x7f || *ch2 > 0xfe)
	{
		return -1;
	}
	if (*ch2 >= 0x40)	// 2 byte code
	{
		return 2;
	}
	if (*ch2 > 0x39)	// 0x3A..0x3F: neither a trail byte nor a digit byte
	{
		return -1;
	}

	// 4 byte code: lead, digit, lead-range byte, digit
	if (length < 4)		// not enough bytes
	{
		return -1;
	}
	*ch3 = (BYTE)*(lpCode + 2);
	if (*ch3 < 0x81 || *ch3 > 0xfe)
	{
		return -1;
	}
	*ch4 = (BYTE)*(lpCode + 3);
	if (*ch4 < 0x30 || *ch4 > 0x39)
	{
		return -1;
	}
	return 4;
}
/*
 * Copy `length` bytes from lpSrc to lpDst one byte at a time, front to back,
 * and return lpDst. A length of zero copies nothing.
 */
static void *memcpy_far(void *lpDst, PVOIDHUGE lpSrc, int length)
{
	PBYTEHUGE from = (PBYTEHUGE)lpSrc;
	BYTE *to = (BYTE *)lpDst;

	for (; length != 0; --length)
	{
		*to = *from;
		++to;
		++from;
	}

	return lpDst;
}
/**************************************************************************\
* Function : MultiByteToWideChar
* Purpose :
* maps a character string to a wide-character (Unicode) string.
* Params :
* codepage : Specifies the code page to be used to perform the
* conversion.
* dwFlags : reserved, must be zero.
* lpMultiByteStr: Points to the character string to be converted.
* cchMultiByte : Specifies the size in bytes of the string pointed to
* by the lpMultiByteStr parameter, or it can be -1 if
* the string is null terminated.
* lpWideCharStr : Points to a buffer that receives the translated
* string.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -