📄 plx_charset.c
字号:
* cchWideChar : Specifies the size, in wide characters, of the buffer
* pointed to by the lpWideCharStr parameter. If this
* value is zero, the function returns the required
* buffer size, in wide characters, and makes no use of
* the lpWideCharStr buffer.
* Return :
* If the function succeeds, and cchWideChar is nonzero, the return value
* is the number of wide characters written to the buffer pointed to by
* lpWideCharStr.
* If the function succeeds, and cchWideChar is zero, the return value is
* the required size, in wide characters, for a buffer that can receive
* the translated string.
* If the function fails, the return value is zero.
* Remarks
\**************************************************************************/
/*
 * PlxMultiByteToWideChar - convert a GBK/GB18030 multi-byte string into
 * 16-bit wide characters.
 *
 * CodePage       : must be CP_ACP; any other value fails.
 * dwFlags        : must be 0; any other value fails.
 * lpMultiByteStr : source bytes; must not be NULL.
 * cchMultiByte   : number of source bytes, or -1 when the source is
 *                  null-terminated (the terminator is then converted too).
 * lpWideCharStr  : destination buffer; only used when cchWideChar != 0.
 * cchWideChar    : capacity of lpWideCharStr in wide characters, or 0 to
 *                  query the required size.
 *
 * Returns the number of wide characters produced (or required, in size
 * query mode), and 0 on any failure, including a destination buffer that
 * is too small.
 *
 * Single-byte and 4-byte results are stored low byte first; 2-byte codes
 * are copied verbatim from the lookup tables.
 *
 * After a successful conversion up to two zero elements are appended, but
 * only where they still fit inside cchWideChar.  Nothing is written in size
 * query mode, where lpWideCharStr may legitimately be NULL.
 */
int PlxMultiByteToWideChar(UINT CodePage, DWORD dwFlags,
                           LPCSTR lpMultiByteStr, int cchMultiByte,
                           LPWSTR lpWideCharStr, int cchWideChar)
{
    int nSrcChars;              /* total number of source bytes to convert */
    int nDstChars = 0;          /* wide characters produced so far */
    int nBytes = 0;             /* source bytes consumed so far */
    char *lpSrc;
    WCHAR *lpDst;
    LONG lDistance;
    BOOL bForSize;
    BOOL bIsGb18030 = FALSE;

    if (!InitConvChars())
    {
        ASSERT(0);
        return 0;
    }

    if (CodePage != CP_ACP || dwFlags != 0 || lpMultiByteStr == NULL)
    {
        ASSERT(0);
        return 0;
    }

    /* A zero capacity means the caller only wants the required size. */
    bForSize = (cchWideChar == 0) ? TRUE : FALSE;

    if (!bForSize && lpWideCharStr == NULL)
    {
        /* real conversion requested without a destination buffer */
        ASSERT(0);
        return 0;
    }

    /* -1 means "null-terminated": include the terminator in the count. */
    if (cchMultiByte == -1)
        nSrcChars = (int)strlen(lpMultiByteStr) + 1;
    else
        nSrcChars = cchMultiByte;

    if (nSrcChars <= 0)
        return 0;

#if !defined(REMOVE_GB18030_SUPPORT)
    /* NOTE(review): the "TEXTMETRIC tm;" declaration and the
     * GetTextMetrics(NULL, &tm) call that used to fill it are disabled in
     * this function, so this test only builds if `tm` is supplied some
     * other way.  Confirm before compiling with GB18030 support enabled. */
    if (tm.tmCharSet == CHARSET_GB18030)
        bIsGb18030 = TRUE;
#endif /* REMOVE_GB18030_SUPPORT */

    /*
     * In GBK scope:
     *   high byte of a multi-byte character is between 0x81 and 0xfe,
     *   low byte of a multi-byte character is between 0x40 and 0xfe.
     */
    lpSrc = (char *)lpMultiByteStr;
    lpDst = lpWideCharStr;

    while (1)
    {
        BYTE ch1, ch2, ch3, ch4;
        int ret;

        /* Every source character yields exactly one wide character. */
        nDstChars++;
        if (!bForSize && nDstChars > cchWideChar)
            return 0;           /* output buffer too small */

        ret = CheckIsGbChar(lpSrc, nSrcChars - nBytes, bIsGb18030,
                            &ch1, &ch2, &ch3, &ch4);
        if (ret == -1)
        {
            /* Invalid sequence: fall back to treating the current byte as
             * a single-byte character. */
            ch1 = (BYTE)*lpSrc;
            ret = 1;
        }

        switch (ret)
        {
        case 1:
            /* single byte */
            if (!bForSize)
            {
                if (ch1 == 0x80)
                {
                    /* 0x80 maps to U+20AC */
                    *(BYTE *)lpDst = 0xAC;
                    *((BYTE *)lpDst + 1) = 0x20;
                }
                else if (ch1 > 0x80)
                {
                    /* not a valid single byte: substitute a space */
                    *(BYTE *)lpDst = 0x20;
                    *((BYTE *)lpDst + 1) = 0x00;
                }
                else
                {
                    /* 0x00..0x7F is copied through unchanged */
                    *(BYTE *)lpDst = ch1;
                    *((BYTE *)lpDst + 1) = 0x00;
                }
                lpDst++;
            }
            break;

        case 2:
            /* 2-byte code */
            if (!bForSize)
            {
                if (ch1 == 0xaa && (ch2 >= 0xa1 && ch2 <= 0xc8))
                {
                    /* GSM character in the user-defined code area:
                     * 2-byte entries in gb2gsm, indexed by the low byte */
                    lDistance = (ch2 - 0xa1) * 2;
                    memcpy(lpDst, gb2gsm + lDistance, 2);
                }
                else
                {
                    /* Main table: 191 two-byte entries (low byte 0x40..0xfe)
                     * per lead byte, starting at lead byte 0x81. */
                    lDistance = (ch1 - 0x81) * 191L * 2L + (ch2 - 0x40) * 2;
                    memcpy_far(lpDst, (PBYTEHUGE)pFileGb2Uni + lDistance, 2);
                }
                lpDst++;
            }
            break;

#if !defined(REMOVE_GB18030_SUPPORT)
        case 4:
            /* 4-byte code */
            if (!bForSize)
            {
                indextbl_t g2u;
                WORD wch;

                /* Linear index of the code counted from GB+81308130. */
                lDistance = (((ch1 - 0x81) * 10L + (ch2 - 0x30)) * 126L +
                             ch3 - 0x81) * 10L + ch4 - 0x30;
                if (lDistance > 0x99FBL)
                    return 0;

                /* GB+81308130 - GB+8431A439 */
                g2u = __gb18030_to_ucs_index[lDistance >> 8];
                if ((lDistance & 0xFF) >= g2u.tblbegin &&
                    (lDistance & 0xFF) <= g2u.tblend)
                    wch = __gb18030_4byte_to_ucs[lDistance - g2u.tbloffset];
                else
                    wch = g2u.algoffset + (lDistance & 0xFF);

                *(BYTE *)lpDst = (BYTE)(wch & 0xFF);
                *((BYTE *)lpDst + 1) = (BYTE)((wch >> 8) & 0xFF);
                lpDst++;
            }
            break;
#endif /* REMOVE_GB18030_SUPPORT */

        default:
            return 0;
        }

        lpSrc += ret;
        nBytes += ret;
        if (nBytes >= nSrcChars)
            break;
    }

    /*
     * Append the trailing zeros only during a real conversion and only
     * while they stay inside the caller's buffer.  Writing them
     * unconditionally dereferenced a NULL buffer in size query mode and
     * could run past cchWideChar otherwise.
     */
    if (!bForSize)
    {
        if (nDstChars < cchWideChar)
            lpWideCharStr[nDstChars] = 0;
        if (nDstChars + 1 < cchWideChar)
            lpWideCharStr[nDstChars + 1] = 0;
    }

    return nDstChars;
}
/*
static void Gbk2Gb(BYTE *pGbBuf, BYTE *pGbkCode)
{
if ((*pGbkCode >= 0xA1 && *pGbkCode <= 0xF7) &&
(*(pGbkCode + 1) >= 0xA1 && *(pGbkCode + 1) <= 0xFE))
memcpy(pGbBuf, pGbkCode, 2);
else
{
*pGbBuf = ' ';
*(pGbBuf + 1) = ' ';
}
}*/
#if !defined(REMOVE_GB18030_SUPPORT)
/*
 * Ucs2ToGB18030 - encode one 16-bit Unicode value as GB18030 bytes.
 *
 * wCode        : the value to encode.
 * lppDst       : address of the output cursor; advanced past the bytes
 *                written (untouched when bForSize is set).
 * pDstChars    : running output byte count; increased by the number of
 *                bytes this character needs, before the capacity check.
 * cchMultiByte : output capacity in bytes, compared against *pDstChars.
 * bForSize     : when set, only *pDstChars is updated and nothing is
 *                written.
 *
 * Returns the encoding class chosen for the character (1, 2 or 4), or 0
 * when the output buffer is too small.  For the 2-byte class the return
 * value is 2 even if the low byte is zero and only one byte is emitted.
 */
static int Ucs2ToGB18030(WCHAR wCode, BYTE **lppDst, int *pDstChars,
                         int cchMultiByte, BOOL bForSize)
{
    DWORD dwCode;               /* encoded value, or linear 4-byte index */
    int nLen = 2;               /* 2 bytes is the most common case */
    indextbl_t row;
    BYTE quad[4] = {'\0', '\0', '\0', '\0'};
    BOOL bResolved = FALSE;     /* set once a special case picked dwCode */

    if (wCode <= 0x7F)
    {
        /* ASCII is emitted unchanged */
        nLen = 1;
        dwCode = (DWORD)wCode;
        bResolved = TRUE;
    }
    else if (wCode == 0x20AC)
    {
        /* U+20AC is emitted as the single byte 0x80 */
        nLen = 1;
        dwCode = 0x0080;
        bResolved = TRUE;
    }
    else if (wCode <= 0xFF || (wCode >= 0x0390 && wCode < 0x03B0))
    {
        /* Latin-1 / Greek values may live in the user-defined GB area:
         * scan gb2gsm as 2-byte entries stored low byte first. */
        int slot;
        BYTE *pEntry = (BYTE *)gb2gsm;

        for (slot = 0; slot < sizeof(gb2gsm) / sizeof(WCHAR);
             slot++, pEntry += 2)
        {
            if ((*pEntry + (*(pEntry + 1) << 8)) == wCode)
            {
                nLen = 2;
                dwCode = 0xAAA1 + slot;
                bResolved = TRUE;
                break;
            }
        }
        /* no match: continue with the general mapping below */
    }

    if (!bResolved)
    {
        if (wCode <= 0xD7FFL || (wCode >= 0xE766L && wCode <= 0xFFFFL))
        {
            row = __ucs_to_gb18030_index[wCode >> 8];
            if ((wCode & 0xFF) >= row.tblbegin &&
                (wCode & 0xFF) <= row.tblend)
            {
                /* Covered by the mapping table (2-byte or 4-byte). */
                WORD packed = __ucs_to_gb18030[wCode - row.tbloffset];

                if (packed > 0x8000L)
                {
                    /* entry is the 2-byte GB18030 code itself */
                    dwCode = (DWORD)packed;
                }
                else
                {
                    /* 4-byte code stored in a compact format */
                    BYTE lead = 0x81;
                    BYTE second = 0x30 + (packed >> 11);

                    if (packed >= 0x7000)
                    {
                        lead += 3;
                        second -= 14;
                    }
                    else if (packed >= 0x6000)
                    {
                        lead += 2;
                        second -= 6;
                    }
                    else if (packed >= 0x3000)
                    {
                        lead += 1;
                        second -= 6;
                    }
                    else if (second >= 0x31)
                    {
                        second += 5;
                    }

                    quad[0] = lead;
                    quad[1] = second;
                    quad[2] = 0x81 + ((packed >> 4) & 0x7F);
                    quad[3] = 0x30 + (packed & 0xF);
                    nLen = 4;
                }
            }
            else
            {
                /* Outside the table window: algorithmic 4-byte code. */
                dwCode = row.algoffset + (wCode & 0xFF);
                /* the index table cannot cover this one special range */
                if (wCode >= 0x49B8 && wCode <= 0x49FF)
                    dwCode -= 11;
                nLen = 4;
            }
        }
#if 0 // not supporting user-defined unicode areas
        else if (wCode >= 0xE000L && wCode <= 0xE765L)
        {
            /* User-defined areas in GB18030 (2-byte) */
            if (wCode <= 0xE233L)
                dwCode = 0xAAA1L + (((wCode - 0xE000L) / 94) << 8) +
                         (wCode - 0xE000L) % 94;
            else if (wCode <= 0xE4C5L)
                dwCode = 0xF8A1L + (((wCode - 0xE234L) / 94) << 8) +
                         (wCode - 0xE234L) % 94;
            else
            {
                dwCode = 0xA140L + (((wCode - 0xE4C6L) / 96) << 8) +
                         (wCode - 0xE4C6L) % 96;
                /* Skip the gap at 0x7F */
                if ((dwCode & 0xFF) >= 0x7F)
                    dwCode++;
            }
        }
#endif
        else
        {
            /* unmappable value: substitute a space */
            nLen = 1;
            dwCode = 0x20;
        }
    }

    if (nLen == 1)
    {
        (*pDstChars)++;
        if (!bForSize)
        {
            if (*pDstChars > cchMultiByte)
                return 0;
            **lppDst = (BYTE)dwCode;
            (*lppDst)++;
        }
    }
    else if (nLen == 2)
    {
        quad[0] = (unsigned char)((dwCode >> 8) & 0xFF);
        quad[1] = (unsigned char)(dwCode & 0xFF);

        /* a zero low byte is not emitted, so it needs one byte less */
        *pDstChars += (quad[1] == '\0') ? 1 : 2;

        if (!bForSize)
        {
            if (*pDstChars > cchMultiByte)
                return 0;
            **lppDst = quad[0];
            (*lppDst)++;
            if (quad[1] != '\0')
            {
                **lppDst = quad[1];
                (*lppDst)++;
            }
        }
    }
    else if (nLen == 4)
    {
        *pDstChars += 4;
        if (!bForSize)
        {
            BYTE *pOut;

            if (*pDstChars > cchMultiByte)
                return 0;

            pOut = *lppDst;
            if (quad[0] && quad[1] && quad[2] && quad[3])
            {
                /* bytes were already unpacked from the compact entry */
                pOut[0] = quad[0];
                pOut[1] = quad[1];
                pOut[2] = quad[2];
                pOut[3] = quad[3];
            }
            else
            {
                /* split the linear index into its four digits,
                 * with radices 10, 126, 10 from the last byte up */
                pOut[3] = (unsigned char)(dwCode % 10) + 0x30;
                dwCode /= 10;
                pOut[2] = (unsigned char)(dwCode % 126) + 0x81;
                dwCode /= 126;
                pOut[1] = (unsigned char)(dwCode % 10) + 0x30;
                pOut[0] = (unsigned char)(dwCode / 10) + 0x81;
            }
            *lppDst = pOut + 4;
        }
    }
    else
    {
        return 0;
    }

    return nLen;
}
#endif // REMOVE_GB18030_SUPPORT
/*
 * IsUniInUserGb - search the gb2gsm table for a 16-bit Unicode value.
 *
 * The table is read as consecutive 2-byte entries, low byte first.
 * Returns the zero-based index of the first entry equal to wCode, or -1
 * when no entry matches.
 */
static int IsUniInUserGb(WCHAR wCode)
{
    BYTE *pEntry = (BYTE *)gb2gsm;
    int slot = 0;

    while (slot < sizeof(gb2gsm) / sizeof(WCHAR))
    {
        if ((pEntry[0] + (pEntry[1] << 8)) == wCode)
            return slot;

        pEntry += 2;
        slot++;
    }

    return -1;
}
static int Ucs2ToGB2312(UINT CodePage, WCHAR wCode, BYTE *lpDst,
int *pDstChars, int *pchMultiByte, BOOL bForSize)
{
int count;
int case_no = -1;
WCHAR table[20] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x30, 0x31, 0x32, 0x33, 0xF9, 0xFA, 0xFE, 0xFF};
/* According to the high byte (sometimes together with the low byte) of
* the UNICODE value, dispatch to one of the cases 0, 1, 2, 3, 4.
* 0: the high byte is 0x0; it is handled separately;
* 1: the UNICODE value is in the range 0x4E00 to 0x9FFF;
* 2: the UNICODE value is in the range 0xE000 to 0xE8FF;
* 3: the high byte is one of the elements of table[];
* NOTE: if the high byte is in table[] and the UNICODE value can
* be found in the file "other_gb.cod", it is handled
* here.
* If the UNICODE value cannot be found in the file
* "other_gb.cod", it falls through to case 4 and is
* handled there.
* 4: others.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -