/* unicode2local.c */
/*-------------------------------------------------*
* $RCSfile: unicode2local.c,v $
* $Date: 2007/01/17 12:28:43 $
* $Author: lanzhu $
* $Log: unicode2local.c,v $
* Revision 1.1.1.1 2007/01/17 12:28:43 lanzhu
* 齐兰柱 准备一个新的代码分支
*
* Revision 1.3 2006/12/18 02:14:27 taiyun
* Modify include filename
*
* Revision 1.2 2006/12/12 00:30:51 lanzhu
* 删除没有用的程序, 去除 WARNING
*
* Revision 1.1.1.1 2006/12/05 03:01:16 lanzhu
* no message
*
* Revision 1.1.1.1 2006/12/01 09:49:35 lanzhu
* no message
*
* Revision 1.3 2006/09/21 00:53:57 lanzhu
* 添加了 2个函数
*
* Revision 1.2 2006/09/20 12:34:30 lanzhu
* 添加了 local To utf8
*
* Revision 1.1 2006/09/20 02:08:25 lanzhu
* 添加 UTF8 UTF16 UTF32 LOCAL 码值转换程序
*
*
*--------------------------------------------------*/
#include "SPMP_define.h"
#include "spca_general.h"
#include "SysUtility.h"
// Drop any earlier definitions of these names so the aliases below take
// effect in this file (an #undef of an undefined name is harmless).
#undef INT16
#undef UTF8
#undef UTF16
#undef UTF32
#undef FD32_EUTF8
// Type aliases used by the fd32_* converters; SINT16/UINT8/UINT16/UINT32 are
// presumably declared by the project headers included above.
#define INT16 SINT16
#define UTF8 UINT8
#define UTF16 UINT16
#define UTF32 UINT32
// Error codes returned by the fd32_* converters (both are -1).
#define FD32_EUTF8 (-1)
#define FD32_EUTF32 (-1)
// Status codes returned by the SPMP_Get*Ptr entry points.
#define AP_ERR (-1)
#define AP_OK (0)
/**************************************************************************
* G E N E R A L C O N S T A N T S *
**************************************************************************/
/**************************************************************************
* M A C R O S *
**************************************************************************/
/**************************************************************************
* D A T A T Y P E S *
**************************************************************************/
/**************************************************************************
* G L O B A L D A T A *
**************************************************************************/
// Size passed to MEM_ALLOC for the conversion buffer (0x400 = 1024,
// presumably a byte count).
#define CHANGE_BUF_LEN (0x400)
// Conversion buffer shared by SPMP_GetUniPtr and SPMP_GetLocalPtr. It is NULL
// until one of them allocates it on first use; every later call reuses it and
// therefore overwrites the previous result.
static UINT8 * gpChangeBuf = NULL;
/**************************************************************************
* E X T E R N A L R E F E R E N C E S *
**************************************************************************/
/**************************************************************************
* F U N C T I O N D E C L A R A T I O N S *
**************************************************************************/
// Local-codepage <-> UTF-16 converters defined outside this file (presumably
// code page 949, judging by the names).
extern INT16 char2uni_949(const unsigned char *rawstring, INT16 boundlen, UINT16 *uni);
extern INT16 uni2char_949(const UINT16 uni, unsigned char *out, INT16 boundlen);
// File-local UTF-8 / UTF-16 / UTF-32 helpers.
static INT16 fd32_utf8to16(const UTF8 *Utf8, UTF16 *Utf16);
static INT16 fd32_utf8to32(const UTF8 *s , UTF32 *Ch);
static INT16 fd32_utf32to16(UTF32 Ch , UTF16 *s);
static INT16 fd32_utf32to8(UTF32 Ch , UTF8 *s);
// fd32_utf16to32 is not declared because its definition is compiled out (#if 0).
//static INT16 fd32_utf16to32(const UTF16 *s , UTF32 *Ch);
// Convert a local-encoding string to Unicode (UTF-8).
INT16 SPMP_local2uni(UINT8 *plocal, UINT8 *puni);
// Convert a Unicode (UTF-8) string to the local encoding.
INT16 SPMP_uni2local( UINT8 *putf8, UINT8 *pbuf);
// Same conversions, but the result is placed in the shared buffer and its
// address is returned through the second argument.
INT16 SPMP_GetUniPtr( UINT8 * plocal, UINT8 * *puni);
INT16 SPMP_GetLocalPtr( UINT8 * puni, UINT8 * *plocal);
/************************************************************************/
/* Convert a local-encoding string to UTF-8 and return a pointer to it.
input:
    plocal [in]  UINT8 *    NUL-terminated string in the local encoding
    puni   [out] UINT8 * *  receives the address of the UTF-8 result
output:
    AP_OK (0) on success, AP_ERR on failure (NULL argument, input too long
    for the shared buffer, allocation failure, or conversion failure)
func:
    Converts plocal with SPMP_local2uni into the shared buffer gpChangeBuf
    and stores the buffer address in *puni.
note:
    The result lives in gpChangeBuf, which SPMP_GetLocalPtr also uses: the
    next call to either function overwrites it. *puni is left untouched on
    failure.
*/
/************************************************************************/
INT16 SPMP_GetUniPtr( UINT8 * plocal, UINT8 * *puni)
{
    const UINT8 *scan;
    int need = 1;   /* worst-case output size; starts with the terminator */

    if ((NULL == plocal) || (NULL == puni)) {
        ERROR_REPORT;
        return AP_ERR;
    }

    /*
     * The converter has no length parameter, so reject input whose worst-case
     * UTF-8 form cannot fit in the shared buffer. An ASCII byte yields one
     * byte; a double-byte character yields at most three, because
     * SPMP_local2uni only ever encodes a 16-bit code point.
     */
    for (scan = plocal; *scan != '\0'; ) {
        if (*scan & 0x80) {
            need += 3;
            /* Do not step over the terminator when the trail byte is missing. */
            scan += (scan[1] != '\0') ? 2 : 1;
        } else {
            need += 1;
            scan += 1;
        }
        if (need > CHANGE_BUF_LEN) {
            ERROR_REPORT;
            return AP_ERR;
        }
    }

    /* Allocate the shared buffer on first use. */
    if (NULL == gpChangeBuf) {
        gpChangeBuf = MEM_ALLOC( CHANGE_BUF_LEN );
        if (NULL == gpChangeBuf) {
            ERROR_REPORT;
            ERROR_STOP;
            /* Reached only if ERROR_STOP returns: never use the NULL buffer. */
            return AP_ERR;
        }
    }

    if (SPMP_local2uni(plocal, gpChangeBuf) != 0) {
        ERROR_REPORT;
        return AP_ERR;
    }

    *puni = gpChangeBuf;
    return AP_OK;
}
/************************************************************************/
/* Convert a UTF-8 string to the local encoding and return a pointer to it.
input:
    puni   [in]  UINT8 *    NUL-terminated UTF-8 string
    plocal [out] UINT8 * *  receives the address of the local-encoding result
output:
    AP_OK (0) on success, AP_ERR on failure (NULL argument, input too long
    for the shared buffer, allocation failure, or conversion failure)
func:
    Converts puni with SPMP_uni2local into the shared buffer gpChangeBuf
    and stores the buffer address in *plocal.
note:
    The result lives in gpChangeBuf, which SPMP_GetUniPtr also uses: the
    next call to either function overwrites it. *plocal is left untouched
    on failure.
*/
/************************************************************************/
INT16 SPMP_GetLocalPtr( UINT8 * puni, UINT8 * *plocal)
{
    int len;

    /* Validate the arguments. */
    if ((NULL == plocal) || (NULL == puni)) {
        ERROR_REPORT;
        return AP_ERR;
    }

    /*
     * The converter has no length parameter, so reject input that cannot fit
     * in the shared buffer together with its terminator. This relies on the
     * local form never being longer than the UTF-8 form: ASCII maps one to
     * one, and SPMP_uni2local gives uni2char_949 a 2-byte bound for every
     * multi-byte sequence.
     * NOTE(review): assumes uni2char_949 honours that bound - confirm.
     */
    for (len = 0; puni[len] != '\0'; len++) {
        if (len + 1 >= CHANGE_BUF_LEN) {
            ERROR_REPORT;
            return AP_ERR;
        }
    }

    /* Allocate the shared buffer on first use. */
    if (NULL == gpChangeBuf) {
        gpChangeBuf = MEM_ALLOC( CHANGE_BUF_LEN );
        if (NULL == gpChangeBuf) {
            ERROR_REPORT;
            ERROR_STOP;
            /* Reached only if ERROR_STOP returns: never use the NULL buffer. */
            return AP_ERR;
        }
    }

    if (SPMP_uni2local( puni, gpChangeBuf ) != 0) {
        ERROR_REPORT;
        return AP_ERR;
    }

    /* Hand back the local-encoding result. */
    *plocal = gpChangeBuf;
    return AP_OK;
}
/************************************************************************/
/* Convert a local-encoding string to UTF-8.
input:
    plocal [in]  NUL-terminated string in the local encoding
    puni   [out] destination for the NUL-terminated UTF-8 result
output:
    0 on success, AP_ERR if either pointer is NULL
func:
    Bytes below 0x80 are copied unchanged. Any other byte is treated as the
    first byte of a two-byte character, converted to a 16-bit code point by
    char2uni_949 and then encoded as UTF-8.
note:
    There is no output length parameter. The caller must supply room for
    the worst case of three output bytes per two-byte input character,
    plus the terminator.
    A character that cannot be converted is replaced by '?'.
*/
/************************************************************************/
INT16 SPMP_local2uni(UINT8 *plocal, UINT8 *puni)
{
    UINT16 unicode_16;
    INT16 ret;

    if ((NULL == plocal) || (NULL == puni)) {
        return AP_ERR;
    }

    while (*plocal != '\0') {
        /* ASCII is the same in both encodings: copy it through. */
        if ( !(*plocal & 0x80) ) {
            *puni++ = *plocal++;
            continue;
        }

        /*
         * A lead byte directly followed by the terminator is a truncated
         * character. Stop here: advancing by two would step over the
         * terminator and read past the end of the input.
         */
        if (plocal[1] == '\0') {
            *puni++ = '?';
            break;
        }

        /* Preset the result so it is never read uninitialised. */
        unicode_16 = '?';
        ret = char2uni_949(plocal, sizeof(UINT16), &unicode_16);
        /* NOTE(review): assumes a negative return means "no mapping" - confirm
           against char2uni_949. A zero code point is also rejected because it
           would terminate the output early. */
        if ((ret < 0) || (unicode_16 == 0)) {
            unicode_16 = '?';
        }

        if (unicode_16 < 0x80) {
            /* fd32_utf32to8 must not be given single-byte values. */
            *puni++ = (UINT8) unicode_16;
        } else {
            /* A 16-bit code point of 0x80 or more always encodes to 2 or 3 bytes. */
            puni += fd32_utf32to8((UINT32) unicode_16, puni);
        }

        /* NOTE(review): every non-ASCII character is assumed to occupy two
           input bytes, as in the original code - confirm against
           char2uni_949's return value. */
        plocal += 2;
    }

    *puni = '\0';
    return 0;
}
/************************************************************************/
/* Convert a UTF-8 string to the local encoding.
input:
    putf8 [in]  NUL-terminated UTF-8 string
    pbuf  [out] destination for the NUL-terminated local-encoding result
output:
    0 on success, AP_ERR if either pointer is NULL
func:
    Bytes below 0x80 are copied unchanged. Every multi-byte UTF-8 sequence
    is decoded to UTF-16 and then converted with uni2char_949.
note:
    There is no output length parameter; the caller must supply a large
    enough buffer.
    A malformed sequence, a character outside the 16-bit range, or a
    character uni2char_949 cannot convert is replaced by a single '?'.
*/
/************************************************************************/
INT16 SPMP_uni2local( UINT8 *putf8, UINT8 *pbuf)
{
    UINT16 utf16[2];    /* fd32_utf8to16 may write a two-unit surrogate pair */
    INT16 used;         /* UTF-8 bytes consumed by the decoder */
    INT16 written;      /* local-encoding bytes produced */

    if ((NULL == putf8) || (NULL == pbuf)) {
        return AP_ERR;
    }

    while (*putf8 != '\0') {
        /* ASCII is the same in both encodings: copy it through. */
        if ( !(*putf8 & 0x80) ) {
            *pbuf++ = *putf8++;
            continue;
        }

        utf16[0] = 0;
        utf16[1] = 0;
        used = fd32_utf8to16(putf8, utf16);
        if (used < 0) {
            /*
             * Malformed input. Adding the negative result to the pointer
             * would move it backwards, so resynchronise instead: skip this
             * byte and any continuation bytes that follow it. The loop
             * cannot pass the terminator because NUL is not a continuation
             * byte.
             */
            putf8++;
            while ((*putf8 & 0xC0) == 0x80) {
                putf8++;
            }
            *pbuf++ = '?';
            continue;
        }
        putf8 += used;

        /* A 4-byte sequence is a surrogate pair in UTF-16, which the
           single-unit uni2char_949 interface cannot take. */
        if (used > 3) {
            *pbuf++ = '?';
            continue;
        }

        written = uni2char_949(utf16[0], pbuf, sizeof(utf16[0]));
        /* NOTE(review): assumes a positive return is the number of bytes
           written and anything else is a failure - confirm against
           uni2char_949. */
        if (written > 0) {
            pbuf += written;
        } else {
            *pbuf++ = '?';
        }
    }

    *pbuf = '\0';
    return 0;
}
/************************************************************************/
/* Convert one multi-byte UTF-8 character to UTF-16.
input:
    Utf8  [in]  pointer to the first byte of the UTF-8 character
    Utf16 [out] destination for the UTF-16 form; it must have room for TWO
                units, because a 4-byte UTF-8 character becomes a
                surrogate pair
output:
    number of UTF-8 bytes consumed, or FD32_EUTF8 if the input is not a
    valid multi-byte sequence or cannot be expressed in UTF-16
func:
    Decodes with fd32_utf8to32, then encodes with fd32_utf32to16.
note:
    As with fd32_utf8to32, the caller handles single-byte (ASCII)
    characters itself.
*/
/************************************************************************/
static INT16 fd32_utf8to16(const UTF8 *Utf8, UTF16 *Utf16)
{
    UTF32 Ch;
    INT16 Res;

    /* UTF-8 -> scalar value */
    Res = fd32_utf8to32(Utf8 , &Ch);
    if (Res < 0)
    {
        return FD32_EUTF8;
    }

    /* scalar value -> UTF-16; report a failure instead of returning success
       with *Utf16 left unwritten */
    if (fd32_utf32to16(Ch , Utf16) < 0)
    {
        return FD32_EUTF8;
    }

    return Res;
}
/**************************************************************************/
/* Converts a UTF-8 character to Unicode scalar value (same as UTF-32).   */
/* On success, returns the number of BYTEs taken by the character.        */
/* On failure, returns FD32_EUTF8.                                        */
/*                                                                        */
/* The conversion is done according to the following rules:               */
/*                                                                        */
/*           Scalar                               UTF-8                   */
/* 00000000 00000000 0xxxxxxx <-> 0xxxxxxx                                */
/* 00000000 00000yyy yyxxxxxx <-> 110yyyyy 10xxxxxx                       */
/* 00000000 zzzzyyyy yyxxxxxx <-> 1110zzzz 10yyyyyy 10xxxxxx              */
/* 000uuuuu zzzzyyyy yyxxxxxx <-> 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx     */
/*                                                                        */
/* NOTE: For optimization reasons, it is assumed that this function is    */
/* not called when the UTF-8 character is not multi-byte. In this case    */
/* the caller should process the single-byte character directly.          */
/*                                                                        */
/* Only the lead byte and the 10xxxxxx marker of each continuation byte   */
/* are validated; overlong forms are not rejected. Each continuation byte */
/* is checked before the next one is read, so a NUL terminator inside a   */
/* truncated sequence stops the decoder instead of being read past.       */
/* *Ch may hold a partial value when FD32_EUTF8 is returned.              */
/**************************************************************************/
static INT16 fd32_utf8to32(const UTF8 *s , UTF32 *Ch)
{
    /* 2-byte form */
    if ((s[0] & 0xE0) == 0xC0)
    {
        *Ch = (UTF32) (s[0] & 0x1F) << 6;
        if ((s[1] & 0xC0) != 0x80) return FD32_EUTF8;
        *Ch += (UTF32) (s[1] & 0x3F);
        return 2;
    }
    /* 3-byte form */
    if ((s[0] & 0xF0) == 0xE0)
    {
        *Ch = (UTF32) (s[0] & 0x0F) << 12;
        if ((s[1] & 0xC0) != 0x80) return FD32_EUTF8;
        *Ch += (UTF32) (s[1] & 0x3F) << 6;
        if ((s[2] & 0xC0) != 0x80) return FD32_EUTF8;
        *Ch += (UTF32) (s[2] & 0x3F);
        return 3;
    }
    /* 4-byte form */
    if ((s[0] & 0xF8) == 0xF0)
    {
        *Ch = (UTF32) (s[0] & 0x07) << 18;
        if ((s[1] & 0xC0) != 0x80) return FD32_EUTF8;
        /* Accumulate with += here: a plain assignment would discard the
           three high bits taken from the lead byte. */
        *Ch += (UTF32) (s[1] & 0x3F) << 12;
        if ((s[2] & 0xC0) != 0x80) return FD32_EUTF8;
        *Ch += (UTF32) (s[2] & 0x3F) << 6;
        if ((s[3] & 0xC0) != 0x80) return FD32_EUTF8;
        *Ch += (UTF32) (s[3] & 0x3F);
        return 4;
    }
    /* Not the lead byte of a multi-byte sequence. */
    return FD32_EUTF8;
}
/**************************************************************************/
/* Encodes a Unicode scalar value (same as UTF-32) as UTF-8.              */
/* On success, returns the number of BYTEs written to s (2, 3 or 4).      */
/* On failure (Ch >= 0x200000), returns FD32_EUTF32 and writes nothing.   */
/* The bit layout is the one described for fd32_utf8to32.                 */
/* NOTE: For optimization reasons the single-byte case is not handled:    */
/* the caller must process values below 0x80 itself. Such a value passed  */
/* here is written in the 2-byte form.                                    */
/**************************************************************************/
static INT16 fd32_utf32to8(UTF32 Ch , UTF8 *s)
{
    INT16 len;      /* total bytes in the encoded form */
    INT16 i;
    UTF8 lead;      /* marker bits of the first byte */

    /* Pick the encoded length and lead-byte marker from the value's range. */
    if (Ch < 0x000800)
    {
        len = 2;
        lead = 0xC0;
    }
    else if (Ch < 0x010000)
    {
        len = 3;
        lead = 0xE0;
    }
    else if (Ch < 0x200000)
    {
        len = 4;
        lead = 0xF0;
    }
    else
    {
        return FD32_EUTF32;
    }

    /* Fill the continuation bytes from last to first, six bits at a time. */
    for (i = (INT16) (len - 1); i > 0; i--)
    {
        s[i] = (UTF8) (0x80 | (Ch & 0x3F));
        Ch >>= 6;
    }

    /* The bits left over fit into the free bits of the lead byte. */
    s[0] = (UTF8) (lead | Ch);
    return len;
}
/**************************************************************************/
/* Converts a Unicode scalar value (same as UTF-32) to UTF-16.            */
/* On success, returns the number of WORDs written to s (1 or 2).         */
/* On failure (Ch > 0x10FFFF), returns FD32_EUTF32 and writes nothing.    */
/*                                                                        */
/* The conversion is done according to the following rules:               */
/*                                                                        */
/*           Scalar                               UTF-16                  */
/* 00000000 zzzzyyyy yyxxxxxx <-> zzzzyyyy yyxxxxxx                       */
/* 000uuuuu zzzzyyyy yyxxxxxx <-> 110110ww wwzzzzyy 110111yy yyxxxxxx     */
/* where wwww = uuuuu - 1.                                                */
/*                                                                        */
/* Values below 0x10000 are copied through unchanged; the surrogate range */
/* 0xD800-0xDFFF is not rejected.                                         */
/* s must have room for two WORDs whenever Ch may be 0x10000 or above.    */
/**************************************************************************/
static INT16 fd32_utf32to16(UTF32 Ch , UTF16 *s)
{
    /* Below 65536: a single unit holds the value as is. */
    if (Ch < 0x010000)
    {
        *s = (UTF16) Ch;
        return 1;
    }
    /* 65536 and above: surrogate pair. UTF-16 ends at 0x10FFFF; anything
       larger would overflow the 10 payload bits of the high surrogate. */
    if (Ch <= 0x10FFFF)
    {
        Ch -= 0x010000;                             /* 20 payload bits remain */
        s[0] = (UTF16) (0xD800 + (Ch >> 10));       /* high surrogate: top 10 bits */
        s[1] = (UTF16) (0xDC00 + (Ch & 0x0003FF));  /* low surrogate: bottom 10 bits */
        return 2;
    }
    return FD32_EUTF32;
}
/************************************************************************/
/*
The code below was found to be unused when the program was compiled, so it
is compiled out.
*/
/************************************************************************/
#if 0
/**************************************************************************/
/* Decodes one UTF-16 character into a Unicode scalar value (UTF-32).     */
/* On success, returns the number of WORDs consumed (1 or 2).             */
/* On failure (high surrogate not followed by a low surrogate), returns   */
/* -1; *Ch then holds a partial value.                                    */
/*                                                                        */
/* The conversion is done according to the following rules:               */
/*                                                                        */
/*           Scalar                               UTF-16                  */
/* 00000000 zzzzyyyy yyxxxxxx <-> zzzzyyyy yyxxxxxx                       */
/* 000uuuuu zzzzyyyy yyxxxxxx <-> 110110ww wwzzzzyy 110111yy yyxxxxxx     */
/* where wwww = uuuuu - 1.                                                */
/**************************************************************************/
static INT16 fd32_utf16to32(const UTF16 *s , UTF32 *Ch)
{
    const UTF16 first = s[0];
    UTF16 second;

    /* Anything that is not a high surrogate stands for itself. */
    if ((first & 0xFC00) != 0xD800)
    {
        *Ch = first;
        return 1;
    }

    /* High surrogate: its ten payload bits become bits 10-19 of the offset
       from 0x10000. */
    *Ch = ((first & 0x03FF) << 10) + 0x010000;

    /* The second unit is only read once a pair is known to be required. */
    second = s[1];
    if ((second & 0xFC00) != 0xDC00)
    {
        return (-1);
    }

    /* Low surrogate supplies the bottom ten bits. */
    *Ch += second & 0x03FF;
    return 2;
}
#endif
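/*
 * Keyboard shortcuts of the web code viewer (page residue, not part of the
 * program):
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Switch theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */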