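/*
 * Code-viewer page banner (not part of the program):
 *   File:      unicode2local.c
 *   Directory: file management and basic JPG image handling source code
 *              for an embedded system
 *   Language:  C
 *   Font size:
 */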
/*-------------------------------------------------*
* $RCSfile: unicode2local.c,v $
* $Date: 2007/01/17 12:28:43 $
* $Author: lanzhu $
* $Log: unicode2local.c,v $
* Revision 1.1.1.1  2007/01/17 12:28:43  lanzhu
* 齐兰柱 准备一个新的代码分支
*
* Revision 1.3  2006/12/18 02:14:27  taiyun
* Modify include filename
*
* Revision 1.2  2006/12/12 00:30:51  lanzhu
* 删除没有用的程序, 去除 WARNING
*
* Revision 1.1.1.1  2006/12/05 03:01:16  lanzhu
* no message
*
* Revision 1.1.1.1  2006/12/01 09:49:35  lanzhu
* no message
*
* Revision 1.3  2006/09/21 00:53:57  lanzhu
* 添加了 2个函数
*
* Revision 1.2  2006/09/20 12:34:30  lanzhu
* 添加了 local To utf8
*
* Revision 1.1  2006/09/20 02:08:25  lanzhu
* 添加 UTF8 UTF16 UTF32 LOCAL 码值转换程序
*
*
*--------------------------------------------------*/
#include "SPMP_define.h"
#include "spca_general.h"

#include "SysUtility.h"

// Remove any earlier definitions of the names that are redefined below.
// NOTE(review): FD32_EUTF32, AP_ERR and AP_OK are defined below without a
// matching #undef here.
#undef INT16
#undef UTF8
#undef UTF16
#undef UTF32
#undef FD32_EUTF8

// Map the short type names used by the fd32_* helpers onto the project's
// integer types (SINT16/UINT8/UINT16/UINT32 are declared outside this file).
#define INT16		SINT16
#define UTF8		UINT8
#define UTF16		UINT16
#define UTF32		UINT32

// Error codes returned by the fd32_* conversion helpers.
#define		FD32_EUTF8		(-1)
#define		FD32_EUTF32		(-1)

// Status codes returned by the SPMP_* entry points.
#define		AP_ERR			(-1)
#define		AP_OK			(0)


/**************************************************************************
 *                   G E N E R A L    C O N S T A N T S                   *
 **************************************************************************/

/**************************************************************************
 *                             M A C R O S                                *
 **************************************************************************/

/**************************************************************************
 *                         D A T A   T Y P E S                            *
 **************************************************************************/

/**************************************************************************
 *                        G L O B A L   D A T A                           *
 **************************************************************************/

// Size passed to MEM_ALLOC when the conversion buffer is created.
#define	CHANGE_BUF_LEN		(0x400)

// Address of the encoding-conversion buffer. NULL until SPMP_GetUniPtr() or
// SPMP_GetLocalPtr() allocates it; both functions use this same buffer and
// nothing in the visible code releases it.
static	UINT8 * gpChangeBuf = NULL; 

 /**************************************************************************
 *                 E X T E R N A L   R E F E R E N C E S                  *
 **************************************************************************/

 /**************************************************************************
 *               F U N C T I O N   D E C L A R A T I O N S                *
 **************************************************************************/
// Local-encoding <-> UTF-16 conversion routines defined outside this file.
// NOTE(review): "949" presumably refers to code page 949 - confirm against
// their implementation.
extern INT16 char2uni_949(const unsigned char *rawstring, INT16 boundlen, UINT16 *uni);
extern INT16 uni2char_949(const UINT16 uni, unsigned char *out, INT16 boundlen);

// File-local UTF-8 / UTF-16 / UTF-32 helpers.
static INT16 fd32_utf8to16(const UTF8 *Utf8, UTF16 *Utf16);
static INT16 fd32_utf8to32(const UTF8 *s , UTF32 *Ch);
static INT16 fd32_utf32to16(UTF32 Ch , UTF16 *s);
static INT16 fd32_utf32to8(UTF32 Ch , UTF8 *s);
// Prototype disabled: the definition of fd32_utf16to32 is compiled out with #if 0.
//static INT16 fd32_utf16to32(const UTF16 *s , UTF32 *Ch);


// Convert local-encoding text to Unicode (written out as UTF-8)
INT16 SPMP_local2uni(UINT8 *plocal, UINT8 *puni);
// Convert Unicode (UTF-8) text to the local encoding
INT16 SPMP_uni2local( UINT8 *putf8, UINT8 *pbuf);

// Same conversions, but the result is placed in this file's shared buffer
// and its address is returned through the second argument.
INT16 SPMP_GetUniPtr( UINT8 * plocal, UINT8 * *puni);
INT16 SPMP_GetLocalPtr( UINT8 * puni, UINT8 * *plocal);



/************************************************************************/
/*	Convert local-encoding text to UTF-8 and return where the result is

  input:
			plocal	[in]	UINT8 *		NUL-terminated text in the local encoding
			puni	[out]	UINT8 * *	receives the address of the UTF-8 text
  output:
			AP_OK on success, AP_ERR when either argument is NULL
  func:
			Creates the shared conversion buffer with
			MEM_ALLOC(CHANGE_BUF_LEN) on first use, runs SPMP_local2uni()
			into it and stores the buffer address in *puni.
  note:
			- The buffer is also used by SPMP_GetLocalPtr(), so the text
			  returned here is overwritten by the next call to either one.
			- SPMP_local2uni() takes no output size: the converted text
			  has to fit in the buffer.
			- NOTE(review): ERROR_STOP is presumably fatal; if it returns,
			  a NULL buffer is handed to the converter - confirm.
                                                                        */
/************************************************************************/
INT16 SPMP_GetUniPtr( UINT8 * plocal, UINT8 * *puni)
{
	INT16 status = AP_ERR;

	if (plocal && puni)
	{
		/* Create the shared buffer the first time it is needed. */
		if (!gpChangeBuf)
		{
			gpChangeBuf = MEM_ALLOC( CHANGE_BUF_LEN );
			if (!gpChangeBuf)
			{
				ERROR_REPORT;
				ERROR_STOP;
			}
		}

		/* Convert into the shared buffer; the converter's status is not examined. */
		SPMP_local2uni(plocal, gpChangeBuf);

		/* Hand the buffer back to the caller. */
		*puni = gpChangeBuf;
		status = AP_OK;
	}
	else
	{
		ERROR_REPORT;
	}

	return status;
}

/************************************************************************/
/*	Convert UTF-8 text to the local encoding and return where the result is
  input:
			puni	[in]	UINT8 *		NUL-terminated UTF-8 text
			plocal	[out]	UINT8 * *	receives the address of the local-encoding text
  output:
			AP_OK on success, AP_ERR when either argument is NULL
  func:
			Creates the shared conversion buffer with
			MEM_ALLOC(CHANGE_BUF_LEN) on first use, runs SPMP_uni2local()
			into it and stores the buffer address in *plocal.
  note:
			- The buffer is also used by SPMP_GetUniPtr(), so the text
			  returned here is overwritten by the next call to either one.
			- SPMP_uni2local() takes no output size: the converted text
			  has to fit in the buffer.
			- NOTE(review): ERROR_STOP is presumably fatal; if it returns,
			  a NULL buffer is handed to the converter - confirm.
                                                                        */
/************************************************************************/
INT16 SPMP_GetLocalPtr( UINT8 * puni, UINT8 * *plocal)
{
	INT16 status = AP_ERR;

	if (plocal && puni)
	{
		/* Create the shared buffer the first time it is needed. */
		if (!gpChangeBuf)
		{
			gpChangeBuf = MEM_ALLOC( CHANGE_BUF_LEN );
			if (!gpChangeBuf)
			{
				ERROR_REPORT;
				ERROR_STOP;
			}
		}

		/* Convert into the shared buffer; the converter's status is not examined. */
		SPMP_uni2local( puni, gpChangeBuf );

		/* Hand the buffer back to the caller. */
		*plocal = gpChangeBuf;
		status = AP_OK;
	}
	else
	{
		ERROR_REPORT;
	}

	return status;
}

/************************************************************************/
/*	Convert local-encoding text to UTF-8
  input:
			plocal	[in]	NUL-terminated text in the local encoding
			puni	[out]	destination for the NUL-terminated UTF-8 text
  output:
			0 when the conversion ran, AP_ERR when either pointer is NULL
  func:
			Bytes below 0x80 are copied unchanged. Any other byte is taken
			as the first byte of a two-byte character, converted to a
			16-bit code with char2uni_949() and re-encoded as UTF-8.
  note:
			- No output size is passed in: the caller must supply enough
			  room (at most 3 output bytes per 2 input bytes, plus the
			  terminator).
			- A character that char2uni_949() rejects, or that comes back
			  as code 0, is written as '?' instead of using an unset or
			  zero code.
			- A first byte directly followed by the terminator is written
			  as '?' and ends the conversion, so the terminator is never
			  stepped over.
			- NOTE(review): only a negative char2uni_949() result is
			  treated as failure, and the input always advances by two
			  bytes as the previous code did - confirm against the
			  char2uni_949() implementation.
                                                                        */
/************************************************************************/
INT16 SPMP_local2uni(UINT8 *plocal, UINT8 *puni)
{
	UINT16	unicode_16;
	INT16	ret;

	if ((NULL == plocal) || (NULL == puni)) {
		return AP_ERR;
	}

	while (*plocal != '\0')
	{
		// ASCII: copy the byte as it is
		if ( !(*plocal & 0x80) ){
			*puni++ = *plocal++;
			continue;
		}

		// Truncated two-byte character: do not read or skip past the terminator
		if (plocal[1] == '\0') {
			*puni++ = '?';
			break;
		}

		// Local encoding -> 16-bit Unicode value
		unicode_16 = 0;
		ret = char2uni_949(plocal, sizeof(UINT16), &unicode_16);

		if ((ret < 0) || (unicode_16 == 0)) {
			// Not convertible: emit a placeholder
			*puni++ = '?';
		}
		else if (unicode_16 < 0x80) {
			// fd32_utf32to8() only produces multi-byte forms, so a
			// 7-bit result is written directly as one byte
			*puni++ = (UINT8) unicode_16;
		}
		else {
			// 16-bit Unicode value -> UTF-8 (2 or 3 bytes)
			ret = fd32_utf32to8((UINT32)unicode_16, puni);
			puni += ret;
		}

		plocal += 2;
	}

	*puni = '\0';

	return	0;
}
/************************************************************************/
/*	Convert UTF-8 text to the local encoding
  input:
			putf8	[in]	NUL-terminated UTF-8 text
			pbuf	[out]	destination for the NUL-terminated local-encoding text
  output:
			0 when the conversion ran, AP_ERR when either pointer is NULL
  func:
			Bytes below 0x80 are copied unchanged. Every other sequence is
			decoded with fd32_utf8to16() and converted with uni2char_949().
  note:
			- No output size is passed in: the caller must supply enough room.
			- A malformed UTF-8 sequence (negative decoder result) is
			  skipped, together with the continuation bytes that follow
			  it, and written as one '?'. The decoder's negative result
			  is never added to the input pointer.
			- A 4-byte sequence cannot be passed to uni2char_949(), which
			  takes a single 16-bit unit, so it is written as '?'.
			- A negative uni2char_949() result is written as '?' instead
			  of being added to the output pointer.
			- The decoder is given room for two UTF-16 units, so a
			  surrogate pair cannot overrun the local variable.
                                                                        */
/************************************************************************/
INT16 SPMP_uni2local( UINT8 *putf8, UINT8 *pbuf)
{
	UINT16	units[2];
	INT16	used;
	INT16	written;

	if ((NULL == putf8) || (NULL == pbuf)) {
		return AP_ERR;
	}

	while (*putf8 != '\0')
	{
		// ASCII: copy the byte as it is
		if ( !(*putf8 & 0x80) )
		{
			*pbuf++ = *putf8++;
			continue;
		}

		// Multi-byte UTF-8 -> UTF-16; the result is the number of input bytes used
		used = fd32_utf8to16(putf8, &units[0]);
		if (used < 0)
		{
			// Malformed: drop the offending byte and any continuation bytes after it
			putf8++;
			while ((*putf8 & 0xC0) == 0x80)
			{
				putf8++;
			}
			*pbuf++ = '?';
			continue;
		}
		putf8 += used;

		// Needs more than one UTF-16 unit: not representable through uni2char_949()
		if (used > 3)
		{
			*pbuf++ = '?';
			continue;
		}

		// UTF-16 unit -> local encoding; the result is the number of bytes written
		written = uni2char_949(units[0], pbuf, sizeof(units[0]));
		if (written < 0)
		{
			*pbuf++ = '?';
		}
		else
		{
			pbuf += written;
		}
	}

	*pbuf = '\0';

	return 0;
}

/************************************************************************/
/*	Decode one multi-byte UTF-8 character into a single UTF-16 unit
  input:
		Utf8	[in]	first byte of a multi-byte UTF-8 sequence
		Utf16	[out]	receives exactly ONE UTF-16 code unit
  output:
		number of UTF-8 bytes consumed, or FD32_EUTF8 when the
		sequence is malformed (*Utf16 is then left untouched)
  func:
		UTF-8 -> scalar value (fd32_utf8to32) -> UTF-16 (fd32_utf32to16)
  note:
		fd32_utf32to16() may produce two units. The conversion is
		therefore done into a local pair, and only a one-unit result is
		copied out; the caller's storage for one unit is never overrun.
		A character that does not fit in one unit is reported as
		U+FFFD, while the byte count returned still covers the whole
		sequence so the caller keeps advancing correctly.
                                                                        */
/************************************************************************/
static INT16 fd32_utf8to16(const UTF8 *Utf8, UTF16 *Utf16)
{
	UTF32	Ch;
	UTF16	Units[2];
	INT16	Res;

	// UTF-8 -> UTF-32
	Res = fd32_utf8to32(Utf8 , &Ch);
	if (Res < 0)
	{
		return FD32_EUTF8;
	}

	// UTF-32 -> UTF-16, into local storage large enough for a pair
	if (fd32_utf32to16(Ch , Units) == 1)
	{
		*Utf16 = Units[0];
	}
	else
	{
		// Surrogate pair or invalid scalar: not expressible in one unit
		*Utf16 = (UTF16) 0xFFFD;
	}

	return Res;
}

/**************************************************************************/
/* Converts a UTF-8 character to Unicode scalar value (same as UTF-32).  */
/* On success, returns the number of BYTEs taken by the character and    */
/* stores the value in *Ch.                                              */
/* On failure, returns FD32_EUTF8 and leaves *Ch untouched.              */
/*                                                                       */
/* The conversion is done according to the following rules:              */
/*                                                                       */
/*           Scalar                               UTF-8                  */
/* 00000000 00000000 0xxxxxxx <-> 0xxxxxxx                               */
/* 00000000 00000yyy yyxxxxxx <-> 110yyyyy  10xxxxxx                     */
/* 00000000 zzzzyyyy yyxxxxxx <-> 1110zzzz  10yyyyyy  10xxxxxx           */
/* 000uuuuu zzzzyyyy yyxxxxxx <-> 11110uuu  10uuzzzz  10yyyyyy  10xxxxxx */
/*                                                                       */
/* NOTE: It is assumed that this function is not called when the UTF-8   */
/* character is not multi-byte; a single-byte (0xxxxxxx) first byte is   */
/* reported as FD32_EUTF8. The caller should process the single-byte     */
/* character directly.                                                   */
/*                                                                       */
/* The bits are accumulated in a UTF32 value, so the shifts do not       */
/* depend on the width of int, and every continuation byte is ADDED to   */
/* the value (the 4-byte case used to overwrite the bits taken from the  */
/* first byte). Bytes are examined in order and decoding stops at the    */
/* first byte that is not a continuation byte, so a terminating NUL is   */
/* never read past.                                                      */
/**************************************************************************/
static INT16 fd32_utf8to32(const UTF8 *s , UTF32 *Ch)
{
	UTF32	Value;
	INT16	Trail;		/* number of continuation bytes expected */
	INT16	i;

	if ((s[0] & 0xE0) == 0xC0)
	{
		// 2-byte sequence: 110yyyyy
		Value = (UTF32) (s[0] & 0x1F);
		Trail = 1;
	}
	else if ((s[0] & 0xF0) == 0xE0)
	{
		// 3-byte sequence: 1110zzzz
		Value = (UTF32) (s[0] & 0x0F);
		Trail = 2;
	}
	else if ((s[0] & 0xF8) == 0xF0)
	{
		// 4-byte sequence: 11110uuu
		Value = (UTF32) (s[0] & 0x07);
		Trail = 3;
	}
	else
	{
		return FD32_EUTF8;
	}

	// Each continuation byte (10xxxxxx) contributes its low 6 bits
	for (i = 1; i <= Trail; i++)
	{
		if ((s[i] & 0xC0) != 0x80)
		{
			return FD32_EUTF8;
		}
		Value = (Value << 6) | (UTF32) (s[i] & 0x3F);
	}

	*Ch = Value;
	return (INT16) (Trail + 1);
}


/**************************************************************************/
/* Encodes a Unicode scalar value (same as UTF-32) as multi-byte UTF-8.  */
/* Returns the number of BYTEs written to s (2, 3 or 4), or -1 when Ch   */
/* is 0x200000 or larger.                                                */
/*                                                                       */
/*   Ch below 0x000800 -> 110yyyyy 10xxxxxx                              */
/*   Ch below 0x010000 -> 1110zzzz 10yyyyyy 10xxxxxx                     */
/*   Ch below 0x200000 -> 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx            */
/*                                                                       */
/* NOTE: there is no single-byte case. A value below 0x80 still comes    */
/* out as two bytes, so the caller is expected to copy single-byte       */
/* characters itself and not call this function for them.                */
/**************************************************************************/
static INT16 fd32_utf32to8(UTF32 Ch , UTF8 *s)
{
	INT16	Len;		/* total number of bytes to write */
	UTF8	Lead;		/* marker bits of the first byte */
	INT16	i;

	if (Ch < 0x000800)
	{
		Len  = 2;
		Lead = 0xC0;
	}
	else if (Ch < 0x010000)
	{
		Len  = 3;
		Lead = 0xE0;
	}
	else if (Ch < 0x200000)
	{
		Len  = 4;
		Lead = 0xF0;
	}
	else
	{
		return -1;
	}

	/* Fill the continuation bytes from the last one backwards, 6 bits each. */
	for (i = (INT16) (Len - 1); i > 0; i--)
	{
		s[i] = (UTF8) (0x80 | (Ch & 0x3F));
		Ch >>= 6;
	}

	/* Whatever is left of Ch fits below the marker bits of the first byte. */
	s[0] = (UTF8) (Lead | Ch);

	return Len;
}


/**************************************************************************/
/* Converts a Unicode scalar value (same as UTF-32) to UTF-16.            */
/* On success, returns the number of WORDs written to s (1 or 2).         */
/* On failure (Ch above 0x10FFFF), returns FD32_EUTF32 and stores the     */
/* single unit U+FFFD in s[0], so that a caller which ignores the status  */
/* still reads a defined value.                                           */
/*                                                                        */
/* The conversion is done according to the following rules:               */
/*                                                                        */
/*           Scalar                              UTF-16                   */
/* 00000000 zzzzyyyy yyxxxxxx <-> zzzzyyyy yyxxxxxx                       */
/* 000uuuuu zzzzyyyy yyxxxxxx <-> 110110ww wwzzzzyy  110111yy yyxxxxxx    */
/* where wwww = uuuuu - 1.                                                */
/*                                                                        */
/* The pair is built from Offset = Ch - 0x10000: the first word carries   */
/* the upper 10 bits of Offset and the second word the lower 10 bits.     */
/* s must have room for two words whenever Ch can be 0x10000 or above.    */
/**************************************************************************/
static INT16 fd32_utf32to16(UTF32 Ch , UTF16 *s)
{
	UTF32	Offset;

	// Below 65536: the value is its own UTF-16 unit
	if (Ch < 0x010000)
	{
		*s = (UTF16) Ch;
		return 1;
	}

	// 0x10000 .. 0x10FFFF: surrogate pair
	if (Ch <= 0x10FFFF)
	{
		Offset = Ch - 0x010000;
		s[0] = (UTF16) (0xD800 + (Offset >> 10));
		s[1] = (UTF16) (0xDC00 + (Offset & 0x0003FF));
		return 2;
	}

	// Beyond the range UTF-16 can express
	*s = (UTF16) 0xFFFD;
	return FD32_EUTF32;
}

/************************************************************************/
/*			
				Code reported as unused when the program was compiled
                                                                        */
/************************************************************************/

#if 0

/**************************************************************************/
/* Decodes one UTF-16 character into a Unicode scalar value (UTF-32).     */
/* Returns the number of WORDs consumed (1 or 2), or -1 when a first      */
/* surrogate is not followed by a second surrogate.                       */
/*                                                                        */
/*           Scalar                              UTF-16                   */
/* 00000000 zzzzyyyy yyxxxxxx <-> zzzzyyyy yyxxxxxx                       */
/* 000uuuuu zzzzyyyy yyxxxxxx <-> 110110ww wwzzzzyy  110111yy yyxxxxxx    */
/* where wwww = uuuuu - 1.                                                */
/*                                                                        */
/* Any word outside 0xD800..0xDBFF is returned unchanged as the value.    */
/* On the -1 path *Ch already holds the contribution of the first word.   */
/**************************************************************************/
static INT16 fd32_utf16to32(const UTF16 *s , UTF32 *Ch)
{
	UTF16	First = s[0];
	UTF16	Second;

	/* Not a first surrogate: the word is the scalar value itself. */
	if ((First & 0xFC00) != 0xD800)
	{
		*Ch = First;
		return 1;
	}

	/* Upper 10 bits come from the first word, offset by 0x10000. */
	*Ch = ((First & 0x03FF) << 10) + 0x010000;

	/* The second word is only read once a pair is known to be required. */
	Second = s[1];
	if ((Second & 0xFC00) != 0xDC00)
	{
		return (-1);
	}

	/* Lower 10 bits come from the second word. */
	*Ch += Second & 0x03FF;
	return 2;
}

#endif
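/*
 * Keyboard shortcuts (code-viewer page help text, not part of the program):
 *
 *   Copy code         Ctrl + C
 *   Search code       Ctrl + F
 *   Full-screen mode  F11
 *   Switch theme      Ctrl + Shift + D
 *   Show shortcuts    ?
 *   Larger font       Ctrl + =
 *   Smaller font      Ctrl + -
 */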