⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convertcode.c

📁 对于通用的嵌入式系统字处理单元,本模块化程序可以快速实现常用汉字编码(GB2312,GB18030,BIG5码,Unicode码)的相互转换,无需库函数.从而实现使用单汉字字库显现多内码.具体参见re
💻 C
字号:
// convertCode.cpp : Defines the entry point for the console application.
//
/***************************************************************************
* Copyright (c) 2008, .......
* All rights reserved.
* 
* 文件名称:convertCode.c
* 文件标识:
* 摘    要:汉字内码转换处理函数
* 			@main()
*			@Utf16TOGb18030()
*			@SearchCodeTable()
*			@Utf8TOGb18030()
*			@Big5TOGb18030()
* 当前版本:1.0
* 作    者:CZ
* 完成日期: 
*
* 修改与优化处理:
**************************************************************************/

#include "stdafx.h"

// Build-time feature switches: a non-zero value compiles the matching converter
// (and its branch in main()) into the program.
// main() also consults them in the order _UTF8_GB, _UTF16BE_GB, _UTF16LE_GB to
// choose its built-in mode and file names.

// Big-endian UTF-16 input (mode string "UTF16BE_GB").
#define _UTF16BE_GB	1
// Little-endian UTF-16 input (mode string "UTF16_GB").
#define _UTF16LE_GB	1
// UTF-8 input (mode string "UTF8_GB").
#define _UTF8_GB	1
// Big5 input: pulls in the Big5 table header and compiles Big5TOGb18030().
#define _BIG5_GB	1

#if (_UTF16LE_GB || _UTF16BE_GB)
#include ".\tools1\UTF16_GB_2B_H.h"
#include ".\tools1\UTF16_GB_4B_H.h"
#endif

#if (_BIG5_GB)
#include ".\tools2\BIG5_GB_2B_H.h"
#endif

// Binary-searches TB_UTF16_GB_4B for unicodeKey and returns the matching entry's
// gb field, or 0 when the key is not in the table.
U16 SearchCodeTable(U16 unicodeKey);
// Converts the single UTF-16 code unit at *pu16InputCode to GB18030 bytes stored in
// pu8OutputCode; *pu32OutputLen receives the number of bytes written (0, 1, 2 or 4).
S8 Utf16TOGb18030(U16* pu16InputCode, U8* pu8OutputCode, U32* pu32OutputLen);
// Converts one UTF-8 sequence of u32InputLen (1 to 3) bytes at pu8InputCode to GB18030
// bytes stored in pu8OutputCode; *pu32OutputLen receives the number of bytes written.
S8 Utf8TOGb18030(U8* pu8InputCode, U32 u32InputLen, U8* pu8OutputCode, U32* pu32OutputLen);

// Command line: convertCode <mode> <input file> <output file>
//   <mode> is "UTF16_GB" (little-endian UTF-16), "UTF16BE_GB" or "UTF8_GB".
//   The output file receives the GB18030 bytes produced by the converters below.

// Opens the input file for reading and, only when that succeeds, the output file
// for writing (both in binary mode).
// Returns 1 with both streams open. Otherwise prints the diagnostic, closes
// whatever was opened, and returns 0.
static int OpenFilePair(const char* pszInName, const char* pszOutName, FILE** ppfIn, FILE** ppfOut)
{
	*ppfIn  = fopen(pszInName, "rb");
	*ppfOut = (*ppfIn != NULL) ? fopen(pszOutName, "wb") : NULL;

	if ((*ppfIn != NULL) && (*ppfOut != NULL))
	{
		return 1;
	}

	// Pointers are printed with %p: squeezing them into a 32-bit integer for %X
	// is not portable. A NULL value shows which of the two files failed.
	printf("输入%s(%p)或输出文件%s(%p)不存在\n", pszInName, (void*)*ppfIn, pszOutName, (void*)*ppfOut);
	if (*ppfIn != NULL)
	{
		fclose(*ppfIn);
		*ppfIn = NULL;
	}
	return 0;
}

#if (_UTF16LE_GB || _UTF16BE_GB)
// Converts a whole UTF-16 stream to GB18030 and writes the result to pfOut.
//   bigEndian : non-zero when the high byte of each code unit comes first.
// Returns 0 once the stream has been processed, or -1 (nothing converted) when
// the file starts with the byte-order mark of the opposite byte order.
static int ConvertUtf16Stream(FILE* pfIn, FILE* pfOut, int bigEndian)
{
	U8  outputCode[8];
	U16 u16Indata;
	U32 u32OutputLen, i;
	int first, second;
	int bomFirst  = bigEndian ? 0xfe : 0xff;
	int bomSecond = bigEndian ? 0xff : 0xfe;

	// The fgetc() results stay in an int so that EOF cannot be mistaken for 0xFF.
	first  = fgetc(pfIn);
	second = fgetc(pfIn);
	if ((first != bomFirst) || (second != bomSecond))
	{
		if ((first == bomSecond) && (second == bomFirst))
		{
			return -1;
		}
		printf("警告:输入文件没有BOM标识\n");
		// No BOM: assume the requested byte order and start again from the first byte.
		fseek(pfIn, 0, SEEK_SET);
	}

	for (;;)
	{
		first = fgetc(pfIn);
		if (first == EOF)
		{
			break;
		}
		second = fgetc(pfIn);
		if (second == EOF)
		{
			// A dangling odd byte (or a read error) cannot form a code unit.
			break;
		}

		if (bigEndian)
		{
			u16Indata = (U16)((first << 8) | second);
		}
		else
		{
			u16Indata = (U16)((second << 8) | first);
		}

		u32OutputLen = 0;
		Utf16TOGb18030(&u16Indata, outputCode, &u32OutputLen);
		for (i = 0; i < u32OutputLen; i++)
		{
			fputc(outputCode[i], pfOut);
		}
	}

	if (ferror(pfIn))
	{
		printf("\nRead error");
	}
	return 0;
}
#endif

#if (_UTF8_GB)
// Converts a whole UTF-8 stream to GB18030 and writes the result to pfOut.
// Conversion stops at the first lead byte announcing more than 3 bytes, at a
// sequence cut short by the end of the file, or on a read error.
static void ConvertUtf8Stream(FILE* pfIn, FILE* pfOut)
{
	U8  inputCode[4], outputCode[8];
	U32 u32InputLen, u32OutputLen, u32Mask, i;
	int b0, b1, b2, ch;

	// Skip the UTF-8 signature EF BB BF when present; otherwise rewind.
	b0 = fgetc(pfIn);
	b1 = fgetc(pfIn);
	b2 = fgetc(pfIn);
	if ((b0 != 0xef) || (b1 != 0xbb) || (b2 != 0xbf))
	{
		printf("警告:输入文件没有BOM标识\n");
		fseek(pfIn, 0, SEEK_SET);
	}

	while ((ch = fgetc(pfIn)) != EOF)
	{
		inputCode[0] = (U8)ch;
		u32InputLen  = 1;

		// Each set bit below the top bit of the lead byte adds one byte to the sequence.
		if (inputCode[0] > 0x80)
		{
			for (u32Mask = 0x40; u32Mask & inputCode[0]; u32Mask >>= 1)
			{
				u32InputLen++;
			}
		}

		if (u32InputLen > 3)
		{
			printf( "\n不支持格式的输入文件" );
			break;
		}

		for (i = 1; i < u32InputLen; i++)
		{
			ch = fgetc(pfIn);
			if (ch == EOF)
			{
				break;
			}
			inputCode[i] = (U8)ch;
		}
		if (i < u32InputLen)
		{
			// Sequence truncated by end of file or a read error: nothing to convert.
			break;
		}

		u32OutputLen = 0;
		Utf8TOGb18030(inputCode, u32InputLen, outputCode, &u32OutputLen);
		for (i = 0; i < u32OutputLen; i++)
		{
			fputc(outputCode[i], pfOut);
		}
	}

	if (ferror(pfIn))
	{
		printf( "\n输入文件未知错误" );
	}
}
#endif

// Program entry point.
// When exactly three arguments are supplied they are used as mode, input file and
// output file. With any other argument count the built-in test arguments of the
// first enabled converter are used (_UTF8_GB, then _UTF16BE_GB, then _UTF16LE_GB);
// argv itself is never written to.
// Returns 0 on success, 1 for an unknown or missing mode, 2/4 when a UTF-16 file
// carries the BOM of the other byte order, 3/5 when a file cannot be opened.
int main(int argc, char* argv[])
{
	const char* pszMode    = NULL;
	const char* pszInName  = NULL;
	const char* pszOutName = NULL;
	FILE* pfIn  = NULL;
	FILE* pfOut = NULL;
	int   status = 0;

	if (argc == 4)
	{
		pszMode    = argv[1];
		pszInName  = argv[2];
		pszOutName = argv[3];
	}
	else
	{
#if (_UTF8_GB)
		pszMode    = "UTF8_GB";
		pszInName  = "U8TEST.txt";
		pszOutName = "GB.TXT";
#elif (_UTF16BE_GB)
		pszMode    = "UTF16BE_GB";
		pszInName  = "U16TESTB.txt";
		pszOutName = "GB.TXT";
#elif (_UTF16LE_GB)
		pszMode    = "UTF16_GB";
		pszInName  = "U16TEST.txt";
		pszOutName = "GB.TXT";
#endif
	}

	if (pszMode == NULL)
	{
		printf("命令格式: convertCode UTF16_GB utftxt(文件名) gbtxt(文件名)\n");
		return 1;
	}

#if (_UTF16LE_GB)
	if (strcmp(pszMode, "UTF16_GB") == 0)
	{
		if (!OpenFilePair(pszInName, pszOutName, &pfIn, &pfOut))
		{
			return 3;
		}
		status = ConvertUtf16Stream(pfIn, pfOut, 0);
		fclose(pfIn);
		fclose(pfOut);
		if (status < 0)
		{
			printf("输入文件为BigEndian格式,请使用UTF16BE_GB标识转换\n");
			return 2;
		}
		return 0;
	}
#endif

#if (_UTF16BE_GB)
	if (strcmp(pszMode, "UTF16BE_GB") == 0)
	{
		if (!OpenFilePair(pszInName, pszOutName, &pfIn, &pfOut))
		{
			return 5;
		}
		status = ConvertUtf16Stream(pfIn, pfOut, 1);
		fclose(pfIn);
		fclose(pfOut);
		if (status < 0)
		{
			printf("输入文件为LittleEndian格式,请使用UTF16_GB标识转换\n");
			return 4;
		}
		return 0;
	}
#endif

#if (_UTF8_GB)
	if (strcmp(pszMode, "UTF8_GB") == 0)
	{
		if (!OpenFilePair(pszInName, pszOutName, &pfIn, &pfOut))
		{
			return 5;
		}
		ConvertUtf8Stream(pfIn, pfOut);
		fclose(pfIn);
		fclose(pfOut);
		return 0;
	}
#endif

	// No enabled converter matches the requested mode.
	printf("命令格式: convertCode UTF16_GB utftxt(文件名) gbtxt(文件名)\n");
	return 1;
}

#if (_UTF16LE_GB || _UTF16BE_GB)
// Converts one UTF-16 code unit to its GB18030 byte sequence.
//   pu16InputCode : points to the code unit to convert.
//   pu8OutputCode : receives 1, 2 or 4 output bytes.
//   pu32OutputLen : receives the number of bytes stored (0 on failure).
// Returns CONVERT_OK on success, CONVERT_ERROR when no mapping is found.
S8 Utf16TOGb18030(U16* pu16InputCode, U8* pu8OutputCode, U32* pu32OutputLen)
{
	const U16 u16Unicode = *(pu16InputCode);
	const U8  u8Row      = u16Unicode >> 8;		// high byte selects the table row
	const U8  u8Col      = u16Unicode & 0xff;	// low byte selects the column
	const U16 u16Mapped  = TB_UTF16_GB[u8Row][u8Col];
	U16 u16Tail;

	// Table values up to 0xff are single-byte results; a zero entry for a
	// non-zero input means the table has no mapping.
	if (u16Mapped <= 0xff)
	{
		if ((u16Mapped != 0) || (u16Unicode == 0))
		{
			* pu32OutputLen = 1;
			pu8OutputCode[0] = (U8)u16Mapped;
			return CONVERT_OK;
		}
		* pu32OutputLen = 0;
		return CONVERT_ERROR;
	}

	// Everything outside 0x3400..0x4db5 is emitted as the two table bytes.
	if ((u16Unicode < 0x3400) || (u16Unicode > 0x4db5))
	{
		* pu32OutputLen = 2;
		pu8OutputCode[0]  =  u16Mapped >> 8;
		pu8OutputCode[1]  =  u16Mapped & 0xff;
		return CONVERT_OK;
	}

	// Inside that range the result has four bytes: the table value supplies the
	// first pair, SearchCodeTable() the second pair.
	u16Tail = SearchCodeTable(u16Unicode);
	if (u16Tail == 0)
	{
		* pu32OutputLen = 0;
		return CONVERT_ERROR;
	}

	* pu32OutputLen = 4;
	pu8OutputCode[0]  =  u16Mapped >> 8;
	pu8OutputCode[1]  =  u16Mapped & 0xff;
	pu8OutputCode[2]  =  u16Tail >> 8;
	pu8OutputCode[3]  =  u16Tail & 0xff;
	return CONVERT_OK;
}


// Looks up unicodeKey in TB_UTF16_GB_4B with a binary search (the table must be
// sorted by ascending unicode field for this to work).
// Returns the gb field of the matching entry, or 0 when the key is not present.
U16 SearchCodeTable(U16 unicodeKey)
{
    int lo = 0;
    int hi = sizeof(TB_UTF16_GB_4B)/sizeof(UTF16_GB_4B) - 1;

    while (lo <= hi)
    {
        const int probe = lo + (hi - lo) / 2;

        if (TB_UTF16_GB_4B[probe].unicode < unicodeKey)
        {
            // Key lies in the upper half.
            lo = probe + 1;
        }
        else if (TB_UTF16_GB_4B[probe].unicode > unicodeKey)
        {
            // Key lies in the lower half.
            hi = probe - 1;
        }
        else
        {
            return TB_UTF16_GB_4B[probe].gb;
        }
    }

    return 0;
}
#endif   

#if (_UTF8_GB)
// Converts one UTF-8 encoded character (1 to 3 bytes) to GB18030.
//   pu8InputCode  : the bytes of the UTF-8 sequence.
//   u32InputLen   : number of bytes in the sequence (1, 2 or 3).
//   pu8OutputCode : receives 1, 2 or 4 output bytes.
//   pu32OutputLen : receives the number of bytes stored (0 on failure).
// Returns CONVERT_OK on success, INPUTLEN_ERROR for an unsupported length,
// INPUTCODE_ERROR when the bytes do not form a well-formed sequence of that
// length, CONVERT_ERROR when no mapping is found.
// NOTE(review): this function uses TB_UTF16_GB and SearchCodeTable(), which this
// file only makes available when _UTF16LE_GB or _UTF16BE_GB is enabled.
S8 Utf8TOGb18030(U8* pu8InputCode, U32 u32InputLen, U8* pu8OutputCode, U32* pu32OutputLen)
{
	U16 u16InData,u16SeekResult,u16OutData;
	U8	u8DataH,u8DataL;
	
	switch(u32InputLen)
	{
		case 1:
			// A single byte must be plain ASCII and is copied through unchanged.
			if (pu8InputCode[0] > 0x7f)
			{
				* pu32OutputLen = 0;
				return INPUTCODE_ERROR;
			}
			* pu32OutputLen = 1;
			pu8OutputCode[0] = pu8InputCode[0];
			return CONVERT_OK;
		
		case 2:
			// Expect 110xxxxx 10xxxxxx; anything else would decode to a wrong character.
			if (((pu8InputCode[0] & 0xE0) != 0xC0) || ((pu8InputCode[1] & 0xC0) != 0x80))
			{
				* pu32OutputLen = 0;
				return INPUTCODE_ERROR;
			}
			u8DataH = (pu8InputCode[0] & 0x1f) >> 2;
			u8DataL = ((pu8InputCode[0] & 0x3) << 6) + (pu8InputCode[1] & 0x3f);
			u16InData = (u8DataH << 8) + u8DataL;
			break;
		
		case 3:
			// Expect 1110xxxx 10xxxxxx 10xxxxxx.
			if (((pu8InputCode[0] & 0xF0) != 0xE0) ||
				((pu8InputCode[1] & 0xC0) != 0x80) ||
				((pu8InputCode[2] & 0xC0) != 0x80))
			{
				* pu32OutputLen = 0;
				return INPUTCODE_ERROR;
			}
			u8DataH = ((pu8InputCode[0] & 0x0F) << 4) | ((pu8InputCode[1] >> 2) & 0x0F);
			u8DataL = ((pu8InputCode[1] & 0x03) << 6) + (pu8InputCode[2] & 0x3F);
			u16InData = (u8DataH << 8) + u8DataL;
			break;
			
		default:
			* pu32OutputLen = 0;
			return INPUTLEN_ERROR;
	}	
	
	// Look the decoded code point up in the conversion table.
	u16OutData = TB_UTF16_GB[u8DataH][u8DataL];
	
	// Table values up to 0xff are single-byte results; a zero entry for a
	// non-zero input means the table has no mapping.
	if (u16OutData <= 0xff)
	{
		if ((u16OutData == 0)&&(u16InData != 0))
		{
			* pu32OutputLen = 0;
			return CONVERT_ERROR;
		}
		else
		{
			* pu32OutputLen = 1;
			pu8OutputCode[0] = (U8)u16OutData;
			return CONVERT_OK;
		}
	}

	// Code points 0x3400..0x4db5 produce four bytes: the table value supplies the
	// first pair, SearchCodeTable() the second pair.
	if((u16InData >= 0x3400 )&&(u16InData <= 0x4db5))
	{
		u16SeekResult = SearchCodeTable(u16InData);
		if (u16SeekResult > 0)
		{
			* pu32OutputLen = 4;
			pu8OutputCode[0]  =  u16OutData >> 8;
			pu8OutputCode[1]  =  u16OutData & 0xff;
			pu8OutputCode[2]  =  u16SeekResult >> 8;
			pu8OutputCode[3]  =  u16SeekResult & 0xff;
			return CONVERT_OK;
		} 
		else
		{
			* pu32OutputLen = 0;
			return CONVERT_ERROR;
		}
	}

	// Everything else is a two-byte result.
	* pu32OutputLen = 2;
	pu8OutputCode[0]  =  u16OutData >> 8;
	pu8OutputCode[1]  =  u16OutData & 0xff;
	return CONVERT_OK;
	
}

#endif

#if (_BIG5_GB)
// Converts one Big5 character (or one ASCII byte) to GB18030.
//   pu8InputCode  : the input bytes, first byte of the pair at index 0.
//   u32InputLen   : 2 for a Big5 character; 1 is accepted only for an ASCII byte.
//   pu8OutputCode : receives the output bytes.
//   pu32OutputLen : receives the number of bytes stored; it is written on every
//                   path except the INPUTLEN_ERROR return.
// Returns CONVERT_OK on success, INPUTLEN_ERROR for an unsupported length,
// CONVERT_ERROR when no mapping is found.
S8 Big5TOGb18030(U8* pu8InputCode, U32 u32InputLen, U8* pu8OutputCode, U32* pu32OutputLen)
{
	U16 u16InData,u16SeekResult,u16OutData;
	U8	u8DataH,u8DataL;

	// Parameter check: only a two-byte Big5 character is converted through the table.
	if(u32InputLen != 2)	
	{	// Not the default length: pass a single ASCII byte through, reject the rest.
		if((u32InputLen == 1)&&(pu8InputCode[0] < 0x80))
		{
			* pu32OutputLen = 1;
			pu8OutputCode[0]  = pu8InputCode[0];
			return CONVERT_OK;
		}	
		return INPUTLEN_ERROR;
	}		
	
	// Build the 16-bit code from the two input bytes, first byte as the high byte.
	// (The code previously dereferenced pu16InputCode, which is not declared in
	// this function.)
	u8DataH	  = pu8InputCode[0];
	u8DataL   = pu8InputCode[1];
	u16InData = (U16)((u8DataH << 8) | u8DataL);
	
	// NOTE(review): this lookup indexes TB_UTF16_GB, the same table the UTF-16 and
	// UTF-8 converters use. A Big5 converter presumably needs the table declared in
	// BIG5_GB_2B_H.h, whose name is not visible here - confirm and replace.
	u16OutData = TB_UTF16_GB[u8DataH][u8DataL];
	
	// Table values up to 0xff are single-byte results; a zero entry for a
	// non-zero input means the table has no mapping.
	if (u16OutData <= 0xff)
	{
		if ((u16OutData == 0)&&(u16InData != 0))
		{
			* pu32OutputLen = 0;
			return CONVERT_ERROR;
		}
		else
		{
			* pu32OutputLen = 1;
			pu8OutputCode[0] = (U8)u16OutData;
			return CONVERT_OK;
		}
	}

	// NOTE(review): this range test is identical to the one in the Unicode
	// converters; whether it is meaningful for Big5 codes needs confirming.
	if((u16InData >= 0x3400 )&&(u16InData <= 0x4db5))
	{
		u16SeekResult = SearchCodeTable(u16InData);
		if (u16SeekResult > 0)
		{
			* pu32OutputLen = 4;
			pu8OutputCode[0]  =  u16OutData >> 8;
			pu8OutputCode[1]  =  u16OutData & 0xff;
			pu8OutputCode[2]  =  u16SeekResult >> 8;
			pu8OutputCode[3]  =  u16SeekResult & 0xff;
			return CONVERT_OK;
		} 
		else
		{
			* pu32OutputLen = 0;
			return CONVERT_ERROR;
		}
	}

	// Everything else is a two-byte result.
	* pu32OutputLen = 2;
	pu8OutputCode[0]  =  u16OutData >> 8;
	pu8OutputCode[1]  =  u16OutData & 0xff;
	return CONVERT_OK;


}
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -