📄 convertcode.c
字号:
// convertCode.cpp : Defines the entry point for the console application.
//
/***************************************************************************
* Copyright (c) 2008, .......
* All rights reserved.
*
* 文件名称:convertCode.c
* 文件标识:
* 摘 要:汉字内码转换处理函数
* @main()
* @Utf16TOGb18030()
* @Utf8TOGb18030()
* 当前版本:1.0
* 作 者:CZ
* 完成日期:
*
* 修改与优化处理:
**************************************************************************/
#include "stdafx.h"
#define _UTF16BE_GB 1
#define _UTF16LE_GB 1
#define _UTF8_GB 1
#define _BIG5_GB 1
#if (_UTF16LE_GB || _UTF16BE_GB)
#include ".\tools1\UTF16_GB_2B_H.h"
#include ".\tools1\UTF16_GB_4B_H.h"
#endif
#if (_BIG5_GB)
#include ".\tools2\BIG5_GB_2B_H.h"
#endif
U16 SearchCodeTable(U16 unicodeKey);
S8 Utf16TOGb18030(U16* pu16InputCode, U8* pu8OutputCode, U32* pu32OutputLen);
S8 Utf8TOGb18030(U8* pu8InputCode, U32 u32InputLen, U8* pu8OutputCode, U32* pu32OutputLen);
// Command format: convertCode <UTF16_GB | UTF16BE_GB | UTF8_GB> <input file> <output file>
#if (_UTF16LE_GB || _UTF16BE_GB || _UTF8_GB)
/*
 * Opens the input file for binary reading and, only if that succeeds, the
 * output file for binary writing (so a missing input never truncates an
 * existing output file).
 *
 * On success returns 1 with both *ppfIn and *ppfOut open.
 * On failure prints the diagnostic, closes whatever was opened, sets both
 * pointers to NULL and returns 0.
 */
static int OpenFiles(const char *pszIn, const char *pszOut, FILE **ppfIn, FILE **ppfOut)
{
    *ppfIn = fopen(pszIn, "rb");
    *ppfOut = (*ppfIn != NULL) ? fopen(pszOut, "wb") : NULL;

    if ((*ppfIn != NULL) && (*ppfOut != NULL))
    {
        return 1;
    }

    /* %p with void* instead of casting the pointers to a 32-bit integer. */
    printf("输入%s(%p)或输出文件%s(%p)不存在\n", pszIn, (void *)*ppfIn, pszOut, (void *)*ppfOut);
    if (*ppfIn != NULL)
    {
        fclose(*ppfIn);
        *ppfIn = NULL;
    }
    return 0;
}
#endif

#if (_UTF16LE_GB || _UTF16BE_GB)
/*
 * Reads UTF-16 code units from pfIn until end of file, converts each one with
 * Utf16TOGb18030() and writes the resulting bytes to pfOut.
 *
 * bigEndian selects the byte order of the input (non-zero: high byte first).
 * A dangling single byte at the end of the file is ignored.
 */
static void ConvertUtf16Stream(FILE *pfIn, FILE *pfOut, int bigEndian)
{
    U8 outputCode[8];
    U32 u32OutputLen;
    U32 i;

    for (;;)
    {
        U16 u16Indata;
        int first = fgetc(pfIn);
        int second = (first == EOF) ? EOF : fgetc(pfIn);

        if (ferror(pfIn))
        {
            printf("\nRead error");
            break;
        }
        if (second == EOF)
        {
            /* Testing the fgetc() result (rather than feof() before the read)
             * avoids converting a bogus 0xFFFF unit after the last real one. */
            break;
        }

        if (bigEndian)
        {
            u16Indata = (U16)((first << 8) | second);
        }
        else
        {
            u16Indata = (U16)(first | (second << 8));
        }

        /* The converter sets the length to 0 for unmapped input, so nothing
         * is written in that case. */
        u32OutputLen = 0;
        Utf16TOGb18030(&u16Indata, outputCode, &u32OutputLen);
        for (i = 0; i < u32OutputLen; i++)
        {
            fputc(outputCode[i], pfOut);
        }
    }
}
#endif

#if (_UTF8_GB)
/*
 * Reads UTF-8 sequences (1 to 3 bytes) from pfIn until end of file, converts
 * each one with Utf8TOGb18030() and writes the resulting bytes to pfOut.
 *
 * Stops with a message at the first lead byte announcing a sequence longer
 * than 3 bytes. A sequence truncated by end of file is dropped.
 */
static void ConvertUtf8Stream(FILE *pfIn, FILE *pfOut)
{
    U8 inputCode[4];
    U8 outputCode[8];
    U32 u32InputLen;
    U32 u32OutputLen;
    U32 i;

    for (;;)
    {
        int c = fgetc(pfIn);

        if (c == EOF)
        {
            if (ferror(pfIn))
            {
                printf("\n输入文件未知错误");
            }
            break;
        }
        inputCode[0] = (U8)c;

        /* Sequence length = 1 + number of consecutive 1 bits below bit 7. */
        u32InputLen = 1;
        if (inputCode[0] > 0x80)
        {
            U32 mask = 0x40;
            while (mask & inputCode[0])
            {
                u32InputLen++;
                mask = (mask >> 1);
            }
        }
        if (u32InputLen > 3)
        {
            printf("\n不支持格式的输入文件");
            break;
        }

        /* Read the continuation bytes; stop early if the file ends. */
        for (i = 1; i < u32InputLen; i++)
        {
            c = fgetc(pfIn);
            if (c == EOF)
            {
                break;
            }
            inputCode[i] = (U8)c;
        }
        if (ferror(pfIn))
        {
            printf("\n输入文件未知错误");
            break;
        }
        if (i < u32InputLen)
        {
            break; /* truncated sequence at end of file */
        }

        u32OutputLen = 0;
        Utf8TOGb18030(inputCode, u32InputLen, outputCode, &u32OutputLen);
        for (i = 0; i < u32OutputLen; i++)
        {
            fputc(outputCode[i], pfOut);
        }
    }
}
#endif

/*
 * Entry point: converts a text file to GB18030.
 *
 * Arguments: <mode> <input file> <output file>, where mode is one of
 * "UTF16_GB" (little endian), "UTF16BE_GB" or "UTF8_GB". When one of the
 * _UTF8_GB / _UTF16BE_GB / _UTF16LE_GB macros is non-zero, the command line is
 * overridden at compile time with fixed test file names (first match wins).
 *
 * Returns 0 on success, 1 on a usage error, 2 / 4 when the input BOM has the
 * opposite UTF-16 byte order, and 3 / 5 when a file cannot be opened.
 */
int main(int argc, char* argv[])
{
    FILE *pfIn = NULL;
    FILE *pfOut = NULL;
    const char *pszMode = NULL;
    const char *pszIn = NULL;
    const char *pszOut = NULL;

    if (argc == 4)
    {
        pszMode = argv[1];
        pszIn = argv[2];
        pszOut = argv[3];
    }

    /* Compile-time override. Kept in local variables: storing into argv[1..3]
     * is out of bounds when fewer than four arguments were supplied. */
#if (_UTF8_GB)
    pszMode = "UTF8_GB";
    pszIn = "U8TEST.txt";
    pszOut = "GB.TXT";
#elif (_UTF16BE_GB)
    pszMode = "UTF16BE_GB";
    pszIn = "U16TESTB.txt";
    pszOut = "GB.TXT";
#elif (_UTF16LE_GB)
    pszMode = "UTF16_GB";
    pszIn = "U16TEST.txt";
    pszOut = "GB.TXT";
#endif

    if (pszMode == NULL)
    {
        printf("命令格式: convertCode UTF16_GB utftxt(文件名) gbtxt(文件名)\n");
        return 1;
    }

#if (_UTF16LE_GB)
    if (strcmp(pszMode, "UTF16_GB") == 0)
    {
        int bom0;
        int bom1;

        if (!OpenFiles(pszIn, pszOut, &pfIn, &pfOut))
        {
            return 3;
        }

        bom0 = fgetc(pfIn);
        bom1 = fgetc(pfIn);
        if ((bom0 != 0xff) || (bom1 != 0xfe))
        {
            if ((bom0 == 0xfe) && (bom1 == 0xff))
            {
                printf("输入文件为BigEndian格式,请使用UTF16BE_GB标识转换\n");
                fclose(pfIn);
                fclose(pfOut);
                return 2;
            }
            printf("警告:输入文件没有BOM标识\n");
            /* No BOM: assume little endian and restart from the first byte. */
            fseek(pfIn, 0, SEEK_SET);
        }

        ConvertUtf16Stream(pfIn, pfOut, 0);
        fclose(pfIn);
        fclose(pfOut);
        return 0;
    }
#endif

#if (_UTF16BE_GB)
    if (strcmp(pszMode, "UTF16BE_GB") == 0)
    {
        int bom0;
        int bom1;

        if (!OpenFiles(pszIn, pszOut, &pfIn, &pfOut))
        {
            return 5;
        }

        bom0 = fgetc(pfIn);
        bom1 = fgetc(pfIn);
        if ((bom0 != 0xfe) || (bom1 != 0xff))
        {
            if ((bom0 == 0xff) && (bom1 == 0xfe))
            {
                printf("输入文件为LittleEndian格式,请使用UTF16_GB标识转换\n");
                fclose(pfIn);
                fclose(pfOut);
                return 4;
            }
            printf("警告:输入文件没有BOM标识\n");
            /* No BOM: assume big endian and restart from the first byte. */
            fseek(pfIn, 0, SEEK_SET);
        }

        ConvertUtf16Stream(pfIn, pfOut, 1);
        fclose(pfIn);
        fclose(pfOut);
        return 0;
    }
#endif

#if (_UTF8_GB)
    if (strcmp(pszMode, "UTF8_GB") == 0)
    {
        int bom0;
        int bom1;
        int bom2;

        if (!OpenFiles(pszIn, pszOut, &pfIn, &pfOut))
        {
            return 5;
        }

        bom0 = fgetc(pfIn);
        bom1 = fgetc(pfIn);
        bom2 = fgetc(pfIn);
        if ((bom0 != 0xef) || (bom1 != 0xbb) || (bom2 != 0xbf))
        {
            printf("警告:输入文件没有BOM标识\n");
            /* No BOM: treat the file as UTF-8 and restart from the first byte. */
            fseek(pfIn, 0, SEEK_SET);
        }

        ConvertUtf8Stream(pfIn, pfOut);
        fclose(pfIn);
        fclose(pfOut);
        return 0;
    }
#endif

    /* Unknown mode string. */
    printf("命令格式: convertCode UTF16_GB utftxt(文件名) gbtxt(文件名)\n");
    return 1;
}
#if (_UTF16LE_GB || _UTF16BE_GB)
/*
 * Converts one UTF-16 code unit to its GB18030 byte sequence.
 *
 * pu16InputCode : the code unit to convert.
 * pu8OutputCode : receives 1, 2 or 4 output bytes.
 * pu32OutputLen : receives the number of bytes written (0 on failure).
 *
 * Returns CONVERT_OK on success, or CONVERT_ERROR when the lookup tables hold
 * no mapping for the code unit.
 */
S8 Utf16TOGb18030(U16* pu16InputCode, U8* pu8OutputCode, U32* pu32OutputLen)
{
    const U16 codeUnit = *pu16InputCode;
    const U8 row = codeUnit >> 8;
    const U8 col = codeUnit & 0xff;
    /* Main table, indexed by the high and low byte of the code unit. */
    const U16 mapped = TB_UTF16_GB[row][col];
    U16 tail;

    /* Table values up to 0xff are single-byte results. A zero entry means
     * "no mapping" unless the input itself was zero. */
    if (mapped <= 0xff)
    {
        if ((mapped != 0) || (codeUnit == 0))
        {
            *pu32OutputLen = 1;
            pu8OutputCode[0] = (U8)mapped;
            return CONVERT_OK;
        }
        *pu32OutputLen = 0;
        return CONVERT_ERROR;
    }

    /* Outside 0x3400..0x4db5 the table value is the complete 2-byte code. */
    if ((codeUnit < 0x3400) || (codeUnit > 0x4db5))
    {
        *pu32OutputLen = 2;
        pu8OutputCode[0] = mapped >> 8;
        pu8OutputCode[1] = mapped & 0xff;
        return CONVERT_OK;
    }

    /* Inside that range the result is 4 bytes: the table value supplies the
     * first two, the secondary table the last two. */
    tail = SearchCodeTable(codeUnit);
    if (tail == 0)
    {
        *pu32OutputLen = 0;
        return CONVERT_ERROR;
    }

    *pu32OutputLen = 4;
    pu8OutputCode[0] = mapped >> 8;
    pu8OutputCode[1] = mapped & 0xff;
    pu8OutputCode[2] = tail >> 8;
    pu8OutputCode[3] = tail & 0xff;
    return CONVERT_OK;
}
/*
 * Binary search of TB_UTF16_GB_4B for the entry whose `unicode` field equals
 * unicodeKey. The search is only valid if the table is sorted in ascending
 * order of `unicode`.
 *
 * Returns the entry's `gb` field, or 0 when the key is not present.
 */
U16 SearchCodeTable(U16 unicodeKey)
{
    int lo = 0;
    int hi = sizeof(TB_UTF16_GB_4B)/sizeof(UTF16_GB_4B) - 1;

    while (lo <= hi)
    {
        const int probe = lo + (hi - lo) / 2;

        if (TB_UTF16_GB_4B[probe].unicode < unicodeKey)
        {
            lo = probe + 1;     /* key lies in the upper half */
        }
        else if (TB_UTF16_GB_4B[probe].unicode > unicodeKey)
        {
            hi = probe - 1;     /* key lies in the lower half */
        }
        else
        {
            return TB_UTF16_GB_4B[probe].gb;
        }
    }

    return 0;
}
#endif
#if (_UTF8_GB)
/*
 * Converts one UTF-8 sequence of 1 to 3 bytes to its GB18030 byte sequence.
 *
 * pu8InputCode  : the bytes of the UTF-8 sequence.
 * u32InputLen   : number of bytes in the sequence (1, 2 or 3).
 * pu8OutputCode : receives 1, 2 or 4 output bytes.
 * pu32OutputLen : receives the number of bytes written (0 on failure).
 *
 * Returns CONVERT_OK on success, INPUTLEN_ERROR for any other length,
 * INPUTCODE_ERROR for a 1-byte input above 0x7f, and CONVERT_ERROR when the
 * lookup tables hold no mapping for the decoded value.
 */
S8 Utf8TOGb18030(U8* pu8InputCode, U32 u32InputLen, U8* pu8OutputCode, U32* pu32OutputLen)
{
    U16 codePoint;
    U16 mapped;
    U16 tail;

    if (u32InputLen == 1)
    {
        /* A single byte is copied through unchanged if it is 7-bit. */
        if (pu8InputCode[0] > 0x7f)
        {
            *pu32OutputLen = 0;
            return INPUTCODE_ERROR;
        }
        *pu32OutputLen = 1;
        pu8OutputCode[0] = pu8InputCode[0];
        return CONVERT_OK;
    }
    else if (u32InputLen == 2)
    {
        /* 5 payload bits from the lead byte, 6 from the continuation byte. */
        codePoint = (U16)(((pu8InputCode[0] & 0x1f) << 6)
                          | (pu8InputCode[1] & 0x3f));
    }
    else if (u32InputLen == 3)
    {
        /* 4 payload bits from the lead byte, 6 from each continuation byte. */
        codePoint = (U16)(((pu8InputCode[0] & 0x0f) << 12)
                          | ((pu8InputCode[1] & 0x3f) << 6)
                          | (pu8InputCode[2] & 0x3f));
    }
    else
    {
        *pu32OutputLen = 0;
        return INPUTLEN_ERROR;
    }

    /* Main table, indexed by the high and low byte of the decoded value. */
    mapped = TB_UTF16_GB[codePoint >> 8][codePoint & 0xff];

    /* Table values up to 0xff are single-byte results. A zero entry means
     * "no mapping" unless the decoded value itself was zero. */
    if (mapped <= 0xff)
    {
        if ((mapped != 0) || (codePoint == 0))
        {
            *pu32OutputLen = 1;
            pu8OutputCode[0] = (U8)mapped;
            return CONVERT_OK;
        }
        *pu32OutputLen = 0;
        return CONVERT_ERROR;
    }

    /* Outside 0x3400..0x4db5 the table value is the complete 2-byte code. */
    if ((codePoint < 0x3400) || (codePoint > 0x4db5))
    {
        *pu32OutputLen = 2;
        pu8OutputCode[0] = mapped >> 8;
        pu8OutputCode[1] = mapped & 0xff;
        return CONVERT_OK;
    }

    /* Inside that range the result is 4 bytes: the table value supplies the
     * first two, the secondary table the last two. */
    tail = SearchCodeTable(codePoint);
    if (tail == 0)
    {
        *pu32OutputLen = 0;
        return CONVERT_ERROR;
    }

    *pu32OutputLen = 4;
    pu8OutputCode[0] = mapped >> 8;
    pu8OutputCode[1] = mapped & 0xff;
    pu8OutputCode[2] = tail >> 8;
    pu8OutputCode[3] = tail & 0xff;
    return CONVERT_OK;
}
#endif
#if (_BIG5_GB)
/*
 * Converts one Big5 character (2 bytes) or one ASCII byte to GB18030.
 *
 * pu8InputCode  : the input bytes, lead byte first.
 * u32InputLen   : 2 for a Big5 character; 1 is accepted only for a byte
 *                 below 0x80, which is copied through unchanged.
 * pu8OutputCode : receives the output bytes.
 * pu32OutputLen : receives the number of bytes written (0 on failure).
 *
 * Returns CONVERT_OK on success, INPUTLEN_ERROR for an unsupported length,
 * and CONVERT_ERROR when the lookup yields no mapping.
 *
 * NOTE(review): the lookup below still goes through TB_UTF16_GB and the
 * 0x3400..0x4db5 four-byte branch, both copied from the UTF-16 converter.
 * The Big5 table from BIG5_GB_2B_H.h is presumably what should be consulted
 * here; its name is not visible in this file, so confirm and switch the
 * lookup before relying on this function's output.
 */
S8 Big5TOGb18030(U8* pu8InputCode, U32 u32InputLen, U8* pu8OutputCode, U32* pu32OutputLen)
{
    U16 u16InData,u16SeekResult,u16OutData;
    U8 u8DataH,u8DataL;

    /* Parameter check: normally exactly one 2-byte Big5 character. */
    if (u32InputLen != 2)
    {
        /* A single ASCII byte is passed through; anything else is an error. */
        if ((u32InputLen == 1) && (pu8InputCode[0] < 0x80))
        {
            *pu32OutputLen = 1;
            pu8OutputCode[0] = pu8InputCode[0];
            return CONVERT_OK;
        }
        /* Report zero output bytes, as the other error paths do. */
        *pu32OutputLen = 0;
        return INPUTLEN_ERROR;
    }

    /* Build the 16-bit input code from the two input bytes. The original
     * dereferenced an undeclared pointer (pu16InputCode) at this point. */
    u8DataH = pu8InputCode[0];
    u8DataL = pu8InputCode[1];
    u16InData = (U16)((u8DataH << 8) | u8DataL);

    /* Table lookup (see NOTE(review) above regarding the table used). */
    u16OutData = TB_UTF16_GB[u8DataH][u8DataL];

    /* Single-byte result, or no mapping at all. */
    if (u16OutData <= 0xff)
    {
        if ((u16OutData == 0) && (u16InData != 0))
        {
            *pu32OutputLen = 0;
            return CONVERT_ERROR;
        }
        *pu32OutputLen = 1;
        pu8OutputCode[0] = (U8)u16OutData;
        return CONVERT_OK;
    }

    /* Four-byte result. */
    if ((u16InData >= 0x3400) && (u16InData <= 0x4db5))
    {
        u16SeekResult = SearchCodeTable(u16InData);
        if (u16SeekResult > 0)
        {
            *pu32OutputLen = 4;
            pu8OutputCode[0] = u16OutData >> 8;
            pu8OutputCode[1] = u16OutData & 0xff;
            pu8OutputCode[2] = u16SeekResult >> 8;
            pu8OutputCode[3] = u16SeekResult & 0xff;
            return CONVERT_OK;
        }
        *pu32OutputLen = 0;
        return CONVERT_ERROR;
    }

    /* Everything else is a two-byte result. */
    *pu32OutputLen = 2;
    pu8OutputCode[0] = u16OutData >> 8;
    pu8OutputCode[1] = u16OutData & 0xff;
    return CONVERT_OK;
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -