📄 utf_unicode_convert.c
字号:
/*
@file : utf_unicode_convert.c
@brief: Conversions between UTF-8, GBK and UCS-2 (Unicode) text encodings
Author: ******
Version: V1.0
Copyright XXXXXXXXXXXXXX Inc. All rights reserved.
@date: 2008-11-24
*/
#include <stdio.h>
#include <stdlib.h>
#include "utf8toucs2.h"
#include "gbk_to_unicode_table.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
/*
 * Decode one three-byte UTF-8 sequence into a single 16-bit code point.
 *
 * pOut  : receives the decoded code point (U+0000..U+FFFF).
 * pText : points at three bytes laid out as 1110xxxx 10xxxxxx 10xxxxxx.
 *
 * Only the payload bits are extracted; the marker bits are masked off and
 * not validated, so the caller must make sure pText really holds a
 * three-byte sequence.
 *
 * The whole wchar_t is assigned by value, so the result does not depend on
 * host byte order and no stale bytes remain when wchar_t is wider than
 * 16 bits.
 */
void UTF_8ToUnicode(wchar_t* pOut,char *pText)
{
    const unsigned char *in = (const unsigned char *)pText;
    unsigned int cp;

    cp  = (unsigned int)(in[0] & 0x0F) << 12;  /* 4 payload bits of the lead byte */
    cp |= (unsigned int)(in[1] & 0x3F) << 6;   /* 6 bits of first continuation   */
    cp |= (unsigned int)(in[2] & 0x3F);        /* 6 bits of second continuation  */

    *pOut = (wchar_t)cp;
}
/*
 * Encode the low 16 bits of *pText as a three-byte UTF-8 sequence.
 *
 * pOut  : receives exactly three bytes (no terminator is written).
 * pText : points at the code point to encode; bits above 15 are ignored.
 *
 * The three-byte form 1110xxxx 10xxxxxx 10xxxxxx is always produced, even
 * for code points below U+0800 that have a shorter UTF-8 encoding.
 *
 * The code point is read by value rather than through a byte alias, so the
 * result is the same on little- and big-endian hosts.
 */
void UnicodeToUTF_8(char* pOut,wchar_t* pText)
{
    unsigned int cp = (unsigned int)*pText & 0xFFFFu;

    pOut[0] = (char)(0xE0 | (cp >> 12));          /* top 4 bits    */
    pOut[1] = (char)(0x80 | ((cp >> 6) & 0x3F));  /* middle 6 bits */
    pOut[2] = (char)(0x80 | (cp & 0x3F));         /* low 6 bits    */
}
/*
 * Map one double-byte GBK code to its Unicode value using the on-disk
 * lookup table "./gbk2ucs.dat".
 *
 * input  : GBK code, lead byte in the high 8 bits, trail byte in the low 8.
 * return : the 16-bit table entry, or 0 when the code is below 0x8140 (the
 *          first code covered by the table) or the table cannot be read.
 *
 * The table is indexed by (input - 0x8140), two bytes per entry. Each entry
 * is copied straight into an unsigned short, so the file is presumably
 * stored in host byte order -- TODO confirm against the table generator.
 *
 * The file is opened and closed on every call. An in-memory table
 * (gbkToUnicode[input - 0x8140]) was the compiled-out alternative.
 */
unsigned short gbk_to_unicode(unsigned short input)
{
    unsigned short unicode = 0;
    off_t offset;
    ssize_t got;
    int fd;

    /* Codes below the table base would yield a negative file offset. */
    if (input < 0x8140)
        return 0;

    fd = open("./gbk2ucs.dat", O_RDONLY);
    if (fd < 0) {
        printf("open gbk2ucs.dat error, %s\n", strerror(errno));
        return 0;
    }

    offset = (off_t)(input - 0x8140) * 2;
    if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
        close(fd);
        return 0;
    }

    got = read(fd, &unicode, sizeof unicode);
    close(fd);

    /* A short read means the code lies beyond the end of the table. */
    if (got != (ssize_t)sizeof unicode)
        return 0;

    return unicode;
}
/*
 * Convert a NUL-terminated GBK string to a NUL-terminated UCS-2 string.
 *
 * src         : GBK input; bytes below 0x80 are copied as-is, any other
 *               byte is taken as the lead byte of a two-byte code that is
 *               translated with gbk_to_unicode().
 * pUCS2buf    : output buffer.
 * UCS2bufSize : capacity of pUCS2buf in elements, terminator included.
 * return      : pUCS2buf.
 *
 * At most UCS2bufSize - 1 characters are stored, and the output is always
 * terminated when UCS2bufSize > 0. Nothing is written when the buffer is
 * NULL or has no room. A lead byte followed directly by the terminator
 * (truncated input) ends the conversion instead of reading past the end of
 * the string.
 */
ucs2 * gbkto_ucs2(const char* src, ucs2 * pUCS2buf, int UCS2bufSize)
{
    const unsigned char *in = (const unsigned char *)src;
    int out = 0;

    if (pUCS2buf == NULL || UCS2bufSize <= 0)
        return pUCS2buf;

    /* Keep one slot free for the terminator. */
    while (in != NULL && out < UCS2bufSize - 1) {
        unsigned char lead = in[0];

        if (lead == '\0')
            break;

        if ((lead & 0x80) == 0x00) {
            pUCS2buf[out++] = lead;
            in += 1;
        } else {
            unsigned char trail = in[1];

            if (trail == '\0')
                break;  /* dangling lead byte: stop before the terminator */

            pUCS2buf[out++] = gbk_to_unicode((unsigned short)((lead << 8) | trail));
            in += 2;
        }
    }

    pUCS2buf[out] = '\0';
    return pUCS2buf;
}
/*
 * Count the characters in a NUL-terminated GBK string.
 *
 * src    : GBK input; a byte below 0x80 is one character, any other byte
 *          starts a two-byte character.
 * return : number of characters, terminator excluded.
 *
 * The scan gives up after 10000 steps and returns the count reached so far.
 * A lead byte followed directly by the terminator (truncated input) is
 * counted as one character and ends the scan, instead of skipping over the
 * terminator and reading past the end of the string.
 */
int gbk_length(const char* src)
{
    enum { MAX_SCAN_STEPS = 10000 };
    const unsigned char *p = (const unsigned char *)src;
    int count = 0;
    int step;

    for (step = 0; step < MAX_SCAN_STEPS; step++) {
        if (p[0] == '\0')
            return count;

        if ((p[0] & 0x80) == 0x00) {
            p += 1;
        } else if (p[1] == '\0') {
            return count + 1;  /* dangling lead byte at end of input */
        } else {
            p += 2;
        }
        count++;
    }

    return count;
}
/*
 * Decode a NUL-terminated UTF-8 string into UCS-2.
 *
 * pUTF8str    : UTF-8 input; one-, two- and three-byte sequences are
 *               understood.
 * pUCS2buf    : output buffer.
 * UCS2bufSize : capacity of pUCS2buf in elements.
 * return      : pUCS2buf.
 *
 * Decoding stops at the input terminator (which is copied to the output),
 * at the first byte that does not start a recognised sequence (the output
 * is terminated at that point), or when the buffer is full.
 *
 * NOTE: when the buffer fills before the input terminator is reached, the
 * output is NOT NUL-terminated (historical behaviour, kept for callers that
 * size the buffer exactly). Passing a buffer of utf8_length() + 1 elements
 * always yields a terminated result.
 *
 * Nothing is written when UCS2bufSize <= 0.
 */
ucs2 * utf8_decode(const utf8 * pUTF8str, ucs2 * pUCS2buf, int UCS2bufSize)
{
    const utf8 *in = pUTF8str;
    int out = 0;

    if (UCS2bufSize <= 0)
        return pUCS2buf;

    while (out < UCS2bufSize) {
        if ((in[0] & 0x80) == 0x00) {
            /* 0xxxxxxx: plain ASCII, including the terminator itself */
            pUCS2buf[out++] = in[0];
            if (in[0] == '\0')
                return pUCS2buf;
            in += 1;
        } else if ((in[0] & 0xe0) == 0xc0 &&
                   (in[1] & 0xc0) == 0x80) {
            /* 110xxxxx 10xxxxxx */
            pUCS2buf[out++] = ((in[0] & 0x1fL) << 6) |
                              ((in[1] & 0x3fL) << 0);
            in += 2;
        } else if ((in[0] & 0xf0) == 0xe0 &&
                   (in[1] & 0xc0) == 0x80 &&
                   (in[2] & 0xc0) == 0x80) {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            pUCS2buf[out++] = ((in[0] & 0x0fL) << 12) |
                              ((in[1] & 0x3fL) << 6) |
                              ((in[2] & 0x3fL) << 0);
            in += 3;
        } else {
            /* Malformed or unsupported sequence: out < UCS2bufSize here,
               so the slot for the terminator is available. */
            pUCS2buf[out] = '\0';
            return pUCS2buf;
        }
    }

    return pUCS2buf;
}
/*
 * Measure a UTF-8 string in bytes.
 *
 * pUTF8str : NUL-terminated UTF-8 input; one-, two- and three-byte
 *            sequences are recognised.
 * return   : bytes scanned. For a string that is well-formed up to its
 *            terminator this includes the terminating NUL; if a byte that
 *            does not start a recognised sequence is met first, it is the
 *            number of bytes before that byte.
 */
int utf8_size(const utf8 * pUTF8str)
{
    int pos = 0;

    for (;;) {
        const utf8 *p = pUTF8str + pos;

        if ((p[0] & 0x80) == 0x00) {
            /* single byte; the terminator is counted too */
            pos += 1;
            if (p[0] == '\0')
                return pos;
            continue;
        }

        if ((p[0] & 0xe0) == 0xc0 &&
            (p[1] & 0xc0) == 0x80) {
            pos += 2;
            continue;
        }

        if ((p[0] & 0xf0) == 0xe0 &&
            (p[1] & 0xc0) == 0x80 &&
            (p[2] & 0xc0) == 0x80) {
            pos += 3;
            continue;
        }

        /* not a recognised sequence */
        return pos;
    }
}
/*
 * Count the characters in a UTF-8 string.
 *
 * pUTF8str : NUL-terminated UTF-8 input; one-, two- and three-byte
 *            sequences each count as one character.
 * return   : number of characters before the terminator, or before the
 *            first byte that does not start a recognised sequence.
 */
int utf8_length(const utf8 * pUTF8str)
{
    const utf8 *cursor = pUTF8str;
    int count = 0;

    for (;;) {
        int width;

        if ((cursor[0] & 0x80) == 0x00) {
            if (cursor[0] == '\0')
                return count;
            width = 1;
        } else if ((cursor[0] & 0xe0) == 0xc0 &&
                   (cursor[1] & 0xc0) == 0x80) {
            width = 2;
        } else if ((cursor[0] & 0xf0) == 0xe0 &&
                   (cursor[1] & 0xc0) == 0x80 &&
                   (cursor[2] & 0xc0) == 0x80) {
            width = 3;
        } else {
            return count;
        }

        cursor += width;
        count++;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -