/* u2utf8.c */
/* Font size: (code-viewer page residue, not part of the program) */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ENDIAN 0
/*
 * Encode one 16-bit code unit as UTF-8 and pack the bytes into an integer.
 *
 * The encoded bytes are packed most-significant-first: the lead byte sits in
 * the highest occupied byte of the result and the final continuation byte in
 * the lowest 8 bits (e.g. 0x7063 -> 0xe781a3).
 *
 * uni: 16-bit code unit, 0x0000..0xffff. Surrogate values (0xd800..0xdfff)
 *      are not rejected; they are encoded like any other 3-byte value.
 *
 * Returns the 1, 2 or 3 UTF-8 bytes packed into an unsigned int. A 16-bit
 * input can never reach the 4-byte form, so there is no such branch.
 */
unsigned int u2utf8(unsigned short uni)
{
    /* Work in unsigned int so every shift below is done on an unsigned type. */
    const unsigned int cp = uni;

    if (cp < 0x80) {
        /* 0xxxxxxx */
        return cp;
    }
    if (cp < 0x800) {
        /* 110xxxxx 10xxxxxx */
        return ((0xc0u | (cp >> 6)) << 8)
             | (0x80u | (cp & 0x3fu));
    }
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    return ((0xe0u | (cp >> 12)) << 16)
         | ((0x80u | ((cp >> 6) & 0x3fu)) << 8)
         | (0x80u | (cp & 0x3fu));
}
/*
 * Rebuild a value from its hexadecimal digits.
 *
 * The upper part (c >> 12) and the three low nibbles are each moved back to
 * their own position and summed, e.g. for 0x7063:
 *   7 * 0x1000 + 0 * 0x100 + 6 * 0x10 + 3
 * Every digit returns to the position it came from, so the result equals c.
 *
 * c: value to decompose.
 * Returns the recombined value.
 */
unsigned int dec16(unsigned int c)
{
    /*
     * Each shift is parenthesised on purpose: '+' binds tighter than '<<',
     * so without the parentheses the nibbles would be added into the shift
     * counts instead of being summed after shifting.
     */
    return ((c >> 12) << 12)
         + (((c >> 8) & 0x0fu) << 8)
         + (((c >> 4) & 0x0fu) << 4)
         + (c & 0x0fu);
}
/*
 * Command-line entry point: convert one code point to packed UTF-8.
 *
 * argv[1] is either a "0x"/"0X"-prefixed hexadecimal number or a plain
 * decimal number, and must lie in 0..0xffff. When the ENDIAN macro is
 * non-zero, the two bytes of the code point are swapped before encoding.
 *
 * On success the code point and the value returned by u2utf8() are printed
 * in hex and 0 is returned. On a usage or input error a message is printed
 * and -1 is returned.
 */
int main(int argc, char *argv[])
{
    unsigned long value;
    unsigned short uni;
    unsigned int utf8;

    if (argc < 2) {
        printf("\nusage: u2utf8 <code point>\n"
               "\t0x-prefixed hex or plain decimal, 0..0xffff\n"
               "\te.g. u2utf8 0x7063\n\n");
        return -1;
    }
    if (argv[1][0] == '-') {
        printf("\n negative is not allowed!");
        return -1;
    }

    if (argv[1][0] == '0' && (argv[1][1] == 'x' || argv[1][1] == 'X')) {
        const char *digits = argv[1] + 2;

        /* "0x" with nothing after it. */
        if (*digits == '\0') {
            printf("illegal!\n");
            return -1;
        }
        /*
         * Accept bare hex digits only. Checking first keeps strtoul() from
         * silently accepting whitespace, a sign, or a second "0x" prefix.
         */
        if (digits[strspn(digits, "0123456789abcdefABCDEF")] != '\0') {
            printf("illegal char\n");
            return -1;
        }
        /* On overflow strtoul() yields ULONG_MAX; the range check rejects it. */
        value = strtoul(digits, NULL, 16);
        printf("%lx \t", value);
    } else {
        char *end;

        value = strtoul(argv[1], &end, 10);
        /* Reject empty input and trailing non-digit characters. */
        if (end == argv[1] || *end != '\0') {
            printf("illegal char\n");
            return -1;
        }
    }

    if (value > 0xffff) {
        printf("\n%lu is too big!\n", value);
        return -1;
    }
    uni = (unsigned short)value;

#if ENDIAN
    /* Swap the high and low bytes of the 16-bit code point. */
    uni = (unsigned short)((uni << 8) | (uni >> 8));
#endif
    utf8 = u2utf8(uni);
    printf("u-%x--->utf-8:0x%x\n", (unsigned int)uni, utf8);
    return 0;
}
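/*
 * Keyboard shortcuts (code-viewer page residue, not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */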