u2utf8.c

来自「utf-8和unicode的互转的c代码」· C语言 代码 · 共 150 行

C
150
字号




#include <stdio.h>
#include <stdlib.h>

/* When non-zero, main() swaps the two bytes of the 16-bit input value before encoding it. */
#define ENDIAN 0
/* Only UTF-8 sequences of at most 4 bytes are supported, so the encoded bytes are returned packed in an unsigned int. */

/*
 * Encode a Unicode code point as UTF-8.
 *
 * uni: the code point to encode.
 *
 * Returns the UTF-8 bytes packed into a single unsigned int: the lead byte
 * sits in the most significant occupied byte and the final byte in the low
 * 8 bits (e.g. 0x7063 -> 0xe781a3, 0x1f600 -> 0xf09f9880). Values below 0x80
 * are returned unchanged. Values above 0x10ffff are not code points; they
 * are not encoded and are returned unchanged.
 *
 * The parameter is unsigned int rather than a 16-bit type so that code
 * points above 0xffff can actually reach the 4-byte branch. Callers passing
 * a 16-bit value get the same result as before.
 *
 * Surrogate values 0xd800-0xdfff are not rejected; they are encoded with the
 * ordinary 3-byte pattern. The 4-byte result needs unsigned int to be at
 * least 32 bits wide.
 */
unsigned int u2utf8(unsigned int uni)
{
	if(uni < 0x80)
	{
		/* 1 byte: 0xxxxxxx */
		return uni;
	}
	if(uni < 0x800)
	{
		/* 2 bytes: 110xxxxx 10xxxxxx */
		return (0xc0u | (uni >> 6)) << 8
			| (0x80u | (uni & 0x3f));
	}
	if(uni < 0x10000)
	{
		/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
		return (0xe0u | (uni >> 12)) << 16
			| (0x80u | ((uni >> 6) & 0x3f)) << 8
			| (0x80u | (uni & 0x3f));
	}
	if(uni < 0x110000)
	{
		/* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		return (0xf0u | (uni >> 18)) << 24
			| (0x80u | ((uni >> 12) & 0x3f)) << 16
			| (0x80u | ((uni >> 6) & 0x3f)) << 8
			| (0x80u | (uni & 0x3f));
	}

	/* Beyond the last code point: not handled, hand the value back as is. */
	return uni;
}

/*
 * Rebuild a value from its hexadecimal digits,
 * e.g. 0x7063 -> 7*0x1000 + 0*0x100 + 6*0x10 + 3.
 *
 * c: the value to decompose and recombine.
 *
 * Bits 12 and above are carried over as one group and the low three hex
 * digits are added back at their own positions, so the result equals c.
 *
 * Each shifted term is parenthesized because '+' binds tighter than '<<';
 * without the parentheses the expression is a chain of shifts by summed
 * amounts instead of a sum of shifted digits.
 */
unsigned int dec16(unsigned int c)
{
	return ((c >> 12) << 12)
		+ (((c >> 8) & 0x0f) << 8)
		+ (((c >> 4) & 0x0f) << 4)
		+ (c & 0x0f);
}



/* Return the value (0-15) of a hexadecimal digit character, or -1 if ch is not one. */
static int hex_digit_value(char ch)
{
	if(ch >= '0' && ch <= '9')
	{
		return ch - '0';
	}
	if(ch >= 'A' && ch <= 'F')
	{
		return ch - 'A' + 10;
	}
	if(ch >= 'a' && ch <= 'f')
	{
		return ch - 'a' + 10;
	}
	return -1;
}

/*
 * Command-line entry point: convert one 16-bit Unicode value to UTF-8 and
 * print the result.
 *
 * argv[1] is the value to convert. With a "0x"/"0X" prefix it is read as
 * hexadecimal, otherwise as decimal. It must not start with '-' and must not
 * exceed 0xffff.
 *
 * Returns 0 on success and -1 on a missing, malformed or out-of-range
 * argument.
 *
 * NOTE(review): the usage text says "hex" and shows "7063" without a prefix,
 * but an unprefixed argument is parsed as decimal. Confirm which is intended.
 */
int main(int argc, char *argv[])
{
	unsigned int utf8;
	unsigned short uni;
	unsigned int c = 0;

	if(argc < 2)
	{
		printf("\nusage: u2utf8 hex( >= 0) \n\te.g. u2utf 7063\n\n");
		return -1;
	}
	if(argv[1][0] == '-')
	{
		printf("\n negative is not allowed!");
		return -1;
	}
	if(argv[1][0] == '0' && (argv[1][1] == 'x' || (argv[1][1] == 'X')))
	{
		const char *p = argv[1] + 2;

		if(*p == '\0')
		{
			/* "0x" with no digits after it */
			printf("illegal!\n");
			return -1;
		}
		for(; *p; p ++)
		{
			int digit = hex_digit_value(*p);

			if(digit < 0)
			{
				printf("illegal char\n");
				return -1;
			}
			/*
			 * Only shift in another digit while the top four bits are
			 * free. Once they are not, c is already far above 0xffff, so
			 * the range check below rejects it and nothing is lost by
			 * skipping the remaining digits (which are still validated).
			 */
			if(c <= (~0u >> 4))
			{
				c = (c << 4) | (unsigned int)digit;
			}
		}
		printf("%x \t", c);
		if(c > 0xffff)
		{
			printf("\n%u is too big!\n", c);
			return -1;
		}
		uni = (unsigned short)c;
	}
	else
	{
		char *end;
		/* On overflow strtoul returns ULONG_MAX, which the range check rejects. */
		unsigned long parsed = strtoul(argv[1], &end, 10);

		if(end == argv[1] || *end != '\0')
		{
			/* no digits at all, or trailing characters after the number */
			printf("illegal char\n");
			return -1;
		}
		if(parsed > 0xffff)
		{
			printf("\n%lu is too big!\n", parsed);
			return -1;
		}
		c = (unsigned int)parsed;
		uni = (unsigned short)c;
	}

#if ENDIAN
	/* swap the two bytes of the 16-bit value */
	uni = (uni << 8) | (uni >> 8);
#endif
	utf8 = u2utf8(uni);
	printf("u-%x--->utf-8:0x%x\n", uni, utf8);
	return 0;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?