⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 u2utf8.c

📁 utf-8和unicode的互转的c代码
💻 C
字号:




#include <stdio.h>
#include <stdlib.h>

#define ENDIAN 0
/* Only UTF-8 sequences of at most 4 bytes are supported, so the encoded bytes are returned packed in an unsigned int. */

/*
 * Encode one Unicode value as UTF-8.
 *
 * The encoded bytes are returned packed into a single unsigned int, lead
 * byte first (most significant), e.g. 0x7063 -> 0xe781a3.  Values below
 * 0x80 are returned unchanged.
 *
 * No special handling is applied to the surrogate range 0xd800-0xdfff;
 * such values go through the generic three-byte form.
 *
 * Values the encoder does not handle are returned unchanged.
 */
unsigned int u2utf8(unsigned short uni)
{
	/* Work on an unsigned int copy so no shift is done on a signed int. */
	const unsigned int cp = uni;

	if(cp < 0x80u)
	{
		return cp;
	}
	if(cp < 0x800u)
	{
		/* 110xxxxx 10xxxxxx */
		return ((0xc0u | (cp >> 6)) << 8)
			| (0x80u | (cp & 0x3fu));
	}
	if(cp < 0x10000u)
	{
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		return ((0xe0u | (cp >> 12)) << 16)
			| ((0x80u | ((cp >> 6) & 0x3fu)) << 8)
			| (0x80u | (cp & 0x3fu));
	}
	if(cp < 0x110000u)
	{
		/*
		 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		 * Only reachable where unsigned short is wider than 16 bits.
		 */
		return ((0xf0u | (cp >> 18)) << 24)
			| ((0x80u | ((cp >> 12) & 0x3fu)) << 16)
			| ((0x80u | ((cp >> 6) & 0x3fu)) << 8)
			| (0x80u | (cp & 0x3fu));
	}

	/* Not encodable: hand the input back untouched. */
	return cp;
}

/*
 * Rebuild a value from its hexadecimal digits,
 * e.g. 0x7063 ---> 7*0x1000 + 0*0x100 + 6*0x10 + 3.
 *
 * The leading term (c >> 12) is not masked to a single digit, so every bit
 * from bit 12 upwards is carried through and the result equals c.
 */
unsigned int dec16(unsigned int c)
{
	/*
	 * Each shifted term must be parenthesized: '+' binds tighter than '<<',
	 * so without the parentheses the shift counts are added together and the
	 * expression collapses into one chain of shifts.
	 */
	return ((c >> 12) << 12)
		+ (((c >> 8) & 0x0f) << 8)
		+ (((c >> 4) & 0x0f) << 4)
		+ (c & 0x0f);
}



/* Return the value (0-15) of the hexadecimal digit ch, or -1 if ch is not one. */
static int hex_digit_value(char ch)
{
	if(ch >= '0' && ch <= '9')
	{
		return ch - '0';
	}
	if(ch >= 'A' && ch <= 'F')
	{
		return ch - 'A' + 10;
	}
	if(ch >= 'a' && ch <= 'f')
	{
		return ch - 'a' + 10;
	}
	return -1;
}

/*
 * Command-line driver: convert one value to UTF-8 and print the result.
 *
 * argv[1] is read as hexadecimal when it starts with "0x"/"0X", otherwise as
 * decimal.  Values above 0xffff, malformed numbers and a leading '-' are
 * rejected.
 *
 * Returns 0 on success and -1 on any usage or input error.
 */
int main(int argc, char *argv[])
{
	unsigned int utf8;
	unsigned short uni;
	unsigned long value = 0;
	int too_big = 0;
	const char *arg;

	if(argc < 2)
	{
		printf("\nusage: u2utf8 hex( >= 0) \n\te.g. u2utf8 0x7063\n\n");
		return -1;
	}
	arg = argv[1];
	if(arg[0] == '-')
	{
		printf("\n negative is not allowed!");
		return -1;
	}
	if(arg[0] == '0' && (arg[1] == 'x' || arg[1] == 'X'))
	{
		const char *p = arg + 2;

		if(*p == '\0')
		{
			/* "0x" with no digits after it. */
			printf("illegal!\n");
			return -1;
		}
		for(; *p; p++)
		{
			int digit = hex_digit_value(*p);

			if(digit < 0)
			{
				printf("illegal char\n");
				return -1;
			}
			/*
			 * Keep validating the remaining characters, but stop
			 * accumulating once another digit would shift bits out.
			 */
			if(((value << 4) >> 4) != value)
			{
				too_big = 1;
			}
			else
			{
				value = (value << 4) | (unsigned long)digit;
			}
		}
		if(!too_big)
		{
			printf("%lx \t", value);
		}
	}
	else
	{
		char *end;

		/* On overflow strtoul yields ULONG_MAX, which the range check below rejects. */
		value = strtoul(arg, &end, 10);
		if(end == arg || *end != '\0')
		{
			printf("illegal char\n");
			return -1;
		}
	}

	if(too_big || value > 0xffff)
	{
		printf("\n%s is too big!\n", arg);
		return -1;
	}
	uni = (unsigned short)value;

#if ENDIAN
	/* Swap the two bytes of the 16-bit value. */
	uni = (unsigned short)((uni << 8) | (uni >> 8));
#endif
	utf8 = u2utf8(uni);
	printf("u-%x--->utf-8:0x%x\n", (unsigned int)uni, utf8);
	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -