⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unicodeutf8.c

📁 标准c代码
💻 C
字号:
#include <stdio.h>
#include <string.h>

/* Append one 16-bit code unit to out at byte offset len (native byte order);
 * returns the new byte offset. */
static int utf82unicode_put16(char *out, int len, unsigned short unit)
{
    memcpy(&out[len], &unit, 2);
    return len + 2;
}

/**
 * Decode UTF-8 bytes into 16-bit Unicode code units.
 *
 * @param byte   input array; each element holds one UTF-8 byte value
 * @param index  position in byte[] at which decoding starts
 * @param count  number of valid elements in byte[]
 * @param out    destination buffer; code units are stored as 2 bytes each in
 *               the machine's native byte order. It must have room for
 *               2 * (count - index) bytes, the worst case (all single-byte
 *               input).
 * @return number of bytes written to out (always even)
 *
 * Decoding stops, returning what has been produced so far, at the first
 * invalid lead byte or when a multi-byte sequence is cut off by the end of
 * the input.
 *
 * Code points up to U+FFFF produce one code unit. Code points in
 * U+10000..U+10FFFF produce a UTF-16 surrogate pair instead of being
 * truncated to their low 16 bits. The obsolete 5- and 6-byte forms (and any
 * 4-byte value above U+10FFFF) cannot be represented and produce U+FFFD.
 */
int utf82unicode(unsigned int  byte[], int index, int count, char *out)
{
    int len = 0;

    if (byte == NULL || out == NULL)
        return 0;

    while (index < count) {
        const unsigned int lead = byte[index];
        unsigned long cp;   /* code point being assembled */
        int extra;          /* continuation bytes that follow the lead byte */
        int k;

        if ((lead & 0x80) == 0x00) {            /* 1 byte:  0xxxxxxx */
            cp = lead;
            extra = 0;
        } else if ((lead & 0xE0) == 0xC0) {     /* 2 bytes: 110xxxxx */
            cp = lead & 0x1F;
            extra = 1;
        } else if ((lead & 0xF0) == 0xE0) {     /* 3 bytes: 1110xxxx */
            cp = lead & 0x0F;
            extra = 2;
        } else if ((lead & 0xF8) == 0xF0) {     /* 4 bytes: 11110xxx */
            cp = lead & 0x07;
            extra = 3;
        } else if ((lead & 0xFC) == 0xF8) {     /* 5 bytes: 111110xx */
            cp = lead & 0x03;
            extra = 4;
        } else if ((lead & 0xFE) == 0xFC) {     /* 6 bytes: 1111110x */
            cp = lead & 0x01;
            extra = 5;
        } else {
            return len;                         /* not a valid lead byte */
        }

        /* The whole sequence must be present in the input. */
        if (index + extra >= count)
            return len;

        /* Each continuation byte contributes its low 6 bits. */
        for (k = 1; k <= extra; ++k)
            cp = (cp << 6) | (byte[index + k] & 0x3F);
        index += extra + 1;

        if (cp <= 0xFFFFUL) {
            len = utf82unicode_put16(out, len, (unsigned short)cp);
        } else if (cp <= 0x10FFFFUL) {
            /* Supplementary plane: split into a high/low surrogate pair. */
            cp -= 0x10000UL;
            len = utf82unicode_put16(out, len,
                                     (unsigned short)(0xD800UL | (cp >> 10)));
            len = utf82unicode_put16(out, len,
                                     (unsigned short)(0xDC00UL | (cp & 0x3FFUL)));
        } else {
            /* Beyond the Unicode range: emit the replacement character. */
            len = utf82unicode_put16(out, len, (unsigned short)0xFFFD);
        }
    }

    return len;
}

/*
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * in   character to convert ('0'-'9', 'A'-'F' or 'a'-'f')
 * out  receives the value 0..15 on success; left untouched on failure
 *
 * Returns 1 when `in` is a hexadecimal digit, 0 otherwise.
 */
int char2digist(char in, int *out)
{
        int value;

        if (in >= '0' && in <= '9') {
                value = in - '0';
        } else if (in >= 'A' && in <= 'F') {
                value = 10 + (in - 'A');
        } else if (in >= 'a' && in <= 'f') {
                value = 10 + (in - 'a');
        } else {
                /* Not a hex digit: report failure without writing *out. */
                return 0;
        }

        *out = value;
        return 1;
}


/*
 * Parse one byte value from hexadecimal text.
 *
 * ch     text containing hexadecimal digits
 * index  position of the first digit to read
 * count  number of characters available in ch
 * byte   receives the parsed value on success; left untouched on failure
 *
 * When at least two characters remain (index + 1 < count) both are read and
 * combined as high and low nibble. Otherwise only ch[index] is read and its
 * value is stored on its own.
 *
 * Returns 1 on success, 0 if a character read is not a hexadecimal digit.
 */
int widechar2hexbyte(char* ch, int index, int count, unsigned int *byte)
{
        int hi, lo;

        /* Only one character left: it forms the value by itself. */
        if (index + 1 >= count) {
                if (!char2digist(ch[index], &lo))
                        return 0;
                *byte = lo;
                return 1;
        }

        /* Regular case: two digits make one byte. */
        if (!char2digist(ch[index], &hi) || !char2digist(ch[index + 1], &lo))
                return 0;

        *byte = ((unsigned int)(hi << 4)) | lo;
        return 1;
}

/**
 * Convert hex-encoded UTF-8 text into 16-bit Unicode code units.
 *
 * @param src  NUL-terminated string of hexadecimal digits, two per UTF-8
 *             byte (e.g. "E4B8AD"); parsing of each pair is done by
 *             widechar2hexbyte
 * @param out  destination buffer, filled by utf82unicode
 * @return the number of bytes utf82unicode wrote to out, or 1 if src or out
 *         is NULL or src contains a character that is not a hexadecimal
 *         digit (out is not written in that case)
 *
 * At most 200 bytes (400 hex digits) are decoded; any further input is
 * ignored.
 *
 * The output buffer is not cleared or terminated: its size is not known
 * here, so only the bytes reported by the return value are written. (The
 * previous memset(out, 0, sizeof(out)) cleared just sizeof(char *) bytes and
 * could write past the end of a smaller buffer.)
 */
int utf8unicode(char *src , char *out)
{
        enum { MAX_BYTES = 200 };
        unsigned int bytes[MAX_BYTES];
        int bi = 0;
        int i, len;

        if (src == NULL || out == NULL)
                return 1;

        len = (int)strlen(src);

        /* Each iteration consumes one pair of hex digits. */
        for (i = 0; i < len && bi < MAX_BYTES; i += 2)
        {
                if (!widechar2hexbyte(src, i, len, &bytes[bi++]))
                        return 1;
        }

        return utf82unicode(bytes, 0, bi, out);

}/*---------------------------------------------------*/


typedef   unsigned  short  uchar2;   

/**
 * Encode one 16-bit code unit as UTF-8.
 *
 * @param wchar  code unit to encode
 * @param utf8   destination; receives the encoded bytes (no terminator is
 *               written). The caller must provide room for the longest
 *               sequence this function can produce for its input: 3 bytes
 *               when uchar2 is 16 bits wide.
 * @return number of bytes written (1..3 for 16-bit values), or -1 if utf8 is
 *         NULL
 *
 * Surrogate code units (0xD800..0xDFFF) are not treated specially; each one
 * is encoded on its own as a 3-byte sequence.
 */
int Uni2UTF(uchar2 wchar, char *utf8)
{
 unsigned long cp = wchar;
 int len = 0;

 if (utf8 == NULL) {
  return -1;
 }

 if (cp < 0x80UL)
 {
  /* 1 byte: 0xxxxxxx */
  utf8[len++] = (char)cp;
 }
 else if (cp < 0x800UL)
 {
  /* 2 bytes: 110xxxxx 10xxxxxx */
  utf8[len++] = (char)(0xc0 | (cp >> 6));
  utf8[len++] = (char)(0x80 | (cp & 0x3f));
 }
 else if (cp < 0x10000UL)
 {
  /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
  utf8[len++] = (char)(0xe0 | (cp >> 12));
  utf8[len++] = (char)(0x80 | ((cp >> 6) & 0x3f));
  utf8[len++] = (char)(0x80 | (cp & 0x3f));
 }
 else if (cp < 0x200000UL)
 {
  /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
     Only reachable if uchar2 is wider than 16 bits. */
  utf8[len++] = (char)(0xf0 | (cp >> 18));
  utf8[len++] = (char)(0x80 | ((cp >> 12) & 0x3f));
  utf8[len++] = (char)(0x80 | ((cp >> 6) & 0x3f));
  utf8[len++] = (char)(0x80 | (cp & 0x3f));
 }
 return len;
}

int unicodeutf8(char *unic, int uniLen, char *utf8)
{
	int		i, len ;
	char *pUtf8 = utf8;

	uchar2 *pUni =(uchar2 *)unic;
	
	for (i =0; i< uniLen ; i=i+2 )
	{
		if( (len=Uni2UTF( *pUni, pUtf8 ) )<0) return -1;
		pUtf8 = pUtf8+len;
		pUni ++;
	}

	return (pUtf8 - utf8);//返回utf8长度
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -