📄 warunicode.c
字号:
#include "StdAfx.h"
#include "WarUnicode.h"

/**
 * Encode an array of UCS-4 values as UTF-8 (based on the RFC 2640 sample
 * code).
 *
 * Every input element is encoded on its own using the 1- to 6-byte forms,
 * so any value in 0..0x7FFFFFFF is representable. Elements outside that
 * range (including negative values when wchar_t is signed) cannot be
 * encoded and are skipped. No terminating NUL is written.
 *
 * @param ucs4_buf  Input values; ucs4_len elements are read.
 * @param ucs4_len  Number of elements in ucs4_buf.
 * @param utf8_buf  Output buffer. It is not bounds-checked: the caller must
 *                  provide room for up to 6 bytes per input element.
 * @return Number of bytes written to utf8_buf.
 */
size_t war_ucs4_to_utf8(const wchar_t *ucs4_buf, size_t ucs4_len,
                        unsigned char *utf8_buf)
{
    const wchar_t *ucs4_endbuf = ucs4_buf + ucs4_len;
    unsigned char *out = utf8_buf;

    for (; ucs4_buf != ucs4_endbuf; ucs4_buf++) {
        /* Work on an unsigned copy so that a signed wchar_t holding a
         * negative value is seen as out of range, not as ASCII. */
        const unsigned long cp = (unsigned long)*ucs4_buf;
        unsigned char lead; /* marker bits of the first byte */
        int extra;          /* number of continuation bytes */

        if (cp <= 0x7FUL) {
            lead = 0x00;
            extra = 0;
        } else if (cp <= 0x7FFUL) {
            lead = 0xC0;
            extra = 1;
        } else if (cp <= 0xFFFFUL) {
            lead = 0xE0;
            extra = 2;
        } else if (cp <= 0x1FFFFFUL) {
            lead = 0xF0;
            extra = 3;
        } else if (cp <= 0x3FFFFFFUL) {
            lead = 0xF8;
            extra = 4;
        } else if (cp <= 0x7FFFFFFFUL) {
            lead = 0xFC;
            extra = 5;
        } else {
            /* Not encodable; skip it so the loop always makes progress. */
            continue;
        }

        /* The first byte carries the marker plus the most significant
         * payload bits; each following byte carries the next 6 bits. */
        *out++ = (unsigned char)(lead | (cp >> (6 * extra)));
        while (extra > 0) {
            extra--;
            *out++ = (unsigned char)(0x80 | ((cp >> (6 * extra)) & 0x3F));
        }
    }

    return (size_t)(out - utf8_buf);
}

/**
 * Decode a UTF-8 byte buffer into an array of UCS-4 values (based on the
 * RFC 2640 sample code).
 *
 * The 1- to 6-byte forms are decoded. Continuation bytes are not validated
 * here (only their low 6 bits are used), so callers that need strict
 * checking should validate the input first. Bytes that cannot start a
 * sequence (a stray continuation byte, 0xFE or 0xFF) are skipped, and a
 * multi-byte sequence cut off by the end of the buffer stops the decoding,
 * so the function never reads past utf8_buf + utf8_len. No terminating
 * zero element is written.
 *
 * @param ucs4_buf  Output buffer. It is not bounds-checked: the caller must
 *                  provide room for up to utf8_len elements.
 * @param utf8_len  Number of bytes in utf8_buf.
 * @param utf8_buf  Input bytes.
 * @return Number of elements written to ucs4_buf.
 */
int war_utf8_to_ucs4(wchar_t *ucs4_buf, size_t utf8_len,
                     const unsigned char *utf8_buf)
{
    const unsigned char *utf8_endbuf = utf8_buf + utf8_len;
    size_t ucs_len = 0;

    while (utf8_buf != utf8_endbuf) {
        const unsigned char lead = *utf8_buf;
        unsigned long cp; /* payload bits accumulated so far */
        size_t extra;     /* number of continuation bytes expected */

        if ((lead & 0x80) == 0x00) {
            cp = lead;
            extra = 0;
        } else if ((lead & 0xE0) == 0xC0) {
            cp = lead & 0x1F;
            extra = 1;
        } else if ((lead & 0xF0) == 0xE0) {
            cp = lead & 0x0F;
            extra = 2;
        } else if ((lead & 0xF8) == 0xF0) {
            cp = lead & 0x07;
            extra = 3;
        } else if ((lead & 0xFC) == 0xF8) {
            cp = lead & 0x03;
            extra = 4;
        } else if ((lead & 0xFE) == 0xFC) {
            cp = lead & 0x01;
            extra = 5;
        } else {
            /* Not a valid lead byte; skip it so the loop always advances. */
            utf8_buf++;
            continue;
        }

        /* Stop on a sequence that would run past the end of the input. */
        if ((size_t)(utf8_endbuf - utf8_buf) <= extra) {
            break;
        }

        utf8_buf++;
        for (; extra > 0; extra--) {
            cp = (cp << 6) | (unsigned long)(*utf8_buf++ & 0x3F);
        }

        *ucs4_buf++ = (wchar_t)cp;
        ucs_len++;
    }

    return (int)ucs_len;
}

/* From RFC 2640 
/**
 * Check whether a byte buffer is well-formed UTF-8 (validator based on the
 * RFC 2640 sample code).
 *
 * Rejected: stray continuation bytes, the bytes 0xFE and 0xFF, truncated
 * sequences, overlong encodings, and encodings of the surrogate range
 * U+D800..U+DFFF (lead byte 0xED followed by 0xA0..0xBF).
 * Still accepted, to match the 1- to 6-byte forms handled by the converters
 * in this file: 5- and 6-byte sequences and values above U+10FFFF.
 *
 * @param buf  Bytes to check.
 * @param len  Number of bytes in buf.
 * @return 1 if the whole buffer is valid (an empty buffer is valid),
 *         0 otherwise.
 */
int war_utf8_valid(const unsigned char *buf, size_t len)
{
    const unsigned char *endbuf = buf + len;
    unsigned char byte2mask = 0x00; /* overlong check: the 2nd byte must set one of these bits */
    int surrogate_lead = 0;         /* non-zero right after a 0xED lead byte */
    int trailing = 0;               /* continuation bytes still expected */

    while (buf != endbuf) {
        const unsigned char c = *buf++;

        if (trailing) {
            if ((c & 0xC0) != 0x80) {
                return 0; /* not a continuation byte */
            }
            if (byte2mask) {
                /* The lead byte carried no payload bits, so the second byte
                 * must carry some, otherwise the encoding is overlong. */
                if (!(c & byte2mask)) {
                    return 0;
                }
                byte2mask = 0x00;
            }
            if (surrogate_lead) {
                /* 0xED followed by 0xA0..0xBF encodes U+D800..U+DFFF. */
                if (c & 0x20) {
                    return 0;
                }
                surrogate_lead = 0;
            }
            trailing--;
            continue;
        }

        if ((c & 0x80) == 0x00) {
            continue; /* single-byte (ASCII) */
        } else if ((c & 0xE0) == 0xC0) {
            if (!(c & 0x1E)) {
                return 0; /* 0xC0 / 0xC1: overlong 2-byte form */
            }
            trailing = 1;
        } else if ((c & 0xF0) == 0xE0) {
            if (!(c & 0x0F)) {
                byte2mask = 0x20;
            } else if (c == 0xED) {
                surrogate_lead = 1;
            }
            trailing = 2;
        } else if ((c & 0xF8) == 0xF0) {
            if (!(c & 0x07)) {
                byte2mask = 0x30;
            }
            trailing = 3;
        } else if ((c & 0xFC) == 0xF8) {
            if (!(c & 0x03)) {
                byte2mask = 0x38;
            }
            trailing = 4;
        } else if ((c & 0xFE) == 0xFC) {
            if (!(c & 0x01)) {
                byte2mask = 0x3C;
            }
            trailing = 5;
        } else {
            return 0; /* stray continuation byte, 0xFE or 0xFF */
        }
    }

    /* A sequence still waiting for continuation bytes is truncated. */
    return trailing == 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -