📄 utf8.h
字号:
/* * This file is part of the Sofia-SIP package * * Copyright (C) 2005 Nokia Corporation. * * Contact: Pekka Pessi <pekka.pessi@nokia.com> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA * *//**@file sofia-sip/utf8.h * Encoding/Decoding Functions for UCS Transformation Format UTF-8. * * UTF-8 encoding codes the ISO 10646 (Unicode, UCS2 and UCS4) characters as * variable length (1 - 6 bytes) strings of 8-bit characters. * * @author Pekka Pessi <pekka.pessi@nokia.com> * * @date Created: Tue Apr 21 15:32:38 1998 pessi * @sa <a href="ftp://ftp.ietf.org/rfc/rfc2279.txt">RFC 2279</a>, * <i>"UTF-8, a transformation format of ISO 10646"</i>, * F. Yergeau. January 1998. * */#ifndef UTF8_H/** Defined when <sofia-sip/utf8.h> has been included */#define UTF8_H#ifndef SU_TYPES_H#include <sofia-sip/su_types.h>#endifSOFIA_BEGIN_DECLStypedef unsigned char utf8;typedef unsigned short utf16;typedef unsigned char ucs1;typedef unsigned short ucs2;typedef unsigned int ucs4;SOFIAPUBFUN size_t utf8_width(const utf8 *);/* Latin-1 encoding/decoding */SOFIAPUBFUN size_t ucs18decode(char *dst, size_t dst_size, const utf8 *s);SOFIAPUBFUN size_t ucs1encode(utf8 *dst, const ucs1 *s, size_t n, const char quote[128]);SOFIAPUBFUN size_t ucs1declen(const utf8 *s);SOFIAPUBFUN size_t ucs1enclen(const ucs1 *s, size_t n, const char quote[128]);/* UCS2 (BMP) encoding/decoding */size_t ucs2decode(ucs2 *dst, size_t dst_size, const utf8 *s);size_t ucs2encode(utf8 *dst, const ucs2 *s, size_t n, const char quote[128]);size_t ucs2declen(const utf8 *s);size_t ucs2enclen(const ucs2 *s, size_t n, const char quote[128]);size_t ucs4decode(ucs4 *dst, size_t dst_size, const utf8 *s);size_t ucs4encode(utf8 *dst, const ucs4 *s, size_t n, const char quote[128]);size_t ucs4declen(const utf8 *s);size_t ucs4enclen(const ucs4 *s, size_t n, const char quote[128]);size_t ucs2len(ucs2 const *s);int ucs2cmp(ucs2 const *s1, ucs2 const *s2);int ucs2ncmp(ucs2 const *s1, ucs2 const *s2, size_t n);size_t ucs4len(ucs4 const *s);int ucs4cmp(ucs4 const *s1, ucs4 const *s2);int ucs4ncmp(ucs4 const *s1, ucs4 const *s2, size_t n);/* * IS_UCS4_n tests whether UCS4 character should be represented * with 'n' byte utf8 string */#define IS_UCS4_1(x) ((ucs4)(x) <= 0x7fu)#define IS_UCS4_2(x) (0x80u <= (ucs4)(x) && (ucs4)(x) <= 0x7ffu)#define IS_UCS4_3(x) (0x800u <= (ucs4)(x) && (ucs4)(x) <= 0xffffu)#define IS_UCS4_4(x) (0x10000u <= (ucs4)(x) && (ucs4)(x) <= 0x1fFFFFu)#define IS_UCS4_5(x) (0x200000u <= (ucs4)(x) && (ucs4)(x) <= 0x3ffFFFFu)#define IS_UCS4_6(x) (0x4000000u <= (ucs4)(x) && (ucs4)(x) <= 0x7fffFFFFu)/* Special test for ISO-8859-1 characters */#define IS_UCS4_I(x) (0x80u <= (ucs4)(x) && (ucs4)(x) <= 0xffu)/* Length of an UCS4 character in UTF8 encoding */#define UTF8_LEN4(x) (IS_UCS4_1(x) || IS_UCS4_2(x) && 2 || \ IS_UCS4_3(x) && 3 || IS_UCS4_4(x) && 4 || \ IS_UCS4_5(x) && 5 || IS_UCS4_6(x) && 6)/* Length of an UCS2 character in UTF8 encoding */#define UTF8_LEN2(x) (IS_UCS4_1(x) || IS_UCS4_2(x) && 2 || IS_UCS4_3(x) && 3)/* * IS_UTF8_n tests the length of the next wide character */#define IS_UTF8_1(c) (0x00 == ((c) & 0x80))#define IS_UTF8_2(c) (0xc0 == ((c) & 0xe0))#define IS_UTF8_3(c) (0xe0 == ((c) & 0xf0))#define IS_UTF8_4(c) (0xf0 == ((c) & 0xf8))#define IS_UTF8_5(c) (0xf8 == ((c) & 0xfc))#define IS_UTF8_6(c) (0xfc == ((c) & 0xfe))/* Extension byte? */#define IS_UTF8_X(c) (0x80 == ((c) & 0xc0))/* ISO-8859-1 character? */#define IS_UTF8_I(c) (0xc0 == ((c) & 0xfc))#define IS_UTF8_S1(s) \(IS_UTF8_1(s[0]))#define IS_UTF8_S2(s) \(IS_UTF8_2(s[0])&&((s)[1]&192)==128)#define IS_UTF8_SI(s) \(IS_UTF8_I(s[0])&&((s)[1]&192)==128)#define IS_UTF8_S3(s) \(IS_UTF8_3(s[0])&& ((s)[1]&192)==128&&((s)[2]&192)==128)#define IS_UTF8_S4(s) \(IS_UTF8_4(s[0])&& ((s)[1]&192)==128&&((s)[2]&192)==128&&((s)[3]&192)==128)#define IS_UTF8_S5(s) \(IS_UTF8_5(s[0])&& ((s)[1]&192)==128&&((s)[2]&192)==128&&\ ((s)[3]&192)==128&&((s)[4]&192)==128)#define IS_UTF8_S6(s) \(IS_UTF8_6(s[0])&& ((s)[1]&192)==128&&((s)[2]&192)==128&&((s)[3]&192)==128&&\ ((s)[4]&192)==128&&((s)[5]&192)==128)#define UCS4_S1(s) ((ucs4)(s[0]))#define UCS4_S2(s) ((ucs4)\ (((s[0])&31)<<6)|((s[1])&63))#define UCS4_S3(s) ((ucs4)\ (((s[0])&15)<<12)|(((s[1])&63)<<6)|((s[2])&63))#define UCS4_S4(s) ((ucs4)\ (((s[0])&7)<<18)|(((s[1])&63)<<12)|(((s[2])&63)<<6)|\ ((s[3])&63))#define UCS4_S5(s) ((ucs4)\ (((s[0])&3)<<24)|(((s[1])&63)<<18)|(((s[2])&63)<<12)|\ (((s[3])&63)<<6)|((s[4])&63))#define UCS4_S6(s) ((ucs4)\ (((s[0])&1)<<30)|(((s[1])&63)<<24)|(((s[2])&63)<<18)|\ (((s[3])&63)<<12)|(((s[4])&63)<<6)|((s[5])&63))#define UTF8_S1(s,c) ((s)[0]=(c))#define UTF8_S2(s,c) ((s)[0]=(((c)>>6)&31)|0xc0,\ (s)[1]=((c)&63)|128)#define UTF8_S3(s,c) ((s)[0]=(((c)>>12)&15)|0xe0,\ (s)[1]=((c>>6)&63)|128,\ (s)[2]=((c)&63)|128)#define UTF8_S4(s,c) ((s)[0]=(((c)>>18)&7)|0xf0,\ (s)[1]=((c>>12)&63)|128,\ (s)[2]=((c>>6)&63)|128,\ (s)[3]=((c)&63)|128)#define UTF8_S5(s,c) ((s)[0]=(((c)>>24)&3)|0xf8,\ (s)[1]=((c>>18)&63)|128,\ (s)[2]=((c>>12)&63)|128,\ (s)[3]=((c>>6)&63)|128,\ (s)[4]=((c)&63)|128)#define UTF8_S6(s,c) ((s)[0]=(((c)>>30)&1)|0xfc,\ (s)[1]=((c>>24)&63)|128,\ (s)[2]=((c>>18)&63)|128,\ (s)[3]=((c>>12)&63)|128,\ (s)[4]=((c>>6)&63)|128,\ (s)[5]=((c)&63)|128) SOFIA_END_DECLS#endif /* UTF8_H */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -