⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wchar.c

📁 从一个开源软件中摘取的正则表达式模块
💻 C
📖 第 1 页 / 共 3 页
字号:
ucs_wcwidth(pg_wchar ucs){	/* sorted list of non-overlapping intervals of non-spacing characters */	static const struct mbinterval combining[] = {		{0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},		{0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},		{0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},		{0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},		{0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},		{0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},		{0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},		{0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},		{0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},		{0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},		{0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},		{0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},		{0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},		{0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},		{0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},		{0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},		{0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},		{0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},		{0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},		{0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},		{0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},		{0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},		{0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},		{0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},		{0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},		{0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},		{0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},		{0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},		{0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},		{0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},		{0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},		{0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},		{0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF},		{0xFFF9, 0xFFFB}	};	/* test for 8-bit control characters */	if (ucs == 0)		return 0;	if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)		return -1;	/* binary search in table of non-spacing characters */	if (mbbisearch(ucs, combining,				   sizeof(combining) / sizeof(struct mbinterval) - 1))		return 0;	/*	 * if we arrive here, ucs is not a combining or C0/C1 control character	 */	return 1 +		(ucs >= 0x1100 &&		 (ucs <= 0x115f ||		/* Hangul Jamo init. consonants */		  (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&		   ucs != 0x303f) ||	/* CJK ... Yi */		  (ucs >= 0xac00 && ucs <= 0xd7a3) ||	/* Hangul Syllables */		  (ucs >= 0xf900 && ucs <= 0xfaff) ||	/* CJK Compatibility												 * Ideographs */		  (ucs >= 0xfe30 && ucs <= 0xfe6f) ||	/* CJK Compatibility Forms */		  (ucs >= 0xff00 && ucs <= 0xff5f) ||	/* Fullwidth Forms */		  (ucs >= 0xffe0 && ucs <= 0xffe6) ||		  (ucs >= 0x20000 && ucs <= 0x2ffff)));}static pg_wcharutf2ucs(const unsigned char *c){	/*	 * one char version of pg_utf2wchar_with_len. no control here, c must	 * point to a large enough string	 */	if ((*c & 0x80) == 0)		return (pg_wchar) c[0];	else if ((*c & 0xe0) == 0xc0)		return (pg_wchar) (((c[0] & 0x1f) << 6) |						   (c[1] & 0x3f));	else if ((*c & 0xf0) == 0xe0)		return (pg_wchar) (((c[0] & 0x0f) << 12) |						   ((c[1] & 0x3f) << 6) |						   (c[2] & 0x3f));	else if ((*c & 0xf8) == 0xf0)		return (pg_wchar) (((c[0] & 0x07) << 18) |						   ((c[1] & 0x3f) << 12) |						   ((c[2] & 0x3f) << 6) |						   (c[3] & 0x3f));	else		/* that is an invalid code on purpose */		return 0xffffffff;}static intpg_utf_dsplen(const unsigned char *s){	return ucs_wcwidth(utf2ucs(s));}/* * convert mule internal code to pg_wchar * caller should allocate enough space for "to" * len: length of from. * "from" not necessarily null terminated. */static intpg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len){	int			cnt = 0;	while (len > 0 && *from)	{		if (IS_LC1(*from) && len >= 2)		{			*to = *from++ << 16;			*to |= *from++;			len -= 2;		}		else if (IS_LCPRV1(*from) && len >= 3)		{			from++;			*to = *from++ << 16;			*to |= *from++;			len -= 3;		}		else if (IS_LC2(*from) && len >= 3)		{			*to = *from++ << 16;			*to |= *from++ << 8;			*to |= *from++;			len -= 3;		}		else if (IS_LCPRV2(*from) && len >= 4)		{			from++;			*to = *from++ << 16;			*to |= *from++ << 8;			*to |= *from++;			len -= 4;		}		else		{						/* assume ASCII */			*to = (unsigned char) *from++;			len--;		}		to++;		cnt++;	}	*to = 0;	return cnt;}intpg_mule_mblen(const unsigned char *s){	int			len;	if (IS_LC1(*s))		len = 2;	else if (IS_LCPRV1(*s))		len = 3;	else if (IS_LC2(*s))		len = 3;	else if (IS_LCPRV2(*s))		len = 4;	else		len = 1;				/* assume ASCII */	return len;}static intpg_mule_dsplen(const unsigned char *s){	int			len;	if (IS_LC1(*s))		len = 1;	else if (IS_LCPRV1(*s))		len = 1;	else if (IS_LC2(*s))		len = 2;	else if (IS_LCPRV2(*s))		len = 2;	else		len = 1;				/* assume ASCII */	return len;}/* * ISO8859-1 */static intpg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len){	int			cnt = 0;	while (len > 0 && *from)	{		*to++ = *from++;		len--;		cnt++;	}	*to = 0;	return cnt;}static intpg_latin1_mblen(const unsigned char *s){	return 1;}static intpg_latin1_dsplen(const unsigned char *s){	return pg_ascii_dsplen(s);}/* * SJIS */static intpg_sjis_mblen(const unsigned char *s){	int			len;	if (*s >= 0xa1 && *s <= 0xdf)		len = 1;				/* 1 byte kana? */	else if (IS_HIGHBIT_SET(*s))		len = 2;				/* kanji? */	else		len = 1;				/* should be ASCII */	return len;}static intpg_sjis_dsplen(const unsigned char *s){	int			len;	if (*s >= 0xa1 && *s <= 0xdf)		len = 1;				/* 1 byte kana? */	else if (IS_HIGHBIT_SET(*s))		len = 2;				/* kanji? */	else		len = pg_ascii_dsplen(s);		/* should be ASCII */	return len;}/* * Big5 */static intpg_big5_mblen(const unsigned char *s){	int			len;	if (IS_HIGHBIT_SET(*s))		len = 2;				/* kanji? */	else		len = 1;				/* should be ASCII */	return len;}static intpg_big5_dsplen(const unsigned char *s){	int			len;	if (IS_HIGHBIT_SET(*s))		len = 2;				/* kanji? */	else		len = pg_ascii_dsplen(s);		/* should be ASCII */	return len;}/* * GBK */static intpg_gbk_mblen(const unsigned char *s){	int			len;	if (IS_HIGHBIT_SET(*s))		len = 2;				/* kanji? */	else		len = 1;				/* should be ASCII */	return len;}static intpg_gbk_dsplen(const unsigned char *s){	int			len;	if (IS_HIGHBIT_SET(*s))		len = 2;				/* kanji? */	else		len = pg_ascii_dsplen(s);		/* should be ASCII */	return len;}/* * UHC */static intpg_uhc_mblen(const unsigned char *s){	int			len;	if (IS_HIGHBIT_SET(*s))		len = 2;				/* 2byte? */	else		len = 1;				/* should be ASCII */	return len;}static intpg_uhc_dsplen(const unsigned char *s){	int			len;	if (IS_HIGHBIT_SET(*s))		len = 2;				/* 2byte? */	else		len = pg_ascii_dsplen(s);		/* should be ASCII */	return len;}/* *	* GB18030 *	 * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> *	  */static intpg_gb18030_mblen(const unsigned char *s){	int			len;	if (!IS_HIGHBIT_SET(*s))		len = 1;				/* ASCII */	else	{		if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) || (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))			len = 2;		else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)			len = 4;		else			len = 2;	}	return len;}static intpg_gb18030_dsplen(const unsigned char *s){	int			len;	if (IS_HIGHBIT_SET(*s))		len = 2;	else		len = pg_ascii_dsplen(s);		/* ASCII */	return len;}/* *------------------------------------------------------------------- * multibyte sequence validators * * These functions accept "s", a pointer to the first byte of a string, * and "len", the remaining length of the string.  If there is a validly * encoded character beginning at *s, return its length in bytes; else * return -1. * * The functions can assume that len > 0 and that *s != '\0', but they must * test for and reject zeroes in any additional bytes of a multibyte character. * * Note that this definition allows the function for a single-byte * encoding to be just "return 1". *------------------------------------------------------------------- */static intpg_ascii_verifier(const unsigned char *s, int len){	return 1;}#define IS_EUC_RANGE_VALID(c)	((c) >= 0xa1 && (c) <= 0xfe)static intpg_eucjp_verifier(const unsigned char *s, int len){	int			l;	unsigned char c1,				c2;	c1 = *s++;	switch (c1)	{		case SS2:				/* JIS X 0201 */			l = 2;			if (l > len)				return -1;			c2 = *s++;			if (c2 < 0xa1 || c2 > 0xdf)				return -1;			break;		case SS3:				/* JIS X 0212 */			l = 3;			if (l > len)				return -1;			c2 = *s++;			if (!IS_EUC_RANGE_VALID(c2))				return -1;			c2 = *s++;			if (!IS_EUC_RANGE_VALID(c2))				return -1;			break;		default:			if (IS_HIGHBIT_SET(c1))		/* JIS X 0208? */			{				l = 2;				if (l > len)					return -1;				if (!IS_EUC_RANGE_VALID(c1))					return -1;				c2 = *s++;				if (!IS_EUC_RANGE_VALID(c2))					return -1;			}			else				/* must be ASCII */			{				l = 1;			}			break;	}	return l;}static intpg_euckr_verifier(const unsigned char *s, int len){	int			l;	unsigned char c1,				c2;	c1 = *s++;	if (IS_HIGHBIT_SET(c1))	{		l = 2;		if (l > len)			return -1;		if (!IS_EUC_RANGE_VALID(c1))			return -1;		c2 = *s++;		if (!IS_EUC_RANGE_VALID(c2))			return -1;	}	else		/* must be ASCII */	{		l = 1;	}	return l;}/* EUC-CN byte sequences are exactly same as EUC-KR */#define pg_euccn_verifier	pg_euckr_verifierstatic intpg_euctw_verifier(const unsigned char *s, int len){	int			l;	unsigned char c1,				c2;	c1 = *s++;	switch (c1)	{		case SS2:				/* CNS 11643 Plane 1-7 */			l = 4;			if (l > len)				return -1;			c2 = *s++;			if (c2 < 0xa1 || c2 > 0xa7)				return -1;			c2 = *s++;			if (!IS_EUC_RANGE_VALID(c2))				return -1;			c2 = *s++;			if (!IS_EUC_RANGE_VALID(c2))				return -1;			break;		case SS3:				/* unused */			return -1;		default:			if (IS_HIGHBIT_SET(c1))		/* CNS 11643 Plane 1 */			{				l = 2;				if (l > len)					return -1;				/* no further range check on c1? */				c2 = *s++;				if (!IS_EUC_RANGE_VALID(c2))					return -1;			}			else				/* must be ASCII */			{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -