📄 ucs4.c

📁 package of develop dns
💻 C
字号:
#ifndef lintstatic char *rcsid = "$Id: ucs4.c,v 1.1.1.1 2003/06/04 00:26:14 marka Exp $";#endif/* * Copyright (c) 2001 Japan Network Information Center.  All rights reserved. *   * By using this file, you agree to the terms and conditions set forth bellow. *  * 			LICENSE TERMS AND CONDITIONS  *  * The following License Terms and Conditions apply, unless a different * license is obtained from Japan Network Information Center ("JPNIC"), * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, * Chiyoda-ku, Tokyo 101-0047, Japan. *  * 1. Use, Modification and Redistribution (including distribution of any *    modified or derived work) in source and/or binary forms is permitted *    under this License Terms and Conditions. *  * 2. Redistribution of source code must retain the copyright notices as they *    appear in each source code file, this License Terms and Conditions. *  * 3. Redistribution in binary form must reproduce the Copyright Notice, *    this License Terms and Conditions, in the documentation and/or other *    materials provided with the distribution.  For the purposes of binary *    distribution the "Copyright Notice" refers to the following language: *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved." *  * 4. The name of JPNIC may not be used to endorse or promote products *    derived from this Software without specific prior written approval of *    JPNIC. *  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. */#include <config.h>#include <stddef.h>#include <stdlib.h>#include <string.h>#include <idn/assert.h>#include <idn/result.h>#include <idn/logmacro.h>#include <idn/util.h>#include <idn/ucs4.h>#include <idn/debug.h>/* * Unicode surrogate pair. */#define IS_SURROGATE_HIGH(v)	(0xd800 <= (v) && (v) <= 0xdbff)#define IS_SURROGATE_LOW(v)	(0xdc00 <= (v) && (v) <= 0xdfff)#define SURROGATE_HIGH(v)	(SURROGATE_H_OFF + (((v) - 0x10000) >> 10))#define SURROGATE_LOW(v)	(SURROGATE_L_OFF + ((v) & 0x3ff))#define SURROGATE_BASE		0x10000#define SURROGATE_H_OFF		0xd800#define SURROGATE_L_OFF		0xdc00#define COMBINE_SURROGATE(h, l) \	(SURROGATE_BASE + (((h)-SURROGATE_H_OFF)<<10) + ((l)-SURROGATE_L_OFF))/* * ASCII ctype macros. * Note that these macros evaluate the argument multiple times.  Be careful. */#define ASCII_TOUPPER(c) \	(('a' <= (c) && (c) <= 'z') ? ((c) - 'a' + 'A') : (c))#define ASCII_TOLOWER(c) \	(('A' <= (c) && (c) <= 'Z') ? ((c) - 'A' + 'a') : (c))idn_result_tidn_ucs4_ucs4toutf16(const unsigned long *ucs4, unsigned short *utf16,		     size_t tolen) {	unsigned short *utf16p = utf16;	unsigned long v;	idn_result_t r;	TRACE(("idn_ucs4_ucs4toutf16(ucs4=\"%s\", tolen=%d)\n",	       idn__debug_ucs4xstring(ucs4, 50), (int)tolen));	while (*ucs4 != '\0') {		v = *ucs4++;		if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {			WARNING(("idn_ucs4_ucs4toutf16: UCS4 string contains "				 "surrogate pair\n"));			r = idn_invalid_encoding;			goto ret;		} else if (v > 0xffff) {			/* Convert to surrogate pair */			if (v >= 0x110000) {				r = idn_invalid_encoding;				goto ret;			}			if (tolen < 2) {				r = idn_buffer_overflow;				goto ret;			}			*utf16p++ = SURROGATE_HIGH(v);			*utf16p++ = SURROGATE_LOW(v);			tolen -= 2;		} else {			if (tolen < 1) {				r = idn_buffer_overflow;				goto ret;			}			*utf16p++ = v;			tolen--;		}	}	if (tolen < 1) {		r = idn_buffer_overflow;		goto ret;	}	*utf16p = '\0';	r = idn_success;ret:	if (r == idn_success) {		TRACE(("idn_ucs4_ucs4toutf16(): success (utf16=\"%s\")\n",		       idn__debug_utf16xstring(utf16, 50)));	} else {		TRACE(("idn_ucs4_ucs4toutf16(): %s\n",		       idn_result_tostring(r)));	}	return (r);}idn_result_tidn_ucs4_utf16toucs4(const unsigned short *utf16, unsigned long *ucs4,		     size_t tolen) {	unsigned long *ucs4p = ucs4;	unsigned short v0, v1;	idn_result_t r;	TRACE(("idn_ucs4_utf16toucs4(utf16=\"%s\", tolen=%d)\n",	       idn__debug_utf16xstring(utf16, 50), (int)tolen));	while (*utf16 != '\0') {		v0 = *utf16;		if (tolen < 1) {			r = idn_buffer_overflow;			goto ret;		}		if (IS_SURROGATE_HIGH(v0)) {			v1 = *(utf16 + 1);			if (!IS_SURROGATE_LOW(v1)) {				WARNING(("idn_ucs4_utf16toucs4: "					 "corrupted surrogate pair\n"));				r = idn_invalid_encoding;				goto ret;			}			*ucs4p++ = COMBINE_SURROGATE(v0, v1);			tolen--;			utf16 += 2;		} else {			*ucs4p++ = v0;			tolen--;			utf16++;					}	}	if (tolen < 1) {		r = idn_buffer_overflow;		goto ret;	}	*ucs4p = '\0';	r = idn_success;ret:	if (r == idn_success) {		TRACE(("idn_ucs4_utf16toucs4(): success (ucs4=\"%s\")\n",		       idn__debug_ucs4xstring(ucs4, 50)));	} else {		TRACE(("idn_ucs4_utf16toucs4(): %s\n",		       idn_result_tostring(r)));	}	return (r);}idn_result_tidn_ucs4_utf8toucs4(const char *utf8, unsigned long *ucs4, size_t tolen) {	const unsigned char *utf8p = (const unsigned char *)utf8;	unsigned long *ucs4p = ucs4;	unsigned long v, min;	unsigned char c;	int width;	int i;	idn_result_t r;	TRACE(("idn_ucs4_utf8toucs4(utf8=\"%s\", tolen=%d)\n",	       idn__debug_xstring(utf8, 50), (int)tolen));	while(*utf8p != '\0') {		c = *utf8p++;		if (c < 0x80) {			v = c;			min = 0;			width = 1;		} else if (c < 0xc0) {			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));			r = idn_invalid_encoding;			goto ret;		} else if (c < 0xe0) {			v = c & 0x1f;			min = 0x80;			width = 2;		} else if (c < 0xf0) {			v = c & 0x0f;			min = 0x800;			width = 3;		} else if (c < 0xf8) {			v = c & 0x07;			min = 0x10000;			width = 4;		} else if (c < 0xfc) {			v = c & 0x03;			min = 0x200000;			width = 5;		} else if (c < 0xfe) {			v = c & 0x01;			min = 0x4000000;			width = 6;		} else {			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));			r = idn_invalid_encoding;			goto ret;		}		for (i = width - 1; i > 0; i--) {			c = *utf8p++;			if (c < 0x80 || 0xc0 <= c) {				WARNING(("idn_ucs4_utf8toucs4: "					 "invalid character\n"));				r = idn_invalid_encoding;				goto ret;			}			v = (v << 6) | (c & 0x3f);		}	        if (v < min) {			WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));			r = idn_invalid_encoding;			goto ret;		}		if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {			WARNING(("idn_ucs4_utf8toucs4: UTF-8 string contains "				 "surrogate pair\n"));			r = idn_invalid_encoding;			goto ret;		}		if (tolen < 1) {			r = idn_buffer_overflow;			goto ret;		}		tolen--;		*ucs4p++ = v;	}	if (tolen < 1) {		r = idn_buffer_overflow;		goto ret;	}	*ucs4p = '\0';	r = idn_success;ret:	if (r == idn_success) {		TRACE(("idn_ucs4_utf8toucs4(): success (ucs4=\"%s\")\n",		       idn__debug_ucs4xstring(ucs4, 50)));	} else {		TRACE(("idn_ucs4_utf8toucs4(): %s\n",		       idn_result_tostring(r)));	}	return (r);}idn_result_tidn_ucs4_ucs4toutf8(const unsigned long *ucs4, char *utf8, size_t tolen) {	unsigned char *utf8p = (unsigned char *)utf8;	unsigned long v;	int width;	int mask;	int offset;	idn_result_t r;	TRACE(("idn_ucs4_ucs4toutf8(ucs4=\"%s\", tolen=%d)\n",	       idn__debug_ucs4xstring(ucs4, 50), (int)tolen));	while (*ucs4 != '\0') {		v = *ucs4++;		if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {			WARNING(("idn_ucs4_ucs4toutf8: UCS4 string contains "				 "surrogate pair\n"));			r = idn_invalid_encoding;			goto ret;		}		if (v < 0x80) {			mask = 0;			width = 1;		} else if (v < 0x800) {			mask = 0xc0;			width = 2;		} else if (v < 0x10000) {			mask = 0xe0;			width = 3;		} else if (v < 0x200000) {			mask = 0xf0;			width = 4;		} else if (v < 0x4000000) {			mask = 0xf8;			width = 5;		} else if (v < 0x80000000) {			mask = 0xfc;			width = 6;		} else {			WARNING(("idn_ucs4_ucs4toutf8: invalid character\n"));			r = idn_invalid_encoding;			goto ret;		}		if (tolen < width) {			r = idn_buffer_overflow;			goto ret;		}		offset = 6 * (width - 1);		*utf8p++ = (v >> offset) | mask;		mask = 0x80;		while (offset > 0) {			offset -= 6;			*utf8p++ = ((v >> offset) & 0x3f) | mask;		}		tolen -= width;	}	if (tolen < 1) {		r = idn_buffer_overflow;		goto ret;	}	*utf8p = '\0';	r = idn_success;ret:	if (r == idn_success) {		TRACE(("idn_ucs4_ucs4toutf8(): success (utf8=\"%s\")\n",		       idn__debug_xstring(utf8, 50)));	} else {		TRACE(("idn_ucs4_ucs4toutf8(): %s\n",		       idn_result_tostring(r)));	}	return (r);}size_tidn_ucs4_strlen(const unsigned long *ucs4) {	size_t len;	for (len = 0; *ucs4 != '\0'; ucs4++, len++)		/* nothing to do */ ;	return (len);}unsigned long *idn_ucs4_strcpy(unsigned long *to, const unsigned long *from) {	unsigned long *result = to;	while (*from != '\0')		*to++ = *from++;	*to = '\0';	return (result);}unsigned long *idn_ucs4_strcat(unsigned long *to, const unsigned long *from) {	unsigned long *result = to;	while (*to != '\0')		to++;	while (*from != '\0')		*to++ = *from++;	*to = '\0';	return (result);}intidn_ucs4_strcmp(const unsigned long *str1, const unsigned long *str2) {	while (*str1 != '\0') {		if (*str1 > *str2)			return (1);		else if (*str1 < *str2)			return (-1);		str1++;		str2++;	}	if (*str1 > *str2)		return (1);	else if (*str1 < *str2)		return (-1);	return (0);}intidn_ucs4_strcasecmp(const unsigned long *str1, const unsigned long *str2) {	unsigned long c1, c2;	while (*str1 != '\0') {		c1 = ASCII_TOLOWER(*str1);		c2 = ASCII_TOLOWER(*str2);		if (c1 > c2)			return (1);		else if (c1 < c2)			return (-1);		str1++;		str2++;	}	c1 = ASCII_TOLOWER(*str1);	c2 = ASCII_TOLOWER(*str2);	if (c1 > c2)		return (1);	else if (c1 < c2)		return (-1);	return (0);}unsigned long *idn_ucs4_strdup(const unsigned long *str) {	size_t length = idn_ucs4_strlen(str);	unsigned long *dupstr;	dupstr = (unsigned long *)malloc(sizeof(*str) * (length + 1));	if (dupstr == NULL)		return NULL;	memcpy(dupstr, str, sizeof(*str) * (length + 1));	return dupstr;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -