wchar.c

来自「Axis 221 camera embedded programing inte」· C语言代码 · 共 1,712 行 · 第 1/3 页
1,712 行
/*  Copyright (C) 2002, 2003, 2004     Manuel Novoa III * *  This library is free software; you can redistribute it and/or *  modify it under the terms of the GNU Library General Public *  License as published by the Free Software Foundation; either *  version 2 of the License, or (at your option) any later version. * *  This library is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU *  Library General Public License for more details. * *  You should have received a copy of the GNU Library General Public *  License along with this library; if not, write to the Free *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION! * *  Besides uClibc, I'm using this code in my libc for elks, which is *  a 16-bit environment with a fairly limited compiler.  It would make *  things much easier for me if this file isn't modified unnecessarily. *  In particular, please put any new or replacement functions somewhere *  else, and modify the makefile to use your version instead. *  Thanks.  Manuel * *  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION! *//* May 23, 2002     Initial Notes: * * I'm still tweaking this stuff, but it passes the tests I've thrown * at it, and Erik needs it for the gcc port.  The glibc extension * __wcsnrtombs() hasn't been tested, as I didn't find a test for it * in the glibc source.  I also need to fix the behavior of * _wchar_utf8sntowcs() if the max number of wchars to convert is 0. * * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt * file on my platform (x86) show about 5-10% faster conversion speed than * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with * individual mbrtowc()/wcrtomb() calls. 
 *
 * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
 * as a fail-safe UTF-8 decoder appropriate for a terminal, etc.  which
 * needs to deal gracefully with whatever is sent to it.  In that mode,
 * it passes Markus Kuhn's UTF-8-test.txt stress test.  I plan to add
 * an arg to force that behavior, so the interface will be changing.
 *
 * I need to fix the error checking for 16-bit wide chars.  This isn't
 * an issue for uClibc, but may be for ELKS.  I'm currently not sure
 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
 *
 * July 1, 2002
 *
 * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
 * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
 *    locales.
 * Enabled building of a C/POSIX-locale-only version, so full locale support
 *    no longer needs to be enabled.
 *
 * Nov 4, 2002
 *
 * Fixed a bug in _wchar_wcsntoutf8s().  Don't store wcs position if dst is NULL.
 * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
 *   order to support %ls in printf.  See comments below for details.
 * Change behaviour of wc<->mb functions when in the C locale.  Now they do
 *   a 1-1 map for the range 0x80-UCHAR_MAX.  This is for backwards compatibility
 *   and consistency with the standard's requirement that a printf format string
 *   be a valid multibyte string beginning and ending in its initial shift state.
 *
 * Nov 5, 2002
 *
 * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
 *
 * Nov 7, 2002
 *
 * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
 *   Added some size/speed optimizations and integrated it into my locale
 *   framework.  Minimally tested at the moment, but the stub C-locale
 *   version (which most people would probably be using) should be fine.
 *
 * Nov 21, 2002
 *
 * Revert the wc<->mb changes from earlier this month involving the C-locale.
 * Add a couple of ugly hacks to support *wprintf.
* Add a mini iconv() and iconv implementation (requires locale support). * * Aug 1, 2003 * Bug fix for mbrtowc. * * Aug 18, 2003 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ. * * Feb 11, 2004 * Bug fix: Fix size check for remaining output space in iconv(). * * Manuel */#define _GNU_SOURCE#define _ISOC99_SOURCE#include <errno.h>#include <stddef.h>#include <limits.h>#include <stdint.h>#include <inttypes.h>#include <stdlib.h>#include <stdio.h>#include <assert.h>#include <locale.h>#include <wchar.h>#include <bits/uClibc_uwchar.h>/**********************************************************************/#ifdef __UCLIBC_HAS_LOCALE__#ifdef __UCLIBC_MJN3_ONLY__#ifdef L_iswspace/* generates one warning */#warning TODO: Fix Cc2wc* and Cwc2c* defines!#endif#endif /* __UCLIBC_MJN3_ONLY__ */#define ENCODING		((__UCLIBC_CURLOCALE_DATA).encoding)#define Cc2wc_IDX_SHIFT		__LOCALE_DATA_Cc2wc_IDX_SHIFT#define Cc2wc_ROW_LEN		__LOCALE_DATA_Cc2wc_ROW_LEN#define Cwc2c_DOMAIN_MAX	__LOCALE_DATA_Cwc2c_DOMAIN_MAX#define Cwc2c_TI_SHIFT		__LOCALE_DATA_Cwc2c_TI_SHIFT#define Cwc2c_TT_SHIFT		__LOCALE_DATA_Cwc2c_TT_SHIFT#define Cwc2c_TI_LEN		__LOCALE_DATA_Cwc2c_TI_LEN#ifndef __CTYPE_HAS_UTF_8_LOCALES#warning __CTYPE_HAS_UTF_8_LOCALES not set!#endif#else  /* __UCLIBC_HAS_LOCALE__ */#ifdef __UCLIBC_MJN3_ONLY__#ifdef L_btowc/* emit only once */#warning fix preprocessor logic testing locale settings#endif#endif#define ENCODING (__ctype_encoding_7_bit)#ifdef __CTYPE_HAS_8_BIT_LOCALES#error __CTYPE_HAS_8_BIT_LOCALES is defined!#endif#ifdef __CTYPE_HAS_UTF_8_LOCALES#error __CTYPE_HAS_UTF_8_LOCALES is defined!#endif#undef L__wchar_utf8sntowcs#undef L__wchar_wcsntoutf8s#endif /* __UCLIBC_HAS_LOCALE__ *//**********************************************************************/#if WCHAR_MAX > 0xffffUL#define UTF_8_MAX_LEN 6#else#define UTF_8_MAX_LEN 3#endif#define KUHN 1/* Implementation-specific work functions. 
*/extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,								 const char **__restrict src, size_t n,								 mbstate_t *ps, int allow_continuation);extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,								 const wchar_t **__restrict src, size_t wn);/* glibc extensions. */extern size_t __mbsnrtowcs(wchar_t *__restrict dst,						   const char **__restrict src,						   size_t NMC, size_t len, mbstate_t *__restrict ps);extern size_t __wcsnrtombs(char *__restrict dst,						   const wchar_t **__restrict src,						   size_t NWC, size_t len, mbstate_t *__restrict ps);/**********************************************************************/#ifdef L_btowcwint_t btowc(int c){#ifdef __CTYPE_HAS_8_BIT_LOCALES	wchar_t wc;	unsigned char buf[1];	mbstate_t mbstate;	if (c != EOF) {		*buf = (unsigned char) c;		mbstate.__mask = 0;		/* Initialize the mbstate. */		if (mbrtowc(&wc, buf, 1, &mbstate) <= 1) {			return wc;		}	}	return WEOF;#else  /*  __CTYPE_HAS_8_BIT_LOCALES */#ifdef __UCLIBC_HAS_LOCALE__	assert((ENCODING == __ctype_encoding_7_bit)		   || (ENCODING == __ctype_encoding_utf8));#endif /* __UCLIBC_HAS_LOCALE__ */	/* If we don't have 8-bit locale support, then this is trivial since	 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */	return (((unsigned int)c) < 0x80) ? c : WEOF;#endif /*  __CTYPE_HAS_8_BIT_LOCALES */}#endif/**********************************************************************/#ifdef L_wctob/* Note: We completely ignore ps in all currently supported conversions. */int wctob(wint_t c){#ifdef __CTYPE_HAS_8_BIT_LOCALES	unsigned char buf[MB_LEN_MAX];	return (wcrtomb(buf, c, NULL) == 1) ? 
*buf : EOF;#else  /*  __CTYPE_HAS_8_BIT_LOCALES */#ifdef __UCLIBC_HAS_LOCALE__	assert((ENCODING == __ctype_encoding_7_bit)		   || (ENCODING == __ctype_encoding_utf8));#endif /* __UCLIBC_HAS_LOCALE__ */	/* If we don't have 8-bit locale support, then this is trivial since	 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */		/* TODO: need unsigned version of wint_t... *//*  	return (((unsigned int)c) < 0x80) ? c : WEOF; */	return ((c >= 0) && (c < 0x80)) ? c : EOF;#endif /*  __CTYPE_HAS_8_BIT_LOCALES */}#endif/**********************************************************************/#ifdef L_mbsinitint mbsinit(const mbstate_t *ps){	return !ps || !ps->__mask;}#endif/**********************************************************************/#ifdef L_mbrlensize_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)	 __attribute__ ((__weak__, __alias__("__mbrlen")));size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps){	static mbstate_t mbstate;	/* Rely on bss 0-init. */	return mbrtowc(NULL, s, n, (ps != NULL) ? ps : &mbstate);}#endif/**********************************************************************/#ifdef L_mbrtowcsize_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,			   size_t n, mbstate_t *__restrict ps){	static mbstate_t mbstate;	/* Rely on bss 0-init. */	wchar_t wcbuf[1];	const char *p;	size_t r;	char empty_string[1];		/* Avoid static to be fPIC friendly. */	if (!ps) {		ps = &mbstate;	}	if (!s) {		pwc = (wchar_t *) s;	/* NULL */		empty_string[0] = 0;	/* Init the empty string when necessary. */		s = empty_string;		n = 1;	} else if (!n) {		/* TODO: change error code? */		return (ps->__mask && (ps->__wc == 0xffffU))			? ((size_t) -1) : ((size_t) -2);	}	p = s;#ifdef __CTYPE_HAS_UTF_8_LOCALES	/* Need to do this here since mbsrtowcs doesn't allow incompletes. */	if (ENCODING == __ctype_encoding_utf8) {		if (!pwc) {			pwc = wcbuf;		}		r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);		return (r == 1) ? 
(p-s) : r; /* Need to return 0 if nul char. */	}#endif#ifdef __UCLIBC_MJN3_ONLY__#warning TODO: This adds a trailing nul!#endif /* __UCLIBC_MJN3_ONLY__ */	r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);	if (((ssize_t) r) >= 0) {		if (pwc) {			*pwc = *wcbuf;		}	}	return (size_t) r;}#endif/**********************************************************************/#ifdef L_wcrtomb/* Note: We completely ignore ps in all currently supported conversions. *//* TODO: Check for valid state anyway? */size_t wcrtomb(register char *__restrict s, wchar_t wc,			   mbstate_t *__restrict ps){#ifdef __UCLIBC_MJN3_ONLY__#warning TODO: Should wcsnrtombs nul-terminate unconditionally?  Check glibc.#endif /* __UCLIBC_MJN3_ONLY__ */	wchar_t wcbuf[1];	const wchar_t *pwc;	size_t r;	char buf[MB_LEN_MAX];	if (!s) {		s = buf;		wc = 0;	}	pwc = wcbuf;	wcbuf[0] = wc;	r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);	return (r != 0) ? r : 1;}#endif/**********************************************************************/#ifdef L_mbsrtowcssize_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,				 size_t len, mbstate_t *__restrict ps){	static mbstate_t mbstate;	/* Rely on bss 0-init. */	return __mbsnrtowcs(dst, src, SIZE_MAX, len,						((ps != NULL) ? ps : &mbstate));}#endif/**********************************************************************/#ifdef L_wcsrtombs/* Note: We completely ignore ps in all currently supported conversions. * TODO: Check for valid state anyway? */size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,				 size_t len, mbstate_t *__restrict ps){	return __wcsnrtombs(dst, src, SIZE_MAX, len, ps);}#endif/**********************************************************************/#ifdef L__wchar_utf8sntowcs/* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's * UTF-8-test.txt strss test. 
*/
/*  #define DECODER */

/* Defining DECODER forces KUHN to be defined as well. */
#ifdef DECODER
#ifndef KUHN
#define KUHN
#endif
#endif

/*
 * _wchar_utf8sntowcs - decode UTF-8 bytes into wide characters.
 *
 * pwc:  destination for the decoded wide chars.  Two special values put
 *       the function in a "count only" mode in which nothing is stored
 *       through the caller's pointer and *src is left untouched:
 *         - NULL: wn is ignored and treated as SIZE_MAX;
 *         - (wchar_t *) ps: wn is honoured (see the NOTE in the body).
 * wn:   maximum number of wide chars to produce; 0 returns 0 at once.
 * src:  in: *src is the first input byte (must be non-NULL);
 *       out (not in count-only mode): the position reached, or NULL if
 *       a nul char was decoded (non-DECODER build).
 * n:    maximum number of input bytes to consume.
 * ps:   conversion state (must be non-NULL).  A nonzero __mask means a
 *       previous call stopped mid-character; in the non-DECODER build
 *       __wc == 0xffff additionally marks an error state.
 * allow_continuation: nonzero lets the function stop in the middle of a
 *       character, save the partial state in ps and return (size_t)-2.
 *
 * Returns:
 *   - wn minus the remaining count, i.e. the number of wide chars
 *     decoded; a terminating nul is not counted in the non-DECODER build;
 *   - (size_t)-1 with errno set to EILSEQ for an invalid sequence or
 *     when entered in the error state (non-DECODER build only; the
 *     DECODER build substitutes 0xfffd and carries on);
 *   - (size_t)-2 when the input ends mid-character and
 *     allow_continuation is nonzero;
 *   - 0 when the input ends mid-character, allow_continuation is zero
 *     and at least one wide char was already decoded in this call.
 */
size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
						  const char **__restrict src, size_t n,
						  mbstate_t *ps, int allow_continuation)
{
	register const char *s;
	__uwchar_t mask;	/* Bit of wc holding the lead byte's next length bit. */
	__uwchar_t wc;		/* Character being assembled. */
	wchar_t wcbuf[1];	/* Scratch destination for count-only mode. */
	size_t count;		/* Wide chars still allowed. */
	int incr;			/* Destination step: 1 normally, 0 in count-only mode. */

	s = *src;
	assert(s != NULL);
	assert(ps != NULL);

	incr = 1;
	/* NOTE: The following is an AWFUL HACK!  In order to support %s in
	 * wprintf, we need to be able to compute the number of wchars needed
	 * for the mbs conversion, not to exceed the precision specified.
	 * But if dst is NULL, the return value is the length assuming a
	 * sufficiently sized buffer.  So, we allow passing of (wchar_t *) ps
	 * as pwc in order to flag that we really want the length, subject
	 * to the restricted buffer size and no partial conversions.
	 * See mbsnrtowcs() as well. */
	if (!pwc || (pwc == ((wchar_t *)ps))) {
		if (!pwc) {
			wn = SIZE_MAX;
		}
		pwc = wcbuf;
		incr = 0;
	}

	/* This is really here only to support the glibc extension function
	 * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
	 * check on the validity of the mbstate. */
	if (!(count = wn)) {
		return 0;
	}

	if ((mask = (__uwchar_t) ps->__mask) != 0) { /* A continuation... */
#ifdef DECODER
		wc = (__uwchar_t) ps->__wc;
		if (n) {
			goto CONTINUE;
		}
		goto DONE;
#else
		if ((wc = (__uwchar_t) ps->__wc) != 0xffffU) {
			/* TODO: change error code here and below? */
			if (n) {
				goto CONTINUE;
			}
			/* NOTE(review): with no input bytes this falls to DONE, which
			 * clears ps->__mask and so drops the saved partial character. */
			goto DONE;
		}
		__set_errno(EILSEQ);
		return (size_t) -1;		/* We're in an error state. */
#endif
	}

	do {
		if (!n) {
			goto DONE;
		}
		--n;
		if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
			mask = 0x40;
#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: Fix range for 16 bit wchar_t case.
#endif
			/* Accept only lead bytes in the range 0xc0..0xfd. */
			if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
				goto START;
			}
		BAD:
#ifdef DECODER
			/* Substitute 0xfffd for the bad input and keep decoding. */
			wc = 0xfffdU;
			goto COMPLETE;
#else
			/* Leave ps in the error state (nonzero mask, wc == 0xffff). */
			ps->__mask = mask;
			ps->__wc = 0xffffU;
			__set_errno(EILSEQ);
			return (size_t) -1;	/* Illegal start byte! */
#endif
		CONTINUE:
			while (n) {
				--n;
				/* Every trailing byte must have the form 10xxxxxx. */
				if ((*s & 0xc0) != 0x80) {
					goto BAD;
				}
				/* wc gains 6 payload bits while mask moves up only 5, so
				 * mask ends up on the next lower bit of the lead byte. */
				mask <<= 5;
				wc <<= 6;
				wc += (*s & 0x3f);	/* keep separate for bcc (smaller code) */
				++s;
			START:
				/* Drop the lead-byte length bit that was just accounted for. */
				wc &= ~(mask << 1);

				if ((wc & mask) == 0) {	/* Character completed. */
					/* After this, mask is the smallest value that needs a
					 * sequence of this length (0x40 is bumped to 0x80 for
					 * the two-byte case). */
					if ((mask >>= 5) == 0x40) {
						mask += mask;
					}
					/* Check for invalid sequences (longer than necessary)
					 * and invalid chars.  */
					if ( (wc < mask) /* Sequence not minimal length. */
#ifdef KUHN
#if UTF_8_MAX_LEN == 3
#error broken since mask can overflow!!
						 /* For plane 0, these are the only defined values.*/
						 || (wc > 0xfffdU)
#else
						 /* Note that we don't need to worry about exceeding */
						 /* 31 bits as that is the most that UTF-8 provides. */
						 /* Rejects 0xfffe and 0xffff. */
						 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
#endif
						 /* Rejects 0xd800..0xdfff. */
						 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
#endif /* KUHN */
						 ) {
						goto BAD;
					}
					goto COMPLETE;
				}
			}
			/* Character potentially valid but incomplete. */
			if (!allow_continuation) {
				/* NOTE(review): if anything was already decoded in this
				 * call, 0 is returned without updating *src or ps --
				 * confirm that callers expect this. */
				if (count != wn) {
					return 0;
				}
				/* NOTE: The following can fail if you allow and then disallow
				 * continuation!!! */
#if UTF_8_MAX_LEN == 3
#error broken since mask can overflow!!
#endif
				/* Need to back up... */
				/* One step back per byte of the partial character consumed. */
				do {
					--s;
				} while ((mask >>= 5) >= 0x40);
				goto DONE;
			}
			/* Save the partial character so a later call can resume. */
			ps->__mask = (wchar_t) mask;
			ps->__wc = (wchar_t) wc;
			*src = s;
			return (size_t) -2;
		}
	COMPLETE:
		*pwc = wc;
		pwc += incr;
	}
#ifdef DECODER
	while (--count);
#else
	/* Stop at a nul char without counting it, or when wn is exhausted. */
	while (wc && --count);

	if (!wc) {
		s = NULL;
	}
#endif

 DONE:
	/* ps->__wc is irrelevant here. */
	ps->__mask = 0;
	/* In count-only mode pwc still equals wcbuf, so *src is not updated. */
	if (pwc != wcbuf) {
		*src = s;
	}

	return wn - count;
}

#endif
/**********************************************************************/
#ifdef L__wchar_wcsntoutf8s
wchar.c - 源码说明

本页面展示了「Axis 221 camera embedded programing interface」中的 wchar.c 源码文件，采用 C语言编程语言编写，共 1,712 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与programing相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?