⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wchar.c

📁 它通过提供glibc兼容使得应用程序移植到较小的c 库时相当得容易. 它能够应用到带虚拟存储的Linux和uClinux上.在大多数带MMU部件的平台上为使它更加紧凑,它也能够编译成共享库.uClib
💻 C
📖 第 1 页 / 共 3 页
字号:
/*
 * Conversion descriptor handed out by iconv_open() as an opaque iconv_t.
 * The "...0" members keep the values chosen at open time so that iconv()
 * can restore them when asked to reset the conversion state.
 */
typedef struct {
	mbstate_t tostate;
	mbstate_t fromstate;
	int tocodeset;
	int fromcodeset;
	int frombom;
	int tobom;
	int fromcodeset0;
	int frombom0;
	int tobom0;
	int skip_invalid_input;		/* To support iconv -c option. */
} _UC_iconv_t;

#ifdef L_iconv

#include <iconv.h>
#include <string.h>
#include <endian.h>
#include <byteswap.h>

#if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
#error unsupported endianness for iconv
#endif

#ifndef __CTYPE_HAS_8_BIT_LOCALES
#error currently iconv requires 8 bit locales
#endif
#ifndef __CTYPE_HAS_UTF_8_LOCALES
#error currently iconv requires UTF-8 locales
#endif

enum {
	IC_WCHAR_T = 0xe0,
	IC_MULTIBYTE = 0xe0,
#if __BYTE_ORDER == __BIG_ENDIAN
	IC_UCS_4 =	0xec,
	IC_UTF_32 = 0xe4,
	IC_UCS_2 =	0xe2,
	IC_UTF_16 = 0xea,
#else
	IC_UCS_4 =	0xed,
	IC_UTF_32 = 0xe5,
	IC_UCS_2 =	0xe3,
	IC_UTF_16 = 0xeb,
#endif
	IC_UTF_8 = 2,
	IC_ASCII = 1
};

/* For the multibyte
 * bit 0 means swap endian
 * bit 1 means 2 byte
 * bit 2 means 4 byte
 *
 * iconv_open() additionally reads bit 4 (0x10) as "expect/emit a BOM".
 *
 * Table layout: each record is <record length><codeset id><name><nul>.
 * The record length counts the two header bytes, the name and the nul,
 * so "s += *s" steps to the next record.
 */
const unsigned char codesets[] =
	"\x0a\xe0""WCHAR_T\x00"		/* superset of UCS-4 but platform-endian */
#if __BYTE_ORDER == __BIG_ENDIAN
	"\x08\xec""UCS-4\x00"		/* always BE */
	"\x0a\xec""UCS-4BE\x00"
	"\x0a\xed""UCS-4LE\x00"
	/* Was "\x09\fe4" (form feed + "e4"), which made the record two bytes
	 * longer than its length byte and derailed the table walk.
	 * NOTE(review): the little-endian table sets the BOM bit (0xf4/0xfa)
	 * for UTF-32/UTF-16 while this branch does not -- confirm intent. */
	"\x09\xe4""UTF-32\x00"		/* platform endian with BOM */
	"\x0b\xe4""UTF-32BE\x00"
	"\x0b\xe5""UTF-32LE\x00"
	"\x08\xe2""UCS-2\x00"		/* always BE */
	"\x0a\xe2""UCS-2BE\x00"
	"\x0a\xe3""UCS-2LE\x00"
	"\x09\xea""UTF-16\x00"		/* platform endian with BOM */
	"\x0b\xea""UTF-16BE\x00"
	"\x0b\xeb""UTF-16LE\x00"
#elif __BYTE_ORDER == __LITTLE_ENDIAN
	"\x08\xed""UCS-4\x00"		/* always BE */
	"\x0a\xed""UCS-4BE\x00"
	"\x0a\xec""UCS-4LE\x00"
	"\x09\xf4""UTF-32\x00"		/* platform endian with BOM */
	"\x0b\xe5""UTF-32BE\x00"
	"\x0b\xe4""UTF-32LE\x00"
	"\x08\xe3""UCS-2\x00"		/* always BE */
	"\x0a\xe3""UCS-2BE\x00"
	"\x0a\xe2""UCS-2LE\x00"
	"\x09\xfa""UTF-16\x00"		/* platform endian with BOM */
	"\x0b\xeb""UTF-16BE\x00"
	"\x0b\xea""UTF-16LE\x00"
#endif
	"\x08\x02""UTF-8\x00"
	"\x0b\x01""US-ASCII\x00"
	"\x07\x01""ASCII";			/* Must be last! (special case to save a nul) */

/*
 * Map a codeset name (compared case-insensitively) to its numeric id.
 * The built-in table above is searched first, then CODESET_LIST, whose
 * entries are numbered from 3 upward.  Returns 0 when nothing matches.
 */
static int find_codeset(const char *name)
{
	const unsigned char *s;
	int codeset;

	for (s = codesets ; *s ; s += *s) {
		if (!strcasecmp(s+2, name)) {
			return s[1];
		}
	}

	/* The following is ripped from find_locale in locale.c. */

	/* TODO: maybe CODESET_LIST + *s ??? */
	/* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
	codeset = 2;
	s = CODESET_LIST;
	do {
		++codeset;		/* Increment codeset first. */
		if (!strcasecmp(CODESET_LIST+*s, name)) {
			return codeset;
		}
	} while (*++s);

	return 0;			/* No matching codeset! */
}

/*
 * Allocate a conversion descriptor for fromcode -> tocode.
 * Returns (iconv_t)(-1) on failure; errno is set to EINVAL here when either
 * name is unknown (on allocation failure errno is whatever malloc left).
 */
iconv_t iconv_open(const char *tocode, const char *fromcode)
{
	register _UC_iconv_t *px;
	int tocodeset, fromcodeset;

	if (((tocodeset = find_codeset(tocode)) != 0)
		&& ((fromcodeset = find_codeset(fromcode)) != 0)) {
		if ((px = malloc(sizeof(_UC_iconv_t))) != NULL) {
			px->tocodeset = tocodeset;
			/* Bit 4 is a BOM flag only for the 0xe0+ multibyte ids.  The
			 * 8-bit codesets are plain ordinals (3, 4, ...), so one whose
			 * number happens to have bit 4 set must not get a BOM. */
			px->tobom0 = px->tobom
				= (tocodeset >= IC_MULTIBYTE) ? ((tocodeset & 0x10) >> 4) : 0;
			px->fromcodeset0 = px->fromcodeset = fromcodeset;
			px->frombom0 = px->frombom
				= (fromcodeset >= IC_MULTIBYTE) ? ((fromcodeset & 0x10) >> 4) : 0;
			px->skip_invalid_input = px->tostate.mask = px->fromstate.mask = 0;
			return (iconv_t) px;
		}
	} else {
		__set_errno(EINVAL);
	}
	return (iconv_t)(-1);
}

/* Release a descriptor obtained from iconv_open().  Always returns 0. */
int iconv_close(iconv_t cd)
{
	free(cd);

	return 0;
}

/*
 * Convert characters from *inbuf to *outbuf, advancing both pointers and
 * decrementing both byte counts as data is consumed and produced.
 *
 * A NULL inbuf or *inbuf resets the descriptor to its open-time state and
 * returns 0.  Otherwise the return value is the number of characters that
 * had to be replaced (U+FFFD or '?') because the target could not represent
 * them, or (size_t)(-1) with errno set to:
 *   E2BIG  - no room left in the output buffer,
 *   EINVAL - input ends in the middle of a character,
 *   EILSEQ - illegal input (unless skip_invalid_input is set, in which case
 *            the input is skipped and the flag is raised to 2).
 */
size_t iconv(iconv_t cd, char **__restrict inbuf,
			 size_t *__restrict inbytesleft,
		     char **__restrict outbuf, size_t *__restrict outbytesleft)
{
	_UC_iconv_t *px = (_UC_iconv_t *) cd;
	size_t nrcount, r;
	wchar_t wc, wc2;
	int inci, inco;		/* bytes consumed / produced for this character */

	assert(px != (_UC_iconv_t *)(-1));
	assert(sizeof(wchar_t) == 4);

	if (!inbuf || !*inbuf) {	/* Need to reinitialze conversion state. */
		/* Note: For shift-state encodings we possibly need to output the
		 * shift sequence to return to initial state! */
		if ((px->fromcodeset & 0xf0) == 0xe0) {
		}
		px->tostate.mask = px->fromstate.mask = 0;
		px->fromcodeset = px->fromcodeset0;
		px->tobom = px->tobom0;
		px->frombom = px->frombom0;
		return 0;
	}

	nrcount = 0;
	while (*inbytesleft) {
		if (!*outbytesleft) {
		TOO_BIG:
			__set_errno(E2BIG);
			return (size_t) -1;
		}

		/* ---- Decode one character from the input into wc. ---- */
		inci = inco = 1;
		if (px->fromcodeset >= IC_MULTIBYTE) {
			inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
			if (*inbytesleft < inci) goto INVALID;
			/* Bytes are assembled most-significant first and then swapped
			 * when bit 0 of the codeset is clear.
			 * NOTE(review): that pairing looks right only on little-endian
			 * hosts -- confirm behavior on big-endian targets. */
			wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
				+ ((unsigned char)((*inbuf)[1]));
			if (inci == 4) {
				wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
					+ ((unsigned char)((*inbuf)[3])) + (wc << 16);
				if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
			} else {
				if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
				/* NOTE(review): on little-endian builds IC_UTF_16 is 0xeb,
				 * so this mask test also requires bit 0 to be set; the
				 * platform-endian UTF-16 ids (0xea/0xfa) do not appear to
				 * reach the surrogate path -- confirm. */
				if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
					 && (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
					) {			/* surrogate */
					/* Was "wc =- 0xd800U", which assigned a negated
					 * constant instead of subtracting the base. */
					wc -= 0xd800U;
					if (*inbytesleft < 4) goto INVALID;
					wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
						+ ((unsigned char)((*inbuf)[3]));
					/* Swap the low half in place; the original stored the
					 * result in wc and destroyed the high half. */
					if (!(px->fromcodeset & 1)) wc2 = bswap_16(wc2);
					/* A low surrogate must lie in 0xdc00..0xdfff.  The
					 * original test was inverted (and used 0xe0000), so
					 * every well-formed pair was rejected. */
					if (((__uwchar_t)(wc2 -= 0xdc00U)) >= (0xe000U - 0xdc00U)) {
						goto ILLEGAL;
					}
					inci = 4;	/* Change inci here in case skipping illegals. */
					wc = 0x10000UL + (wc << 10) + wc2;
				}
			}

			if (px->frombom) {
				px->frombom = 0;
				if ((wc == 0xfeffU)
					|| (wc == ((inci == 4)
							   ? (((wchar_t) 0xfffe0000UL))
							   : ((wchar_t)(0xfffeUL))))
					) {
					if (wc != 0xfeffU) {
						px->fromcodeset ^= 1; /* toggle endianness */
						wc = 0xfeffU;
					}
					/* NOTE(review): frombom was cleared just above, so this
					 * test is always true and the GOT_BOM jump below is
					 * never taken; possibly px->tobom was meant -- confirm. */
					if (!px->frombom) {
						goto BOM_SKIP_OUTPUT;
					}
					goto GOT_BOM;
				}
			}

			if (px->fromcodeset != IC_WCHAR_T) {
				if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
										 ? 0x7fffffffUL : 0x10ffffUL)
#ifdef KUHN
					|| (((__uwchar_t)(wc - 0xfffeU)) < 2)
					|| (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
#endif
					) {
					goto ILLEGAL;
				}
			}
		} else if (px->fromcodeset == IC_UTF_8) {
			const char *p = *inbuf;
			r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
			if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
				if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete or nul */
					assert((r == (size_t)(-1)) || (r == (size_t)(-2)));
					if (r == (size_t)(-2)) {
					INVALID:
						__set_errno(EINVAL);
					} else {
						px->fromstate.mask = 0;
						inci = 1;
					ILLEGAL:
						if (px->skip_invalid_input) {
							px->skip_invalid_input = 2;	/* flag for iconv utility */
							goto BOM_SKIP_OUTPUT;
						}
						__set_errno(EILSEQ);
					}
					return (size_t)(-1);
				}
#ifdef __UCLIBC_MJN3_ONLY__
#warning optimize this
#endif
				if (p != NULL) { /* incomplet char case */
					goto INVALID;
				}
				p = *inbuf + 1;	/* nul */
			}
			inci = p - *inbuf;
		} else if ((wc = ((unsigned char)(**inbuf))) >= 0x80) {	/* Non-ASCII... */
			if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
				goto ILLEGAL;
			} else {			/* some other 8-bit ascii-extension codeset */
				const codeset_8_bit_t *c8b
					= __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
				wc -= 0x80;
				wc = __global_locale.tbl8c2wc[
							 (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
							  << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
				if (!wc) {
					goto ILLEGAL;
				}
			}
		}

		/* ---- Encode wc into the output. ---- */
		if (px->tobom) {
			/* Emit a BOM first; inci = 0 leaves the character just decoded
			 * in the input so it is processed again on the next pass. */
			inci = 0;
			wc = 0xfeffU;
	GOT_BOM:
			px->tobom = 0;
		}

		if (px->tocodeset >= IC_MULTIBYTE) {
			inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
			/* Compare against the OUTPUT size.  The original compared with
			 * inci, which could let a 2- or 4-byte store run past the end
			 * of the caller's buffer. */
			if (*outbytesleft < inco) goto TOO_BIG;
			if (px->tocodeset != IC_WCHAR_T) {
				if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
										 ? 0x7fffffffUL : 0x10ffffUL)
#ifdef KUHN
					|| (((__uwchar_t)(wc - 0xfffeU)) < 2)
					|| (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
#endif
					) {
				REPLACE_32:
					wc = 0xfffd;
					++nrcount;
				}
			}
			if (inco == 4) {
				if (px->tocodeset & 1) wc = bswap_32(wc);
			} else {
				if (((__uwchar_t)wc ) > 0xffffU) {
					if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
						goto REPLACE_32;
					}
					/* Needs a surrogate pair: four output bytes. */
					if (*outbytesleft < (inco = 4)) goto TOO_BIG;
					wc2 = 0xdc00U + (wc & 0x3ff);
					wc = 0xd800U + ((wc >> 10) & 0x3ff);
					if (px->tocodeset & 1) {
						wc = bswap_16(wc);
						wc2 = bswap_16(wc2);
					}
					wc += (wc2 << 16);
				} else if (px->tocodeset & 1) wc = bswap_16(wc);
			}
			/* Store least-significant byte first. */
			(*outbuf)[0] = (char)((unsigned char)(wc));
			(*outbuf)[1] = (char)((unsigned char)(wc >> 8));
			if (inco == 4) {
				(*outbuf)[2] = (char)((unsigned char)(wc >> 16));
				(*outbuf)[3] = (char)((unsigned char)(wc >> 24));
			}
		} else if (px->tocodeset == IC_UTF_8) {
			const wchar_t *pw = &wc;
			do {
				r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
				if (r != (size_t)(-1)) {
#ifdef __UCLIBC_MJN3_ONLY__
#warning what happens for a nul?
#endif
					if (r == 0) {
						if (wc != 0) {
							goto TOO_BIG;
						}
						++r;
					}
					break;
				}
				/* Not encodable: retry with the replacement character. */
				wc = 0xfffdU;
				++nrcount;
			} while (1);
			inco = r;
		} else if (((__uwchar_t)(wc)) < 0x80) {
		CHAR_GOOD:
				**outbuf = wc;
		} else {
			if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
				const codeset_8_bit_t *c8b
					= __locale_mmap->codeset_8_bit + px->tocodeset - 3;
				__uwchar_t u;
				u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
				u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
						 + ((wc >> Cwc2c_TT_SHIFT)
							& ((1 << Cwc2c_TI_SHIFT)-1))];
				wc = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
						 + (u << Cwc2c_TT_SHIFT)
						 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
				if (wc) {
					goto CHAR_GOOD;
				}
			}
			**outbuf = '?';
			++nrcount;
		}

		*outbuf += inco;
		*outbytesleft -= inco;
	BOM_SKIP_OUTPUT:
		*inbuf += inci;
		*inbytesleft -= inci;
	}
	return nrcount;
}

#endif
/**********************************************************************/
#ifdef L_iconv_main

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <iconv.h>
#include <stdarg.h>
#include <libgen.h>

extern const unsigned char codesets[];

#define IBUF BUFSIZ
#define OBUF BUFSIZ

char *progname;
int hide_errors;

static void error_msg(const char *fmt, ...)
	 __attribute__ ((noreturn, format (printf, 1, 2)));

/*
 * Print "<progname>: <formatted message>" to stderr unless hide_errors is
 * set, then exit with EXIT_FAILURE.  Never returns.
 */
static void error_msg(const char *fmt, ...)
{
	va_list arg;

	if (!hide_errors) {
		fprintf(stderr, "%s: ", progname);
		va_start(arg, fmt);
		vfprintf(stderr, fmt, arg);
		va_end(arg);
	}

	exit(EXIT_FAILURE);
}

/*
 * Command-line front end:
 *   iconv [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]
 *   iconv -l
 * Exits with EXIT_FAILURE on any error, or when -c caused input to be
 * skipped; EXIT_SUCCESS otherwise.
 */
int main(int argc, char **argv)
{
	FILE *ifile;
	FILE *ofile = stdout;
	const char *p;
	const char *s;
	static const char opt_chars[] = "tfocsl";
	                              /* 012345 */
	const char *opts[sizeof(opt_chars)]; /* last is infile name */
	iconv_t ic;
	char ibuf[IBUF];
	char obuf[OBUF];
	char *pi;
	char *po;
	size_t ni, no, r, pos;
	int skipped_input;

	hide_errors = 0;

	for (s = opt_chars ; *s ; s++) {
		opts[ s - opt_chars ] = NULL;
	}

	progname = *argv;
	while (--argc) {
		p = *++argv;
		if ((*p != '-') || (*++p == 0)) {
			break;
		}
		do {
			if ((s = strchr(opt_chars,*p)) == NULL) {
			USAGE:
				s = basename(progname);
				fprintf(stderr,
						"%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
						"  or\n%s -l\n", s, s);
				return EXIT_FAILURE;
			}
			if ((s - opt_chars) < 3) {
				/* -t, -f and -o take the next argv entry as their value
				 * and may be given only once. */
				if ((--argc == 0) || opts[s - opt_chars]) {
					goto USAGE;
				}
				opts[s - opt_chars] = *++argv;
			} else {
				/* Plain flags: any non-NULL value marks them as set. */
				opts[s - opt_chars] = p;
			}
		} while (*++p);
	}

	if (opts[5]) {				/* -l */
		fprintf(stderr, "Recognized codesets:\n");
		for (s = codesets ; *s ; s += *s) {
			fprintf(stderr,"  %s\n", s+2);
		}
		s = CODESET_LIST;
		do {
			fprintf(stderr,"  %s\n", CODESET_LIST+ (unsigned char)(*s));
		} while (*++s);

		return EXIT_SUCCESS;
	}

	if (opts[4]) {				/* -s */
		hide_errors = 1;
	}

	if (!opts[0] || !opts[1]) {
		goto USAGE;
	}
	if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
		error_msg( "unsupported codeset in %s -> %s conversion\n", opts[0], opts[1]);
	}
	if (opts[3]) {				/* -c */
		((_UC_iconv_t *) ic)->skip_invalid_input = 1;
	}

	if ((s = opts[2]) != NULL) {	/* -o */
		if (!(ofile = fopen(s, "w"))) {
			error_msg( "couldn't open %s for writing\n", s);
		}
	}

	pos = ni = 0;
	do {
		if (!argc || ((**argv == '-') && !((*argv)[1]))) {
			ifile = stdin;		/* we don't check for duplicates */
		} else if (!(ifile = fopen(*argv, "r"))) {
			error_msg( "couldn't open %s for reading\n", *argv);
		}

		/* ni bytes left over from the previous round sit at the front of
		 * ibuf; new data is appended after them. */
		while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
			pos += r;
			ni += r;
			no = OBUF;
			pi = ibuf;
			po = obuf;
			if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
				/* EINVAL (truncated character) and E2BIG (output full) are
				 * handled by carrying the unread bytes into the next round. */
				if ((errno != EINVAL) && (errno != E2BIG)) {
					error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
				}
			}
			if ((r = OBUF - no) > 0) {
				if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
					error_msg( "write error\n");
				}
			}
			if (ni) {			/* still bytes in buffer! */
				memmove(ibuf, pi, ni);
			}
		}

		if (ferror(ifile)) {
			error_msg( "read error\n");
		}

		++argv;

		if (ifile != stdin) {
			fclose(ifile);
		}

	} while (--argc > 0);

	/* Read the "input was skipped" flag BEFORE closing: iconv_close() frees
	 * the descriptor, and the original dereferenced it afterwards. */
	skipped_input = (((_UC_iconv_t *) ic)->skip_invalid_input >= 2);

	iconv_close(ic);

	if (ni) {
		error_msg( "incomplete sequence\n");
	}

	return skipped_input ? EXIT_FAILURE : EXIT_SUCCESS;
}

#endif
/**********************************************************************/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -