📄 charcnv.c

📁 samba-3.0.22.tar.gz 编译smb服务器的源码
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* 
   Unix SMB/CIFS implementation.
   Character set conversion Extensions
   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   Copyright (C) Andrew Tridgell 2001
   Copyright (C) Simo Sorce 2001
   Copyright (C) Martin Pool 2003
   
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
#include "includes.h"

/*
 * Character written to the output in place of input that could not be
 * converted.  Currently fixed; we can parameterize this if someone
 * complains.... JRA.
 */
char lp_failed_convert_char(void)
{
	const char substitute = '_';

	return substitute;
}

/**
 * @file
 *
 * @brief Character-set conversion routines built on our iconv.
 * 
 * @note Samba's internal character set (at least in the 3.0 series)
 * is always the same as the one for the Unix filesystem.  It is
 * <b>not</b> necessarily UTF-8 and may be different on machines that
 * need i18n filenames to be compatible with Unix software.  It does
 * have to be a superset of ASCII.  All multibyte sequences must start
 * with a byte with the high bit set.
 *
 * @sa lib/iconv.c
 */

/* Conversion handle cache, indexed as [source charset][destination charset]. */
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];

/* When set, conversion failures are not reported through DEBUG(). */
static BOOL conv_silent;

/**
 * Return the name of a charset to give to iconv().
**/static const char *charset_name(charset_t ch){	const char *ret = NULL;	if (ch == CH_UCS2) ret = "UTF-16LE";	else if (ch == CH_UNIX) ret = lp_unix_charset();	else if (ch == CH_DOS) ret = lp_dos_charset();	else if (ch == CH_DISPLAY) ret = lp_display_charset();	else if (ch == CH_UTF8) ret = "UTF8";#if defined(HAVE_NL_LANGINFO) && defined(CODESET)	if (ret && !strcmp(ret, "LOCALE")) {		const char *ln = NULL;#ifdef HAVE_SETLOCALE		setlocale(LC_ALL, "");#endif		ln = nl_langinfo(CODESET);		if (ln) {			/* Check whether the charset name is supported			   by iconv */			smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");			if (handle == (smb_iconv_t) -1) {				DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));				ln = NULL;			} else {				DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));				smb_iconv_close(handle);			}		}		ret = ln;	}#ifdef HAVE_SETLOCALE	/* We set back the locale to C to get ASCII-compatible toupper/lower functions.	   For now we do not need any other POSIX localisations anyway. When we should	   really need localized string functions one day we need to write our own	   ascii_tolower etc.	*/	setlocale(LC_ALL, "C"); #endif#endif	if (!ret || !*ret) ret = "ASCII";	return ret;}void lazy_initialize_conv(void){	static int initialized = False;	if (!initialized) {		initialized = True;		load_case_tables();		init_iconv();	}}/** * Initialize iconv conversion descriptors. * * This is called the first time it is needed, and also called again * every time the configuration is reloaded, because the charset or * codepage might have changed. 
**/
/*
 * For every (source, destination) charset pair the cached handle is kept
 * when it already converts between the currently configured charset
 * names; otherwise it is closed (if open) and reopened.  If the
 * configured pair cannot be opened, "ASCII" is substituted for every side
 * that is not CH_UCS2 and the open is retried; a second failure is
 * reported and passed to smb_panic().
 *
 * When at least one handle was (re)opened, init_doschar_table() and
 * init_valid_table() are re-run with conv_silent set, so conversion
 * failures during that rebuild are not logged.
 */
void init_iconv(void)
{
	int c1, c2;
	BOOL did_reload = False;

	/* so that charset_name() works we need to get the UNIX<->UCS2 going
	   first */
	if (!conv_handles[CH_UNIX][CH_UCS2])
		conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2), "ASCII");

	if (!conv_handles[CH_UCS2][CH_UNIX])
		conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UCS2));

	for (c1=0;c1<NUM_CHARSETS;c1++) {
		for (c2=0;c2<NUM_CHARSETS;c2++) {
			const char *n1 = charset_name((charset_t)c1);
			const char *n2 = charset_name((charset_t)c2);
			smb_iconv_t cur = conv_handles[c1][c2];

			/* smb_iconv_open() reports failure as (smb_iconv_t)-1.
			   Such a slot holds no descriptor: it must be neither
			   dereferenced nor closed, only reopened. */
			if (cur == (smb_iconv_t)-1)
				cur = (smb_iconv_t)0;

			if (cur &&
			    strcmp(n1, cur->from_name) == 0 &&
			    strcmp(n2, cur->to_name) == 0)
				continue;

			did_reload = True;

			if (cur)
				smb_iconv_close(cur);

			conv_handles[c1][c2] = smb_iconv_open(n2,n1);
			if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
				/* n1/n2 still hold the configured names here. */
				DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
					 n1, n2));
				if (c1 != CH_UCS2) {
					n1 = "ASCII";
				}
				if (c2 != CH_UCS2) {
					n2 = "ASCII";
				}
				DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
					n1, n2 ));
				conv_handles[c1][c2] = smb_iconv_open(n2,n1);
				/* Test for the -1 failure value as well as NULL;
				   a NULL-only test never detects a failed open. */
				if (!conv_handles[c1][c2] ||
				    conv_handles[c1][c2] == (smb_iconv_t)-1) {
					DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
					smb_panic("init_iconv: conv_handle initialization failed.");
				}
			}
		}
	}

	if (did_reload) {
		/* XXX: Does this really get called every time the dos
		 * codepage changes? */
		/* XXX: Is the did_reload test too strict? */
		conv_silent = True;
		init_doschar_table();
		init_valid_table();
		conv_silent = False;
	}
}

/**
 * Convert string from one encoding to another, making error checking etc
 * Slow path version - uses (slow) iconv.
* * @param src pointer to source string (multibyte or singlebyte) * @param srclen length of the source string in bytes * @param dest pointer to destination string (multibyte or singlebyte) * @param destlen maximal length allowed for string * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors) * @returns the number of bytes occupied in the destination * * Ensure the srclen contains the terminating zero. * **/static size_t convert_string_internal(charset_t from, charset_t to,		      void const *src, size_t srclen, 		      void *dest, size_t destlen, BOOL allow_bad_conv){	size_t i_len, o_len;	size_t retval;	const char* inbuf = (const char*)src;	char* outbuf = (char*)dest;	smb_iconv_t descriptor;	lazy_initialize_conv();	descriptor = conv_handles[from][to];	if (srclen == (size_t)-1) {		if (from == CH_UCS2) {			srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;		} else {			srclen = strlen((const char *)src)+1;		}	}	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {		if (!conv_silent)			DEBUG(0,("convert_string_internal: Conversion not supported.\n"));		return (size_t)-1;	}	i_len=srclen;	o_len=destlen; again:	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);	if(retval==(size_t)-1) {	    	const char *reason="unknown error";		switch(errno) {			case EINVAL:				reason="Incomplete multibyte sequence";				if (!conv_silent)					DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));				if (allow_bad_conv)					goto use_as_is;				break;			case E2BIG:				reason="No more room"; 				if (!conv_silent) {					if (from == CH_UNIX) {						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",							charset_name(from), charset_name(to),							(unsigned int)srclen, (unsigned int)destlen, (const char *)src));					} else {						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",							charset_name(from), charset_name(to),							(unsigned int)srclen, (unsigned 
int)destlen));					}				}				break;			case EILSEQ:				reason="Illegal multibyte sequence";				if (!conv_silent)					DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));				if (allow_bad_conv)					goto use_as_is;				break;			default:				if (!conv_silent)					DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));				break;		}		/* smb_panic(reason); */	}	return destlen-o_len; use_as_is:	/* 	 * Conversion not supported. This is actually an error, but there are so	 * many misconfigured iconv systems and smb.conf's out there we can't just	 * fail. Do a very bad conversion instead.... JRA.	 */	{		if (o_len == 0 || i_len == 0)			return destlen - o_len;		if (from == CH_UCS2 && to != CH_UCS2) {			/* Can't convert from ucs2 to multibyte. Replace with the default fail char. */			if (i_len < 2)				return destlen - o_len;			if (i_len >= 2) {				*outbuf = lp_failed_convert_char();				outbuf++;				o_len--;				inbuf += 2;				i_len -= 2;			}			if (o_len == 0 || i_len == 0)				return destlen - o_len;			/* Keep trying with the next char... */			goto again;		} else if (from != CH_UCS2 && to == CH_UCS2) {			/* Can't convert to ucs2 - just widen by adding the default fail char then zero. */			if (o_len < 2)				return destlen - o_len;			outbuf[0] = lp_failed_convert_char();			outbuf[1] = '\0';			inbuf++;			i_len--;			outbuf += 2;			o_len -= 2;			if (o_len == 0 || i_len == 0)				return destlen - o_len;			/* Keep trying with the next char... */			goto again;		} else if (from != CH_UCS2 && to != CH_UCS2) {			/* Failed multibyte to multibyte. Just copy the default fail char and				try again. */			outbuf[0] = lp_failed_convert_char();			inbuf++;			i_len--;			outbuf++;			o_len--;			if (o_len == 0 || i_len == 0)				return destlen - o_len;			/* Keep trying with the next char... */			goto again;		} else {			/* Keep compiler happy.... 
*/			return destlen - o_len;		}	}}/** * Convert string from one encoding to another, making error checking etc * Fast path version - handles ASCII first. * * @param src pointer to source string (multibyte or singlebyte) * @param srclen length of the source string in bytes, or -1 for nul terminated. * @param dest pointer to destination string (multibyte or singlebyte) * @param destlen maximal length allowed for string - *NEVER* -1. * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors) * @returns the number of bytes occupied in the destination * * Ensure the srclen contains the terminating zero. * * This function has been hand-tuned to provide a fast path. * Don't change unless you really know what you are doing. JRA. **/size_t convert_string(charset_t from, charset_t to,		      void const *src, size_t srclen, 		      void *dest, size_t destlen, BOOL allow_bad_conv){	/*	 * NB. We deliberately don't do a strlen here if srclen == -1.	 * This is very expensive over millions of calls and is taken	 * care of in the slow path in convert_string_internal. JRA.	 */#ifdef DEVELOPER	SMB_ASSERT(destlen != (size_t)-1);#endif	if (srclen == 0)		return 0;	if (from != CH_UCS2 && to != CH_UCS2) {		const unsigned char *p = (const unsigned char *)src;		unsigned char *q = (unsigned char *)dest;		size_t slen = srclen;		size_t dlen = destlen;		unsigned char lastp = '\0';		size_t retval = 0;		/* If all characters are ascii, fast path here. */		while (slen && dlen) {			if ((lastp = *p) <= 0x7f) {				*q++ = *p++;				if (slen != (size_t)-1) {					slen--;				}				dlen--;				retval++;				if (!lastp)					break;			} else {#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS				goto general_case;#else				return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);#endif			}		}		if (!dlen) {			/* Even if we fast path we should note if we ran out of room. 
*/			if (((slen != (size_t)-1) && slen) ||					((slen == (size_t)-1) && lastp)) {				errno = E2BIG;			}		}		return retval;	} else if (from == CH_UCS2 && to != CH_UCS2) {		const unsigned char *p = (const unsigned char *)src;		unsigned char *q = (unsigned char *)dest;		size_t retval = 0;		size_t slen = srclen;		size_t dlen = destlen;		unsigned char lastp = '\0';		/* If all characters are ascii, fast path here. */		while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {			if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {				*q++ = *p;				if (slen != (size_t)-1) {					slen -= 2;				}				p += 2;				dlen--;				retval++;				if (!lastp)					break;			} else {#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS				goto general_case;#else				return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);#endif			}		}		if (!dlen) {			/* Even if we fast path we should note if we ran out of room. */			if (((slen != (size_t)-1) && slen) ||					((slen == (size_t)-1) && lastp)) {				errno = E2BIG;			}		}		return retval;	} else if (from != CH_UCS2 && to == CH_UCS2) {		const unsigned char *p = (const unsigned char *)src;		unsigned char *q = (unsigned char *)dest;		size_t retval = 0;		size_t slen = srclen;		size_t dlen = destlen;		unsigned char lastp = '\0';		/* If all characters are ascii, fast path here. */		while (slen && (dlen >= 2)) {			if ((lastp = *p) <= 0x7F) {				*q++ = *p++;				*q++ = '\0';				if (slen != (size_t)-1) {					slen--;				}				dlen -= 2;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -