iconv.c

来自「在Linux/Unix下面访问WINDOWS SQLSERVER 的ODBC驱动」· C语言 代码 · 共 1,453 行 · 第 1/3 页

C
1,453
字号
	char_conv->from_wire = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]);	if (char_conv->from_wire == (iconv_t) - 1) {		tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name);	}	/* try indirect conversions */	if (char_conv->to_wire == (iconv_t) - 1 || char_conv->from_wire == (iconv_t) - 1) {		tds_iconv_info_close(char_conv);		/* TODO reuse some conversion, client charset is usually constant in all connection (or ISO8859-1) */		char_conv->to_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[client_canonical]);		char_conv->to_wire2 = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[POS_UTF8]);		char_conv->from_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[server_canonical]);		char_conv->from_wire2 = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[POS_UTF8]);		if (char_conv->to_wire == (iconv_t) - 1 || char_conv->to_wire2 == (iconv_t) - 1		    || char_conv->from_wire == (iconv_t) - 1 || char_conv->from_wire2 == (iconv_t) - 1) {			tds_iconv_info_close(char_conv);			tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\" indirectly\n",				    server->name, client->name);			return 0;		}		char_conv->flags |= TDS_ENCODING_INDIRECT;	}		/* TODO, do some optimizations like UCS2 -> UTF8 min,max = 2,2 (UCS2) and 1,4 (UTF8) */	tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: converting \"%s\"->\"%s\"\n", client->name, server->name);	return 1;}#if HAVE_ICONV_ALWAYSstatic void_iconv_close(iconv_t * cd){	static const iconv_t invalid = (iconv_t) - 1;	if (*cd != invalid) {		tds_sys_iconv_close(*cd);		*cd = invalid;	}}static voidtds_iconv_info_close(TDSICONV * char_conv){	_iconv_close(&char_conv->to_wire);	_iconv_close(&char_conv->to_wire2);	_iconv_close(&char_conv->from_wire);	_iconv_close(&char_conv->from_wire2);}#endifvoidtds_iconv_close(TDSSOCKET * tds){#if HAVE_ICONV_ALWAYS	int i;	for (i = 0; i < tds->char_conv_count; ++i) {		tds_iconv_info_close(tds->char_convs[i]);	}#endif}#define CHUNK_ALLOC 4voidtds_iconv_free(TDSSOCKET * tds){	int i;	if (!tds->char_convs)		return;	tds_iconv_close(tds);	free(tds->char_convs[0]);	for (i = initial_char_conv_count + 1; i < tds->char_conv_count; i += CHUNK_ALLOC)		free(tds->char_convs[i]);	TDS_ZERO_FREE(tds->char_convs);	tds->char_conv_count = 0;}/**  * Wrapper around iconv(3).  Same parameters, with slightly different behavior. * \param tds state information for the socket and the TDS protocol * \param io Enumerated value indicating whether the data are being sent to or received from the server.  * \param conv information about the encodings involved, including the iconv(3) conversion descriptors.  * \param inbuf address of pointer to the input buffer of data to be converted.   * \param inbytesleft address of count of bytes in \a inbuf. * \param outbuf address of pointer to the output buffer.   * \param outbytesleft address of count of bytes in \a outbuf. * \retval number of irreversible conversions performed.  -1 on error, see iconv(3) documentation for  * a description of the possible values of \e errno.   * \remarks Unlike iconv(3), none of the arguments can be nor point to NULL.  Like iconv(3), all pointers will  *  	be updated.  Success is signified by a nonnegative return code and \a *inbytesleft == 0.   * 	If the conversion descriptor in \a iconv is -1 or NULL, \a inbuf is copied to \a outbuf,  *	and all parameters updated accordingly.  *  * 	If a character in \a inbuf cannot be converted because no such cbaracter exists in the * 	\a outbuf character set, we emit messages similar to the ones Sybase emits when it fails such a conversion.  * 	The message varies depending on the direction of the data.   * 	On a read error, we emit Msg 2403, Severity 16 (EX_INFO): * 		"WARNING! Some character(s) could not be converted into client's character set.  *			Unconverted bytes were changed to question marks ('?')." * 	On a write error we emit Msg 2402, Severity 16 (EX_USER): *		"Error converting client characters into server's character set. Some character(s) could not be converted." *  	  and return an error code.  Client libraries relying on this routine should reflect an error back to the application.   * 	 * \todo Check for variable multibyte non-UTF-8 input character set.   * \todo Use more robust error message generation.   * \todo For reads, cope with \a outbuf encodings that don't have the equivalent of an ASCII '?'.   * \todo Support alternative to '?' for the replacement character.   */size_ttds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io,	  const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft){	static const iconv_t invalid = (iconv_t) - 1;	const TDS_ENCODING *input_charset = NULL;	const char *output_charset_name = NULL;	iconv_t cd = invalid, cd2 = invalid;	iconv_t error_cd = invalid;	char quest_mark[] = "?";	/* best to leave non-const; implementations vary */	ICONV_CONST char *pquest_mark = quest_mark;	size_t lquest_mark;	size_t irreversible;	char one_character;	char *p;	int eilseq_raised = 0;	/* cast away const-ness */	TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS*) &conv->suppress;	assert(inbuf && inbytesleft && outbuf && outbytesleft);	switch (io) {	case to_server:		cd = conv->to_wire;		cd2 = conv->to_wire2;		input_charset = &conv->client_charset;		output_charset_name = conv->server_charset.name;		break;	case to_client:		cd = conv->from_wire;		cd2 = conv->from_wire2;		input_charset = &conv->server_charset;		output_charset_name = conv->client_charset.name;		break;	default:		tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out.  \n", io);		assert(io == to_server || io == to_client);		break;	}	/* silly case, memcpy */	if (conv->flags & TDS_ENCODING_MEMCPY || cd == invalid) {		size_t len = *inbytesleft < *outbytesleft ? *inbytesleft : *outbytesleft;		memcpy(*outbuf, *inbuf, len);		errno = *inbytesleft > *outbytesleft ? E2BIG : 0;		*inbytesleft -= len;		*outbytesleft -= len;		*inbuf += len;		*outbuf += len;		return 0;	}	/*	 * Call iconv() as many times as necessary, until we reach the end of input or exhaust output.  	 */	errno = 0;	p = *outbuf;	for (;;) {		if (conv->flags & TDS_ENCODING_INDIRECT) {#if ENABLE_EXTRA_CHECKS			char tmp[8];#else			char tmp[128];#endif			char *pb = tmp;			size_t l = sizeof(tmp);			int temp_errno;			size_t temp_irreversible;			temp_irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, &pb, &l);			temp_errno = errno;			/* convert partial */			pb = tmp;			l = sizeof(tmp) - l;			for (;;) {				errno = 0;				irreversible = tds_sys_iconv(cd2, (ICONV_CONST char **) &pb, &l, outbuf, outbytesleft);				if (irreversible != (size_t) - 1) {					if (*inbytesleft)						break;					goto end_loop;				}				/* EINVAL should be impossible, all characters came from previous iconv... */				if (errno == E2BIG || errno == EINVAL)					goto end_loop;				/*				 * error should be EILSEQ, not convertible sequence 				 * skip UTF-8 sequence 				 */				/* avoid infinite recursion */				eilseq_raised = 1;				if (*pb == '?')					goto end_loop;				*pb = (char) 0x80;				while(l && (*pb & 0xC0) == 0x80)					++pb, --l;				--pb;				++l;				*pb = '?';			}			if (temp_errno == E2BIG) {				errno = 0;				continue;			}			errno = temp_errno;			irreversible = temp_irreversible;			break;		} else if (io == to_client && conv->flags & TDS_ENCODING_SWAPBYTE) {			/* swap bytes if necessary */#if ENABLE_EXTRA_CHECKS			char tmp[8];#else			char tmp[128];#endif			char *pib = tmp;			size_t il = *inbytesleft > sizeof(tmp) ? sizeof(tmp) : *inbytesleft;			size_t n;			for (n = 0; n < il; n += 2) {				tmp[n] = (*inbuf)[n + 1];				tmp[n + 1] = (*inbuf)[n];			}			irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) &pib, &il, outbuf, outbytesleft);			il = pib - tmp;			*inbuf += il;			*inbytesleft -= il;			if (irreversible != (size_t) - 1 && *inbytesleft)				continue;		} else {			irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft);		}		if (irreversible != (size_t) - 1)			break;		if (errno == EILSEQ)			eilseq_raised = 1;		if (errno != EILSEQ || io != to_client)			break;		/* 		 * Invalid input sequence encountered reading from server. 		 * Skip one input sequence, adjusting pointers. 		 */		one_character = skip_one_input_sequence(cd, input_charset, inbuf, inbytesleft);		if (!one_character)			break;		/* 		 * To replace invalid input with '?', we have to convert a UTF-8 '?' into the output character set.  		 * In unimaginably weird circumstances, this might be impossible.		 * We use UTF-8 instead of ASCII because some implementations 		 * do not convert singlebyte <-> singlebyte.		 */		if (error_cd == invalid) {			error_cd = tds_sys_iconv_open(output_charset_name, iconv_names[POS_UTF8]);			if (error_cd == invalid) {				break;	/* what to do? */			}		}		lquest_mark = 1;		pquest_mark = quest_mark;		p = *outbuf;		irreversible = tds_sys_iconv(error_cd, &pquest_mark, &lquest_mark, outbuf, outbytesleft);		if (irreversible == (size_t) - 1)			break;		if (!*inbytesleft)			break;	}end_loop:		/* swap bytes if necessary */	if (io == to_server && conv->flags & TDS_ENCODING_SWAPBYTE) {		assert((*outbuf - p) % 2 == 0);		for (; p < *outbuf; p += 2) {			char tmp = p[0];			p[0] = p[1];			p[1] = tmp;		}	}	if (eilseq_raised && !suppress->eilseq) {		/* invalid multibyte input sequence encountered */		if (io == to_client) {			if (irreversible == (size_t) - 1) {				tdserror(tds->tds_ctx, tds, TDSEICONV2BIG, 0);			} else {				tdserror(tds->tds_ctx, tds, TDSEICONVI, 0);				errno = 0;			}		} else {			tdserror(tds->tds_ctx, tds, TDSEICONVO, 0);		}		suppress->eilseq = 1;	}	switch (errno) {	case EINVAL:		/* incomplete multibyte sequence is encountered */		if (suppress->einval)			break;		/* in chunk conversion this can mean we end a chunk inside a character */		tdserror(tds->tds_ctx, tds, TDSEICONVAVAIL, 0);		suppress->einval = 1;		break;	case E2BIG:		/* output buffer has no more room */		if (suppress->e2big)			break;		tdserror(tds->tds_ctx, tds, TDSEICONVIU, 0);		suppress->e2big = 1;		break;	default:		break;	}	if (error_cd != invalid) {		tds_sys_iconv_close(error_cd);	}	return irreversible;}/** * Read a data file, passing the data through iconv(). * \return Count of bytes either not read, or read but not converted.  Returns zero on success.   */size_ttds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft){#ifdef ENABLE_EXTRA_CHECKS	char buffer[16];#else	char buffer[16000];#endif	char *ib;	size_t isize = 0, nonreversible_conversions = 0;	/*	 * If cd isn't valid, it's just an indication that this column needs no conversion.  	 */	if (cd == (iconv_t) - 1) {		assert(field_len <= *outbytesleft);		if (field_len > 0) {			if (1 != fread(outbuf, field_len, 1, stream)) {				return field_len + term_len;	/* unable to read */			}		}		/* prepare to read the terminator and return */		*outbytesleft -= field_len;	/* as iconv would have done */		isize = 0;			/* as iconv would have done */		field_len = 0;			/* as the loop would have done */		goto READ_TERMINATOR;	}		/*	 * Read in chunks.  	 * 	field_len  is the total size to read	 * 	isize	   is the size of the current chunk (which might be the whole thing).	 * They are decremented as they are successfully processed.  	 * On success, we exit the loop with both equal to zero, indicating nothing we	 * were asked to read remains unread.	 */	isize = (sizeof(buffer) < field_len) ? sizeof(buffer) : field_len;	for (ib = buffer; isize && (isize = fread(ib, 1, isize, stream)) > 0;) {		tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: read %u of %u bytes; outbuf has %u left.\n", (unsigned int) isize,			    (unsigned int) field_len, (unsigned int) *outbytesleft);		field_len -= isize;		isize += ib - buffer;		ib = buffer;		nonreversible_conversions += tds_sys_iconv(cd, (ICONV_CONST char **) &ib, &isize, &outbuf, outbytesleft);		if (isize != 0) {			memmove(buffer, ib, isize);			switch (errno) {			case EINVAL:	/* incomplete multibyte sequence encountered in input */				break;			case E2BIG:	/* insufficient room in output buffer */			case EILSEQ:	/* invalid multibyte sequence encountered in input */			default:				/* FIXME: emit message */				tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: error %d: %s.\n", errno, strerror(errno));				break;			}		}		ib = buffer + isize;		isize = sizeof(buffer) - isize;		if (isize > field_len)			isize = field_len;	}		READ_TERMINATOR:	if (term_len > 0 && !feof(stream)) {		isize += term_len;		if (term_len && 1 == fread(buffer, term_len, 1, stream)) {			isize -= term_len;		} else {			tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: cannot read %u-byte terminator\n", (unsigned int) term_len);		}	}	return field_len + isize;}/** * Get a iconv info structure, allocate and initialize if needed */static TDSICONV *tds_iconv_get_info(TDSSOCKET * tds, const char *canonic_charset){	TDSICONV *info;	int i;	/* search a charset from already allocated charsets */	for (i = tds->char_conv_count; --i >= initial_char_conv_count;)		if (strcmp(canonic_charset, tds->char_convs[i]->server_charset.name) == 0)			return tds->char_convs[i];	/* allocate a new iconv structure */	if (tds->char_conv_count % CHUNK_ALLOC == ((initial_char_conv_count + 1) % CHUNK_ALLOC)) {		TDSICONV **p;		TDSICONV *infos;		infos = (TDSICONV *) malloc(sizeof(TDSICONV) * CHUNK_ALLOC);		if (!infos)			return NULL;		p = (TDSICONV **) realloc(tds->char_convs, sizeof(TDSICONV *) * (tds->char_conv_count + CHUNK_ALLOC));		if (!p) {			free(infos);			return NULL;		}		tds->char_convs = p;		memset(infos, 0, sizeof(TDSICONV) * CHUNK_ALLOC);		for (i = 0; i < CHUNK_ALLOC; ++i) {			tds->char_convs[i + tds->char_conv_count] = &infos[i];			tds_iconv_reset(&infos[i]);		}	}	info = tds->char_convs[tds->char_conv_count++];	/* init */	/* TODO test allocation */	tds_iconv_info_init(info, tds->char_convs[client2ucs2]->client_charset.name, canonic_charset);	return info;}/* change singlebyte conversions according to server */voidtds_srv_charset_changed(TDSSOCKET * tds, const char *charset){#if HAVE_ICONV_ALWAYS	TDSICONV *char_conv = tds->char_convs[client2server_chardata];

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?