iconv.c

来自「在Linux/Unix下面访问WINDOWS SQLSERVER 的ODBC驱动」· C语言 代码 · 共 1,453 行 · 第 1/3 页

C
1,453
字号
	int canonic_charset_num = tds_canonical_charset(charset);	const char *canonic_charset;	if (tds->major_version >= 7 && canonic_charset_num == TDS_CHARSET_ISO_8859_1)		canonic_charset_num = TDS_CHARSET_CP1252;	/* ignore request to change to unknown charset */	if (canonic_charset_num < 0) {		tdsdump_log(TDS_DBG_FUNC, "tds_srv_charset_changed: what is charset \"%s\"?\n", charset);		return;	}	canonic_charset = canonic_charsets[canonic_charset_num].name;	tdsdump_log(TDS_DBG_FUNC, "setting server single-byte charset to \"%s\"\n", canonic_charset);	if (strcmp(canonic_charset, char_conv->server_charset.name) == 0)		return;	/* find and set conversion */	char_conv = tds_iconv_get_info(tds, canonic_charset);	if (char_conv)		tds->char_convs[client2server_chardata] = char_conv;	/* if sybase change also server conversions */	if (tds->major_version >= 7)		return;	char_conv = tds->char_convs[iso2server_metadata];	tds_iconv_info_close(char_conv);	tds_iconv_info_init(char_conv, "ISO-8859-1", charset);#endif}/* change singlebyte conversions according to server */voidtds7_srv_charset_changed(TDSSOCKET * tds, int sql_collate, int lcid){	tds_srv_charset_changed(tds, collate2charset(sql_collate, lcid));}#if !HAVE_ICONV_ALWAYS/** * Determine byte/char for an iconv character set.   * \retval 0 failed, no such charset. * \retval 1 succeeded, fixed byte/char. * \retval 2 succeeded, variable byte/char. */static intbytes_per_char(TDS_ENCODING * charset){	int i;	assert(charset && strlen(charset->name) < sizeof(charset->name));	for (i = 0; i < sizeof(canonic_charsets) / sizeof(TDS_ENCODING); i++) {		if (canonic_charsets[i].min_bytes_per_char == 0)			break;		if (0 == strcmp(charset->name, canonic_charsets[i].name)) {			charset->min_bytes_per_char = canonic_charsets[i].min_bytes_per_char;			charset->max_bytes_per_char = canonic_charsets[i].max_bytes_per_char;			return (charset->max_bytes_per_char == charset->min_bytes_per_char) ? 1 : 2;		}	}	return 0;}#endif/** * Move the input sequence pointer to the next valid position. * Used when an input character cannot be converted.   * \returns number of bytes to skip. *//* FIXME possible buffer reading overflow ?? */static intskip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size){	int charsize = CHARSIZE(charset);	char ib[16];	char ob[16];	ICONV_CONST char *pib;	char *pob;	size_t il, ol, l;	iconv_t cd2;	/* usually fixed size and UTF-8 do not have state, so do not reset it */	if (charsize) {		*input += charsize;		*input_size -= charsize;		return charsize;	}	if (0 == strcmp(charset->name, "UTF-8")) {		/*		 * Deal with UTF-8.  		 * bytes | bits | representation		 *     1 |    7 | 0vvvvvvv		 *     2 |   11 | 110vvvvv 10vvvvvv		 *     3 |   16 | 1110vvvv 10vvvvvv 10vvvvvv		 *     4 |   21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv		 */		int c = **input;		c = c & (c >> 1);		do {			++charsize;		} while ((c <<= 1) & 0x80);		*input += charsize;		*input_size -= charsize;		return charsize;	}	/* handle state encoding */	/* extract state from iconv */	pob = ib;	ol = sizeof(ib);	tds_sys_iconv(cd, NULL, NULL, &pob, &ol);	/* init destination conversion */	/* TODO use largest fixed size for this platform */	cd2 = tds_sys_iconv_open("UCS-4", charset->name);	if (cd2 == (iconv_t) - 1)		return 0;	/* add part of input */	il = ol;	if (il > *input_size)		il = *input_size;	l = sizeof(ib) - ol;	memcpy(ib + l, *input, il);	il += l;	/* translate a single character */	pib = ib;	pob = ob;	/* TODO use size of largest fixed charset */	ol = 4;	tds_sys_iconv(cd2, &pib, &il, &pob, &ol);	/* adjust input */	l = (pib - ib) - l;	*input += l;	*input_size -= l;	/* extract state */	pob = ib;	ol = sizeof(ib);	tds_sys_iconv(cd, NULL, NULL, &pob, &ol);	/* set input state */	pib = ib;	il = sizeof(ib) - ol;	pob = ob;	ol = sizeof(ob);	tds_sys_iconv(cd, &pib, &il, &pob, &ol);	tds_sys_iconv_close(cd2);	return l;}static intlookup_canonic(const CHARACTER_SET_ALIAS aliases[], const char *charset_name){	int i;	for (i = 0; aliases[i].alias; ++i) {		if (0 == strcmp(charset_name, aliases[i].alias))			return aliases[i].canonic;	}	return -1;}/** * Determine canonical iconv character set. * \returns canonical position, or -1 if lookup failed. * \remarks Returned name can be used in bytes_per_char(), above. */static inttds_canonical_charset(const char *charset_name){	int res;	/* search in alternative */	res = lookup_canonic(iconv_aliases, charset_name);	if (res >= 0)		return res;	/* search in sybase */	return lookup_canonic(sybase_aliases, charset_name);}/** * Determine canonical iconv character set name.   * \returns canonical name, or NULL if lookup failed. * \remarks Returned name can be used in bytes_per_char(), above. */const char *tds_canonical_charset_name(const char *charset_name){	int res;	/* get numeric pos */	res = tds_canonical_charset(charset_name);	if (res >= 0)		return canonic_charsets[res].name;	return NULL;}/** * Determine the name Sybase uses for a character set, given a canonical iconv name.   * \returns Sybase name, or NULL if lookup failed. * \remarks Returned name can be sent to Sybase a server. */const char *tds_sybase_charset_name(const char *charset_name){	int res, i;	/* search in sybase */	res = lookup_canonic(iconv_aliases, charset_name);	if (res < 0)		return NULL;	/* special case, ignore ascii_8, take iso_1 instead, note index start from 1 */	assert(strcmp(sybase_aliases[0].alias, "ascii_8") == 0);	for (i = 1; sybase_aliases[i].alias; ++i) {		if (sybase_aliases[i].canonic == res)			return sybase_aliases[i].alias;	}	return NULL;}static const char *collate2charset(int sql_collate, int lcid){	/*	 * The table from the MSQLServer reference "Windows Collation Designators" 	 * and from " NLS Information for Microsoft Windows XP"	 */	const char *cp = NULL;	switch (sql_collate) {	case 30:		/* SQL_Latin1_General_CP437_BIN */	case 31:		/* SQL_Latin1_General_CP437_CS_AS */	case 32:		/* SQL_Latin1_General_CP437_CI_AS */	case 33:		/* SQL_Latin1_General_Pref_CP437_CI_AS */	case 34:		/* SQL_Latin1_General_CP437_CI_AI */		return "CP437";	case 40:		/* SQL_Latin1_General_CP850_BIN */	case 41:		/* SQL_Latin1_General_CP850_CS_AS */	case 42:		/* SQL_Latin1_General_CP850_CI_AS */	case 43:		/* SQL_Latin1_General_Pref_CP850_CI_AS */	case 44:		/* SQL_Latin1_General_CP850_CI_AI */	case 49:		/* SQL_1xCompat_CP850_CI_AS */	case 55:		/* SQL_AltDiction_CP850_CS_AS */	case 56:		/* SQL_AltDiction_Pref_CP850_CI_AS */	case 57:		/* SQL_AltDiction_CP850_CI_AI */	case 58:		/* SQL_Scandinavian_Pref_CP850_CI_AS */	case 59:		/* SQL_Scandinavian_CP850_CS_AS */	case 60:		/* SQL_Scandinavian_CP850_CI_AS */	case 61:		/* SQL_AltDiction_CP850_CI_AS */		return "CP850";	case 81:		/* SQL_Latin1_General_CP1250_CS_AS */	case 82:		/* SQL_Latin1_General_CP1250_CI_AS */		return "CP1250";	case 105:		/* SQL_Latin1_General_CP1251_CS_AS */	case 106:		/* SQL_Latin1_General_CP1251_CI_AS */		return "CP1251";	case 113:		/* SQL_Latin1_General_CP1253_CS_AS */	case 114:		/* SQL_Latin1_General_CP1253_CI_AS */	case 120:		/* SQL_MixDiction_CP1253_CS_AS */	case 121:		/* SQL_AltDiction_CP1253_CS_AS */	case 124:		/* SQL_Latin1_General_CP1253_CI_AI */		return "CP1253";	case 137:		/* SQL_Latin1_General_CP1255_CS_AS */	case 138:		/* SQL_Latin1_General_CP1255_CI_AS */		return "CP1255";	case 145:		/* SQL_Latin1_General_CP1256_CS_AS */	case 146:		/* SQL_Latin1_General_CP1256_CI_AS */		return "CP1256";	case 153:		/* SQL_Latin1_General_CP1257_CS_AS */	case 154:		/* SQL_Latin1_General_CP1257_CI_AS */		return "CP1257";	}	switch (lcid & 0xffff) {	case 0x405:	case 0x40e:		/* 0x1040e */	case 0x415:	case 0x418:	case 0x41a:	case 0x41b:	case 0x41c:	case 0x424:		/* case 0x81a: seem wrong in XP table TODO check */	case 0x104e:		/* ?? */		cp = "CP1250";		break;	case 0x402:	case 0x419:	case 0x422:	case 0x423:	case 0x42f:	case 0x43f:	case 0x440:	case 0x444:	case 0x450:	case 0x81a:		/* ?? */	case 0x82c:	case 0x843:	case 0xc1a:		cp = "CP1251";		break;	case 0x1007:	case 0x1009:	case 0x100a:	case 0x100c:	case 0x1407:	case 0x1409:	case 0x140a:	case 0x140c:	case 0x1809:	case 0x180a:	case 0x180c:	case 0x1c09:	case 0x1c0a:	case 0x2009:	case 0x200a:	case 0x2409:	case 0x240a:	case 0x2809:	case 0x280a:	case 0x2c09:	case 0x2c0a:	case 0x3009:	case 0x300a:	case 0x3409:	case 0x340a:	case 0x380a:	case 0x3c0a:	case 0x400a:	case 0x403:	case 0x406:	case 0x407:		/* 0x10407 */	case 0x409:	case 0x40a:	case 0x40b:	case 0x40c:	case 0x40f:	case 0x410:	case 0x413:	case 0x414:	case 0x416:	case 0x41d:	case 0x421:	case 0x42d:	case 0x436:	case 0x437:		/* 0x10437 */	case 0x438:		/*case 0x439:  ??? Unicode only */	case 0x43e:	case 0x440a:	case 0x441:	case 0x456:	case 0x480a:	case 0x4c0a:	case 0x500a:	case 0x807:	case 0x809:	case 0x80a:	case 0x80c:	case 0x810:	case 0x813:	case 0x814:	case 0x816:	case 0x81d:	case 0x83e:	case 0xc07:	case 0xc09:	case 0xc0a:	case 0xc0c:		cp = "CP1252";		break;	case 0x408:		cp = "CP1253";		break;	case 0x41f:	case 0x42c:	case 0x443:		cp = "CP1254";		break;	case 0x40d:		cp = "CP1255";		break;	case 0x1001:	case 0x1401:	case 0x1801:	case 0x1c01:	case 0x2001:	case 0x2401:	case 0x2801:	case 0x2c01:	case 0x3001:	case 0x3401:	case 0x3801:	case 0x3c01:	case 0x4001:	case 0x401:	case 0x420:	case 0x429:	case 0x801:	case 0xc01:		cp = "CP1256";		break;	case 0x425:	case 0x426:	case 0x427:	case 0x827:		/* ?? */		cp = "CP1257";		break;	case 0x42a:		cp = "CP1258";		break;	case 0x41e:		cp = "CP874";		break;	case 0x411:		/* 0x10411 */		cp = "CP932";		break;	case 0x1004:	case 0x804:		/* 0x20804 */		cp = "CP936";		break;	case 0x412:		/* 0x10412 */		cp = "CP949";		break;	case 0x1404:	case 0x404:		/* 0x30404 */	case 0xc04:		cp = "CP950";		break;	default:		cp = "CP1252";	}	assert(cp);	return cp;}/** * Get iconv information from a LCID (to support different column encoding under MSSQL2K) */TDSICONV *tds_iconv_from_collate(TDSSOCKET * tds, int sql_collate, int lcid){	const char *charset = collate2charset(sql_collate, lcid);#if ENABLE_EXTRA_CHECKS	assert(strcmp(tds_canonical_charset_name(charset), charset) == 0);#endif	/* same as client (usually this is true, so this improve performance) ? */	if (strcmp(tds->char_convs[client2server_chardata]->server_charset.name, charset) == 0)		return tds->char_convs[client2server_chardata];	return tds_iconv_get_info(tds, charset);}/** @} */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?