iconv.c
来自「在Linux/Unix下面访问WINDOWS SQLSERVER 的ODBC驱动」· C语言 代码 · 共 1,453 行 · 第 1/3 页
C
1,453 行
int canonic_charset_num = tds_canonical_charset(charset); const char *canonic_charset; if (tds->major_version >= 7 && canonic_charset_num == TDS_CHARSET_ISO_8859_1) canonic_charset_num = TDS_CHARSET_CP1252; /* ignore request to change to unknown charset */ if (canonic_charset_num < 0) { tdsdump_log(TDS_DBG_FUNC, "tds_srv_charset_changed: what is charset \"%s\"?\n", charset); return; } canonic_charset = canonic_charsets[canonic_charset_num].name; tdsdump_log(TDS_DBG_FUNC, "setting server single-byte charset to \"%s\"\n", canonic_charset); if (strcmp(canonic_charset, char_conv->server_charset.name) == 0) return; /* find and set conversion */ char_conv = tds_iconv_get_info(tds, canonic_charset); if (char_conv) tds->char_convs[client2server_chardata] = char_conv; /* if sybase change also server conversions */ if (tds->major_version >= 7) return; char_conv = tds->char_convs[iso2server_metadata]; tds_iconv_info_close(char_conv); tds_iconv_info_init(char_conv, "ISO-8859-1", charset);#endif}/* change singlebyte conversions according to server */voidtds7_srv_charset_changed(TDSSOCKET * tds, int sql_collate, int lcid){ tds_srv_charset_changed(tds, collate2charset(sql_collate, lcid));}#if !HAVE_ICONV_ALWAYS/** * Determine byte/char for an iconv character set. * \retval 0 failed, no such charset. * \retval 1 succeeded, fixed byte/char. * \retval 2 succeeded, variable byte/char. */static intbytes_per_char(TDS_ENCODING * charset){ int i; assert(charset && strlen(charset->name) < sizeof(charset->name)); for (i = 0; i < sizeof(canonic_charsets) / sizeof(TDS_ENCODING); i++) { if (canonic_charsets[i].min_bytes_per_char == 0) break; if (0 == strcmp(charset->name, canonic_charsets[i].name)) { charset->min_bytes_per_char = canonic_charsets[i].min_bytes_per_char; charset->max_bytes_per_char = canonic_charsets[i].max_bytes_per_char; return (charset->max_bytes_per_char == charset->min_bytes_per_char) ? 1 : 2; } } return 0;}#endif/** * Move the input sequence pointer to the next valid position. * Used when an input character cannot be converted. * \returns number of bytes to skip. *//* FIXME possible buffer reading overflow ?? */static intskip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size){ int charsize = CHARSIZE(charset); char ib[16]; char ob[16]; ICONV_CONST char *pib; char *pob; size_t il, ol, l; iconv_t cd2; /* usually fixed size and UTF-8 do not have state, so do not reset it */ if (charsize) { *input += charsize; *input_size -= charsize; return charsize; } if (0 == strcmp(charset->name, "UTF-8")) { /* * Deal with UTF-8. * bytes | bits | representation * 1 | 7 | 0vvvvvvv * 2 | 11 | 110vvvvv 10vvvvvv * 3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv * 4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv */ int c = **input; c = c & (c >> 1); do { ++charsize; } while ((c <<= 1) & 0x80); *input += charsize; *input_size -= charsize; return charsize; } /* handle state encoding */ /* extract state from iconv */ pob = ib; ol = sizeof(ib); tds_sys_iconv(cd, NULL, NULL, &pob, &ol); /* init destination conversion */ /* TODO use largest fixed size for this platform */ cd2 = tds_sys_iconv_open("UCS-4", charset->name); if (cd2 == (iconv_t) - 1) return 0; /* add part of input */ il = ol; if (il > *input_size) il = *input_size; l = sizeof(ib) - ol; memcpy(ib + l, *input, il); il += l; /* translate a single character */ pib = ib; pob = ob; /* TODO use size of largest fixed charset */ ol = 4; tds_sys_iconv(cd2, &pib, &il, &pob, &ol); /* adjust input */ l = (pib - ib) - l; *input += l; *input_size -= l; /* extract state */ pob = ib; ol = sizeof(ib); tds_sys_iconv(cd, NULL, NULL, &pob, &ol); /* set input state */ pib = ib; il = sizeof(ib) - ol; pob = ob; ol = sizeof(ob); tds_sys_iconv(cd, &pib, &il, &pob, &ol); tds_sys_iconv_close(cd2); return l;}static intlookup_canonic(const CHARACTER_SET_ALIAS aliases[], const char *charset_name){ int i; for (i = 0; aliases[i].alias; ++i) { if (0 == strcmp(charset_name, aliases[i].alias)) return aliases[i].canonic; } return -1;}/** * Determine canonical iconv character set. * \returns canonical position, or -1 if lookup failed. * \remarks Returned name can be used in bytes_per_char(), above. */static inttds_canonical_charset(const char *charset_name){ int res; /* search in alternative */ res = lookup_canonic(iconv_aliases, charset_name); if (res >= 0) return res; /* search in sybase */ return lookup_canonic(sybase_aliases, charset_name);}/** * Determine canonical iconv character set name. * \returns canonical name, or NULL if lookup failed. * \remarks Returned name can be used in bytes_per_char(), above. */const char *tds_canonical_charset_name(const char *charset_name){ int res; /* get numeric pos */ res = tds_canonical_charset(charset_name); if (res >= 0) return canonic_charsets[res].name; return NULL;}/** * Determine the name Sybase uses for a character set, given a canonical iconv name. * \returns Sybase name, or NULL if lookup failed. * \remarks Returned name can be sent to Sybase a server. */const char *tds_sybase_charset_name(const char *charset_name){ int res, i; /* search in sybase */ res = lookup_canonic(iconv_aliases, charset_name); if (res < 0) return NULL; /* special case, ignore ascii_8, take iso_1 instead, note index start from 1 */ assert(strcmp(sybase_aliases[0].alias, "ascii_8") == 0); for (i = 1; sybase_aliases[i].alias; ++i) { if (sybase_aliases[i].canonic == res) return sybase_aliases[i].alias; } return NULL;}static const char *collate2charset(int sql_collate, int lcid){ /* * The table from the MSQLServer reference "Windows Collation Designators" * and from " NLS Information for Microsoft Windows XP" */ const char *cp = NULL; switch (sql_collate) { case 30: /* SQL_Latin1_General_CP437_BIN */ case 31: /* SQL_Latin1_General_CP437_CS_AS */ case 32: /* SQL_Latin1_General_CP437_CI_AS */ case 33: /* SQL_Latin1_General_Pref_CP437_CI_AS */ case 34: /* SQL_Latin1_General_CP437_CI_AI */ return "CP437"; case 40: /* SQL_Latin1_General_CP850_BIN */ case 41: /* SQL_Latin1_General_CP850_CS_AS */ case 42: /* SQL_Latin1_General_CP850_CI_AS */ case 43: /* SQL_Latin1_General_Pref_CP850_CI_AS */ case 44: /* SQL_Latin1_General_CP850_CI_AI */ case 49: /* SQL_1xCompat_CP850_CI_AS */ case 55: /* SQL_AltDiction_CP850_CS_AS */ case 56: /* SQL_AltDiction_Pref_CP850_CI_AS */ case 57: /* SQL_AltDiction_CP850_CI_AI */ case 58: /* SQL_Scandinavian_Pref_CP850_CI_AS */ case 59: /* SQL_Scandinavian_CP850_CS_AS */ case 60: /* SQL_Scandinavian_CP850_CI_AS */ case 61: /* SQL_AltDiction_CP850_CI_AS */ return "CP850"; case 81: /* SQL_Latin1_General_CP1250_CS_AS */ case 82: /* SQL_Latin1_General_CP1250_CI_AS */ return "CP1250"; case 105: /* SQL_Latin1_General_CP1251_CS_AS */ case 106: /* SQL_Latin1_General_CP1251_CI_AS */ return "CP1251"; case 113: /* SQL_Latin1_General_CP1253_CS_AS */ case 114: /* SQL_Latin1_General_CP1253_CI_AS */ case 120: /* SQL_MixDiction_CP1253_CS_AS */ case 121: /* SQL_AltDiction_CP1253_CS_AS */ case 124: /* SQL_Latin1_General_CP1253_CI_AI */ return "CP1253"; case 137: /* SQL_Latin1_General_CP1255_CS_AS */ case 138: /* SQL_Latin1_General_CP1255_CI_AS */ return "CP1255"; case 145: /* SQL_Latin1_General_CP1256_CS_AS */ case 146: /* SQL_Latin1_General_CP1256_CI_AS */ return "CP1256"; case 153: /* SQL_Latin1_General_CP1257_CS_AS */ case 154: /* SQL_Latin1_General_CP1257_CI_AS */ return "CP1257"; } switch (lcid & 0xffff) { case 0x405: case 0x40e: /* 0x1040e */ case 0x415: case 0x418: case 0x41a: case 0x41b: case 0x41c: case 0x424: /* case 0x81a: seem wrong in XP table TODO check */ case 0x104e: /* ?? */ cp = "CP1250"; break; case 0x402: case 0x419: case 0x422: case 0x423: case 0x42f: case 0x43f: case 0x440: case 0x444: case 0x450: case 0x81a: /* ?? */ case 0x82c: case 0x843: case 0xc1a: cp = "CP1251"; break; case 0x1007: case 0x1009: case 0x100a: case 0x100c: case 0x1407: case 0x1409: case 0x140a: case 0x140c: case 0x1809: case 0x180a: case 0x180c: case 0x1c09: case 0x1c0a: case 0x2009: case 0x200a: case 0x2409: case 0x240a: case 0x2809: case 0x280a: case 0x2c09: case 0x2c0a: case 0x3009: case 0x300a: case 0x3409: case 0x340a: case 0x380a: case 0x3c0a: case 0x400a: case 0x403: case 0x406: case 0x407: /* 0x10407 */ case 0x409: case 0x40a: case 0x40b: case 0x40c: case 0x40f: case 0x410: case 0x413: case 0x414: case 0x416: case 0x41d: case 0x421: case 0x42d: case 0x436: case 0x437: /* 0x10437 */ case 0x438: /*case 0x439: ??? Unicode only */ case 0x43e: case 0x440a: case 0x441: case 0x456: case 0x480a: case 0x4c0a: case 0x500a: case 0x807: case 0x809: case 0x80a: case 0x80c: case 0x810: case 0x813: case 0x814: case 0x816: case 0x81d: case 0x83e: case 0xc07: case 0xc09: case 0xc0a: case 0xc0c: cp = "CP1252"; break; case 0x408: cp = "CP1253"; break; case 0x41f: case 0x42c: case 0x443: cp = "CP1254"; break; case 0x40d: cp = "CP1255"; break; case 0x1001: case 0x1401: case 0x1801: case 0x1c01: case 0x2001: case 0x2401: case 0x2801: case 0x2c01: case 0x3001: case 0x3401: case 0x3801: case 0x3c01: case 0x4001: case 0x401: case 0x420: case 0x429: case 0x801: case 0xc01: cp = "CP1256"; break; case 0x425: case 0x426: case 0x427: case 0x827: /* ?? */ cp = "CP1257"; break; case 0x42a: cp = "CP1258"; break; case 0x41e: cp = "CP874"; break; case 0x411: /* 0x10411 */ cp = "CP932"; break; case 0x1004: case 0x804: /* 0x20804 */ cp = "CP936"; break; case 0x412: /* 0x10412 */ cp = "CP949"; break; case 0x1404: case 0x404: /* 0x30404 */ case 0xc04: cp = "CP950"; break; default: cp = "CP1252"; } assert(cp); return cp;}/** * Get iconv information from a LCID (to support different column encoding under MSSQL2K) */TDSICONV *tds_iconv_from_collate(TDSSOCKET * tds, int sql_collate, int lcid){ const char *charset = collate2charset(sql_collate, lcid);#if ENABLE_EXTRA_CHECKS assert(strcmp(tds_canonical_charset_name(charset), charset) == 0);#endif /* same as client (usually this is true, so this improve performance) ? */ if (strcmp(tds->char_convs[client2server_chardata]->server_charset.name, charset) == 0) return tds->char_convs[client2server_chardata]; return tds_iconv_get_info(tds, charset);}/** @} */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?