iconv.c
来自「在Linux/Unix下面访问WINDOWS SQLSERVER 的ODBC驱动」· C语言 代码 · 共 1,453 行 · 第 1/3 页
C
1,453 行
/* FreeTDS - Library of routines accessing Sybase and Microsoft databases * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Brian Bruns * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. *//* * iconv.c, handle all the conversion stuff without spreading #if HAVE_ICONV_ALWAYS * all over the other code */#if HAVE_CONFIG_H#include <config.h>#endif#include <stdarg.h>#include <stdio.h>#include <assert.h>#if HAVE_STRING_H#include <string.h>#endif /* HAVE_STRING_H */#if HAVE_ERRNO_H#include <errno.h>#endif#include "tds.h"#include "tdsiconv.h"#if HAVE_ICONV#include <iconv.h>#endif#ifdef DMALLOC#include <dmalloc.h>#endif/* define this for now; remove when done testing */#define HAVE_ICONV_ALWAYS 1TDS_RCSID(var, "$Id: iconv.c,v 1.132 2007/01/02 20:47:05 jklowden Exp $");#define CHARSIZE(charset) ( ((charset)->min_bytes_per_char == (charset)->max_bytes_per_char )? \ (charset)->min_bytes_per_char : 0 )#if !HAVE_ICONV_ALWAYSstatic int bytes_per_char(TDS_ENCODING * charset);#endifstatic const char *collate2charset(int sql_collate, int lcid);static int skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size);static int tds_iconv_info_init(TDSICONV * char_conv, const char *client_name, const char *server_name);static int tds_iconv_init(void);static int tds_canonical_charset(const char *charset_name);static void _iconv_close(iconv_t * cd);static void tds_iconv_info_close(TDSICONV * char_conv);/** * \ingroup libtds * \defgroup conv Charset conversion * Convert between different charsets. */#include "encodings.h"/* this will contain real iconv names */static const char *iconv_names[sizeof(canonic_charsets) / sizeof(canonic_charsets[0])];static int iconv_initialized = 0;static const char *ucs2name;enum{ POS_ISO1, POS_UTF8, POS_UCS2LE, POS_UCS2BE };/** * Initialize charset searching for UTF-8, UCS-2 and ISO8859-1 */static inttds_iconv_init(void){ int i; iconv_t cd; /* first entries should be constants */ assert(strcmp(canonic_charsets[POS_ISO1].name, "ISO-8859-1") == 0); assert(strcmp(canonic_charsets[POS_UTF8].name, "UTF-8") == 0); assert(strcmp(canonic_charsets[POS_UCS2LE].name, "UCS-2LE") == 0); assert(strcmp(canonic_charsets[POS_UCS2BE].name, "UCS-2BE") == 0); /* fast tests for GNU-iconv */ cd = tds_sys_iconv_open("ISO-8859-1", "UTF-8"); if (cd != (iconv_t) - 1) { iconv_names[POS_ISO1] = "ISO-8859-1"; iconv_names[POS_UTF8] = "UTF-8"; tds_sys_iconv_close(cd); } else { /* search names for ISO8859-1 and UTF-8 */ for (i = 0; iconv_aliases[i].alias; ++i) { int j; if (iconv_aliases[i].canonic != POS_ISO1) continue; for (j = 0; iconv_aliases[j].alias; ++j) { if (iconv_aliases[j].canonic != POS_UTF8) continue; cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_aliases[j].alias); if (cd != (iconv_t) - 1) { iconv_names[POS_ISO1] = iconv_aliases[i].alias; iconv_names[POS_UTF8] = iconv_aliases[j].alias; tds_sys_iconv_close(cd); break; } } if (iconv_names[POS_ISO1]) break; } /* required characters not found !!! */ if (!iconv_names[POS_ISO1]) return 1; } /* now search for UCS-2 */ cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2LE"); if (cd != (iconv_t) - 1) { iconv_names[POS_UCS2LE] = "UCS-2LE"; tds_sys_iconv_close(cd); } cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2BE"); if (cd != (iconv_t) - 1) { iconv_names[POS_UCS2BE] = "UCS-2BE"; tds_sys_iconv_close(cd); } /* long search needed ?? */ if (!iconv_names[POS_UCS2LE] || !iconv_names[POS_UCS2BE]) { for (i = 0; iconv_aliases[i].alias; ++i) { if (strncmp(canonic_charsets[iconv_aliases[i].canonic].name, "UCS-2", 5) != 0) continue; cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_names[POS_ISO1]); if (cd != (iconv_t) - 1) { char ib[1]; char ob[4]; size_t il, ol; ICONV_CONST char *pib; char *pob; int byte_sequence = 0; /* try to convert 'A' and check result */ ib[0] = 0x41; pib = ib; pob = ob; il = 1; ol = 4; ob[0] = ob[1] = 0; if (tds_sys_iconv(cd, &pib, &il, &pob, &ol) != (size_t) - 1) { /* byte order sequence ?? */ if (ol == 0) { ob[0] = ob[2]; byte_sequence = 1; /* TODO save somewhere */ } /* save name without sequence (if present) */ if (ob[0]) il = POS_UCS2LE; else il = POS_UCS2BE; if (!iconv_names[il] || !byte_sequence) iconv_names[il] = iconv_aliases[i].alias; } tds_sys_iconv_close(cd); } } } /* we need a UCS-2 (big endian or little endian) */ if (!iconv_names[POS_UCS2LE] && !iconv_names[POS_UCS2BE]) return 2; ucs2name = iconv_names[POS_UCS2LE] ? iconv_names[POS_UCS2LE] : iconv_names[POS_UCS2BE]; for (i = 0; i < 4; ++i) tdsdump_log(TDS_DBG_INFO1, "names for %s: %s\n", canonic_charsets[i].name, iconv_names[i] ? iconv_names[i] : "(null)"); /* success (it should always occurs) */ return 0;}/** * Get iconv name given canonic */static voidtds_get_iconv_name(int charset){ int i; iconv_t cd; assert(iconv_initialized); /* try using canonic name and UTF-8 and UCS2 */ cd = tds_sys_iconv_open(iconv_names[POS_UTF8], canonic_charsets[charset].name); if (cd != (iconv_t) - 1) { iconv_names[charset] = canonic_charsets[charset].name; tds_sys_iconv_close(cd); return; } cd = tds_sys_iconv_open(ucs2name, canonic_charsets[charset].name); if (cd != (iconv_t) - 1) { iconv_names[charset] = canonic_charsets[charset].name; tds_sys_iconv_close(cd); return; } /* try all alternatives */ for (i = 0; iconv_aliases[i].alias; ++i) { if (iconv_aliases[i].canonic != charset) continue; cd = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_aliases[i].alias); if (cd != (iconv_t) - 1) { iconv_names[charset] = iconv_aliases[i].alias; tds_sys_iconv_close(cd); return; } cd = tds_sys_iconv_open(ucs2name, iconv_aliases[i].alias); if (cd != (iconv_t) - 1) { iconv_names[charset] = iconv_aliases[i].alias; tds_sys_iconv_close(cd); return; } } /* charset not found, use memcpy */ iconv_names[charset] = "";}static voidtds_iconv_reset(TDSICONV *conv){ /* * (min|max)_bytes_per_char can be used to divide * so init to safe values */ conv->server_charset.min_bytes_per_char = 1; conv->server_charset.max_bytes_per_char = 1; conv->client_charset.min_bytes_per_char = 1; conv->client_charset.max_bytes_per_char = 1; conv->server_charset.name = conv->client_charset.name = ""; conv->to_wire = (iconv_t) - 1; conv->to_wire2 = (iconv_t) - 1; conv->from_wire = (iconv_t) - 1; conv->from_wire2 = (iconv_t) - 1;}/** * Allocate iconv stuff * \return 0 for success */inttds_iconv_alloc(TDSSOCKET * tds){ int i; TDSICONV *char_conv; assert(!tds->char_convs); if (!(tds->char_convs = (TDSICONV **) malloc(sizeof(TDSICONV *) * (initial_char_conv_count + 1)))) return 1; char_conv = (TDSICONV *) malloc(sizeof(TDSICONV) * initial_char_conv_count); if (!char_conv) { TDS_ZERO_FREE(tds->char_convs); return 1; } memset(char_conv, 0, sizeof(TDSICONV) * initial_char_conv_count); tds->char_conv_count = initial_char_conv_count + 1; for (i = 0; i < initial_char_conv_count; ++i) { tds->char_convs[i] = &char_conv[i]; tds_iconv_reset(&char_conv[i]); } /* chardata is just a pointer to another iconv info */ tds->char_convs[initial_char_conv_count] = tds->char_convs[client2server_chardata]; return 0;}/** * \addtogroup conv * @{ * Set up the initial iconv conversion descriptors. * When the socket is allocated, three TDSICONV structures are attached to iconv. * They have fixed meanings: * \li 0. Client <-> UCS-2 (client2ucs2) * \li 1. Client <-> server single-byte charset (client2server_chardata) * \li 2. ISO8859-1 <-> server meta data (iso2server_metadata) * * Other designs that use less data are possible, but these three conversion needs are * very often needed. By reserving them, we avoid searching the array for our most common purposes. * * To solve different iconv names and portability problems FreeTDS maintains * a list of aliases each charset. * * First we discover the names of our minimum required charsets (UTF-8, ISO8859-1 and UCS2). * Later, as and when it's needed, we try to discover others. * * There is one list of canonic names (GNU iconv names) and two sets of aliases * (one for other iconv implementations and another for Sybase). For every * canonic charset name we cache the iconv name found during discovery. */voidtds_iconv_open(TDSSOCKET * tds, const char *charset){ static const char UCS_2LE[] = "UCS-2LE"; const char *name; int fOK, ret; TDS_ENCODING *client = &tds->char_convs[client2ucs2]->client_charset; TDS_ENCODING *server = &tds->char_convs[client2ucs2]->server_charset;#if !HAVE_ICONV_ALWAYS strcpy(client->name, "ISO-8859-1"); strcpy(server->name, UCS_2LE); bytes_per_char(client); bytes_per_char(server); return;#else /* initialize */ if (!iconv_initialized) { if ((ret = tds_iconv_init()) > 0) { static const char names[][12] = { "ISO 8859-1", "UTF-8" }; assert(ret < 3); tdsdump_log(TDS_DBG_FUNC, "error: tds_iconv_init() returned %d; " "could not find a name for %s that your iconv accepts.\n" "use: \"configure --disable-libiconv\"", ret, names[ret-1]); assert(ret == 0); return; } iconv_initialized = 1; } /* * Client <-> UCS-2 (client2ucs2) */ tdsdump_log(TDS_DBG_FUNC, "iconv to convert client-side data to the \"%s\" character set\n", charset); fOK = tds_iconv_info_init(tds->char_convs[client2ucs2], charset, UCS_2LE); if (!fOK) return; /* * How many UTF-8 bytes we need is a function of what the input character set is. * TODO This could definitely be more sophisticated, but it deals with the common case. */ if (client->min_bytes_per_char == 1 && client->max_bytes_per_char == 4 && server->max_bytes_per_char == 1) { /* ie client is UTF-8 and server is ISO-8859-1 or variant. */ client->max_bytes_per_char = 3; } /* * Client <-> server single-byte charset * TODO: the server hasn't reported its charset yet, so this logic can't work here. * not sure what to do about that yet. */ tds->char_convs[client2server_chardata]->flags = TDS_ENCODING_MEMCPY; if (tds->env.charset) { fOK = tds_iconv_info_init(tds->char_convs[client2server_chardata], charset, tds->env.charset); if (!fOK) return; } /* * ISO8859-1 <-> server meta data */ name = UCS_2LE; if (tds->major_version < 7) { name = "ISO-8859-1"; if (tds->env.charset) name = tds->env.charset; } fOK = tds_iconv_info_init(tds->char_convs[iso2server_metadata], "ISO-8859-1", name);#endif}/** * Open iconv descriptors to convert between character sets (both directions). * 1. Look up the canonical names of the character sets. * 2. Look up their widths. * 3. Ask iconv to open a conversion descriptor. * 4. Fail if any of the above offer any resistance. * \remarks The charset names written to \a iconv will be the canonical names, * not necessarily the names passed in. */static inttds_iconv_info_init(TDSICONV * char_conv, const char *client_name, const char *server_name){ TDS_ENCODING *client = &char_conv->client_charset; TDS_ENCODING *server = &char_conv->server_charset; int server_canonical, client_canonical; assert(client_name && server_name); assert(char_conv->to_wire == (iconv_t) - 1); assert(char_conv->to_wire2 == (iconv_t) - 1); assert(char_conv->from_wire == (iconv_t) - 1); assert(char_conv->from_wire2 == (iconv_t) - 1); client_canonical = tds_canonical_charset(client_name); server_canonical = tds_canonical_charset(server_name); if (client_canonical < 0) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: client charset name \"%s\" unrecognized\n", client->name); return 0; } if (server_canonical < 0) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: server charset name \"%s\" unrecognized\n", client->name); return 0; } *client = canonic_charsets[client_canonical]; *server = canonic_charsets[server_canonical]; /* special case, same charset, no conversion */ if (client_canonical == server_canonical) { char_conv->to_wire = (iconv_t) - 1; char_conv->from_wire = (iconv_t) - 1; char_conv->flags = TDS_ENCODING_MEMCPY; return 1; } char_conv->flags = 0; if (!iconv_names[server_canonical]) { switch (server_canonical) { case POS_UCS2LE: server_canonical = POS_UCS2BE; char_conv->flags = TDS_ENCODING_SWAPBYTE; break; case POS_UCS2BE: server_canonical = POS_UCS2LE; char_conv->flags = TDS_ENCODING_SWAPBYTE; break; } } /* get iconv names */ if (!iconv_names[client_canonical]) tds_get_iconv_name(client_canonical); if (!iconv_names[server_canonical]) tds_get_iconv_name(server_canonical); /* names available ?? */ if (!iconv_names[client_canonical][0] || !iconv_names[server_canonical][0]) { char_conv->to_wire = (iconv_t) - 1; char_conv->from_wire = (iconv_t) - 1; char_conv->flags = TDS_ENCODING_MEMCPY; tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: use memcpy to convert \"%s\"->\"%s\"\n", client->name, server->name); return 0; } char_conv->to_wire = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]); if (char_conv->to_wire == (iconv_t) - 1) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", client->name, server->name); }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?