iconv.c
来自「在Linux/Unix下面访问WINDOWS SQLSERVER 的ODBC驱动」· C语言 代码 · 共 1,453 行 · 第 1/3 页
C
1,453 行
char_conv->from_wire = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]); if (char_conv->from_wire == (iconv_t) - 1) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name); } /* try indirect conversions */ if (char_conv->to_wire == (iconv_t) - 1 || char_conv->from_wire == (iconv_t) - 1) { tds_iconv_info_close(char_conv); /* TODO reuse some conversion, client charset is usually constant in all connection (or ISO8859-1) */ char_conv->to_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[client_canonical]); char_conv->to_wire2 = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[POS_UTF8]); char_conv->from_wire = tds_sys_iconv_open(iconv_names[POS_UTF8], iconv_names[server_canonical]); char_conv->from_wire2 = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[POS_UTF8]); if (char_conv->to_wire == (iconv_t) - 1 || char_conv->to_wire2 == (iconv_t) - 1 || char_conv->from_wire == (iconv_t) - 1 || char_conv->from_wire2 == (iconv_t) - 1) { tds_iconv_info_close(char_conv); tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\" indirectly\n", server->name, client->name); return 0; } char_conv->flags |= TDS_ENCODING_INDIRECT; } /* TODO, do some optimizations like UCS2 -> UTF8 min,max = 2,2 (UCS2) and 1,4 (UTF8) */ tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: converting \"%s\"->\"%s\"\n", client->name, server->name); return 1;}#if HAVE_ICONV_ALWAYSstatic void_iconv_close(iconv_t * cd){ static const iconv_t invalid = (iconv_t) - 1; if (*cd != invalid) { tds_sys_iconv_close(*cd); *cd = invalid; }}static voidtds_iconv_info_close(TDSICONV * char_conv){ _iconv_close(&char_conv->to_wire); _iconv_close(&char_conv->to_wire2); _iconv_close(&char_conv->from_wire); _iconv_close(&char_conv->from_wire2);}#endifvoidtds_iconv_close(TDSSOCKET * tds){#if HAVE_ICONV_ALWAYS int i; for (i = 0; i < tds->char_conv_count; ++i) { tds_iconv_info_close(tds->char_convs[i]); }#endif}#define CHUNK_ALLOC 4voidtds_iconv_free(TDSSOCKET * tds){ int i; if (!tds->char_convs) return; tds_iconv_close(tds); free(tds->char_convs[0]); for (i = initial_char_conv_count + 1; i < tds->char_conv_count; i += CHUNK_ALLOC) free(tds->char_convs[i]); TDS_ZERO_FREE(tds->char_convs); tds->char_conv_count = 0;}/** * Wrapper around iconv(3). Same parameters, with slightly different behavior. * \param tds state information for the socket and the TDS protocol * \param io Enumerated value indicating whether the data are being sent to or received from the server. * \param conv information about the encodings involved, including the iconv(3) conversion descriptors. * \param inbuf address of pointer to the input buffer of data to be converted. * \param inbytesleft address of count of bytes in \a inbuf. * \param outbuf address of pointer to the output buffer. * \param outbytesleft address of count of bytes in \a outbuf. * \retval number of irreversible conversions performed. -1 on error, see iconv(3) documentation for * a description of the possible values of \e errno. * \remarks Unlike iconv(3), none of the arguments can be nor point to NULL. Like iconv(3), all pointers will * be updated. Success is signified by a nonnegative return code and \a *inbytesleft == 0. * If the conversion descriptor in \a iconv is -1 or NULL, \a inbuf is copied to \a outbuf, * and all parameters updated accordingly. * * If a character in \a inbuf cannot be converted because no such cbaracter exists in the * \a outbuf character set, we emit messages similar to the ones Sybase emits when it fails such a conversion. * The message varies depending on the direction of the data. * On a read error, we emit Msg 2403, Severity 16 (EX_INFO): * "WARNING! Some character(s) could not be converted into client's character set. * Unconverted bytes were changed to question marks ('?')." * On a write error we emit Msg 2402, Severity 16 (EX_USER): * "Error converting client characters into server's character set. Some character(s) could not be converted." * and return an error code. Client libraries relying on this routine should reflect an error back to the application. * * \todo Check for variable multibyte non-UTF-8 input character set. * \todo Use more robust error message generation. * \todo For reads, cope with \a outbuf encodings that don't have the equivalent of an ASCII '?'. * \todo Support alternative to '?' for the replacement character. */size_ttds_iconv(TDSSOCKET * tds, const TDSICONV * conv, TDS_ICONV_DIRECTION io, const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft){ static const iconv_t invalid = (iconv_t) - 1; const TDS_ENCODING *input_charset = NULL; const char *output_charset_name = NULL; iconv_t cd = invalid, cd2 = invalid; iconv_t error_cd = invalid; char quest_mark[] = "?"; /* best to leave non-const; implementations vary */ ICONV_CONST char *pquest_mark = quest_mark; size_t lquest_mark; size_t irreversible; char one_character; char *p; int eilseq_raised = 0; /* cast away const-ness */ TDS_ERRNO_MESSAGE_FLAGS *suppress = (TDS_ERRNO_MESSAGE_FLAGS*) &conv->suppress; assert(inbuf && inbytesleft && outbuf && outbytesleft); switch (io) { case to_server: cd = conv->to_wire; cd2 = conv->to_wire2; input_charset = &conv->client_charset; output_charset_name = conv->server_charset.name; break; case to_client: cd = conv->from_wire; cd2 = conv->from_wire2; input_charset = &conv->server_charset; output_charset_name = conv->client_charset.name; break; default: tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out. \n", io); assert(io == to_server || io == to_client); break; } /* silly case, memcpy */ if (conv->flags & TDS_ENCODING_MEMCPY || cd == invalid) { size_t len = *inbytesleft < *outbytesleft ? *inbytesleft : *outbytesleft; memcpy(*outbuf, *inbuf, len); errno = *inbytesleft > *outbytesleft ? E2BIG : 0; *inbytesleft -= len; *outbytesleft -= len; *inbuf += len; *outbuf += len; return 0; } /* * Call iconv() as many times as necessary, until we reach the end of input or exhaust output. */ errno = 0; p = *outbuf; for (;;) { if (conv->flags & TDS_ENCODING_INDIRECT) {#if ENABLE_EXTRA_CHECKS char tmp[8];#else char tmp[128];#endif char *pb = tmp; size_t l = sizeof(tmp); int temp_errno; size_t temp_irreversible; temp_irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, &pb, &l); temp_errno = errno; /* convert partial */ pb = tmp; l = sizeof(tmp) - l; for (;;) { errno = 0; irreversible = tds_sys_iconv(cd2, (ICONV_CONST char **) &pb, &l, outbuf, outbytesleft); if (irreversible != (size_t) - 1) { if (*inbytesleft) break; goto end_loop; } /* EINVAL should be impossible, all characters came from previous iconv... */ if (errno == E2BIG || errno == EINVAL) goto end_loop; /* * error should be EILSEQ, not convertible sequence * skip UTF-8 sequence */ /* avoid infinite recursion */ eilseq_raised = 1; if (*pb == '?') goto end_loop; *pb = (char) 0x80; while(l && (*pb & 0xC0) == 0x80) ++pb, --l; --pb; ++l; *pb = '?'; } if (temp_errno == E2BIG) { errno = 0; continue; } errno = temp_errno; irreversible = temp_irreversible; break; } else if (io == to_client && conv->flags & TDS_ENCODING_SWAPBYTE) { /* swap bytes if necessary */#if ENABLE_EXTRA_CHECKS char tmp[8];#else char tmp[128];#endif char *pib = tmp; size_t il = *inbytesleft > sizeof(tmp) ? sizeof(tmp) : *inbytesleft; size_t n; for (n = 0; n < il; n += 2) { tmp[n] = (*inbuf)[n + 1]; tmp[n + 1] = (*inbuf)[n]; } irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) &pib, &il, outbuf, outbytesleft); il = pib - tmp; *inbuf += il; *inbytesleft -= il; if (irreversible != (size_t) - 1 && *inbytesleft) continue; } else { irreversible = tds_sys_iconv(cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft); } if (irreversible != (size_t) - 1) break; if (errno == EILSEQ) eilseq_raised = 1; if (errno != EILSEQ || io != to_client) break; /* * Invalid input sequence encountered reading from server. * Skip one input sequence, adjusting pointers. */ one_character = skip_one_input_sequence(cd, input_charset, inbuf, inbytesleft); if (!one_character) break; /* * To replace invalid input with '?', we have to convert a UTF-8 '?' into the output character set. * In unimaginably weird circumstances, this might be impossible. * We use UTF-8 instead of ASCII because some implementations * do not convert singlebyte <-> singlebyte. */ if (error_cd == invalid) { error_cd = tds_sys_iconv_open(output_charset_name, iconv_names[POS_UTF8]); if (error_cd == invalid) { break; /* what to do? */ } } lquest_mark = 1; pquest_mark = quest_mark; p = *outbuf; irreversible = tds_sys_iconv(error_cd, &pquest_mark, &lquest_mark, outbuf, outbytesleft); if (irreversible == (size_t) - 1) break; if (!*inbytesleft) break; }end_loop: /* swap bytes if necessary */ if (io == to_server && conv->flags & TDS_ENCODING_SWAPBYTE) { assert((*outbuf - p) % 2 == 0); for (; p < *outbuf; p += 2) { char tmp = p[0]; p[0] = p[1]; p[1] = tmp; } } if (eilseq_raised && !suppress->eilseq) { /* invalid multibyte input sequence encountered */ if (io == to_client) { if (irreversible == (size_t) - 1) { tdserror(tds->tds_ctx, tds, TDSEICONV2BIG, 0); } else { tdserror(tds->tds_ctx, tds, TDSEICONVI, 0); errno = 0; } } else { tdserror(tds->tds_ctx, tds, TDSEICONVO, 0); } suppress->eilseq = 1; } switch (errno) { case EINVAL: /* incomplete multibyte sequence is encountered */ if (suppress->einval) break; /* in chunk conversion this can mean we end a chunk inside a character */ tdserror(tds->tds_ctx, tds, TDSEICONVAVAIL, 0); suppress->einval = 1; break; case E2BIG: /* output buffer has no more room */ if (suppress->e2big) break; tdserror(tds->tds_ctx, tds, TDSEICONVIU, 0); suppress->e2big = 1; break; default: break; } if (error_cd != invalid) { tds_sys_iconv_close(error_cd); } return irreversible;}/** * Read a data file, passing the data through iconv(). * \return Count of bytes either not read, or read but not converted. Returns zero on success. */size_ttds_iconv_fread(iconv_t cd, FILE * stream, size_t field_len, size_t term_len, char *outbuf, size_t * outbytesleft){#ifdef ENABLE_EXTRA_CHECKS char buffer[16];#else char buffer[16000];#endif char *ib; size_t isize = 0, nonreversible_conversions = 0; /* * If cd isn't valid, it's just an indication that this column needs no conversion. */ if (cd == (iconv_t) - 1) { assert(field_len <= *outbytesleft); if (field_len > 0) { if (1 != fread(outbuf, field_len, 1, stream)) { return field_len + term_len; /* unable to read */ } } /* prepare to read the terminator and return */ *outbytesleft -= field_len; /* as iconv would have done */ isize = 0; /* as iconv would have done */ field_len = 0; /* as the loop would have done */ goto READ_TERMINATOR; } /* * Read in chunks. * field_len is the total size to read * isize is the size of the current chunk (which might be the whole thing). * They are decremented as they are successfully processed. * On success, we exit the loop with both equal to zero, indicating nothing we * were asked to read remains unread. */ isize = (sizeof(buffer) < field_len) ? sizeof(buffer) : field_len; for (ib = buffer; isize && (isize = fread(ib, 1, isize, stream)) > 0;) { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: read %u of %u bytes; outbuf has %u left.\n", (unsigned int) isize, (unsigned int) field_len, (unsigned int) *outbytesleft); field_len -= isize; isize += ib - buffer; ib = buffer; nonreversible_conversions += tds_sys_iconv(cd, (ICONV_CONST char **) &ib, &isize, &outbuf, outbytesleft); if (isize != 0) { memmove(buffer, ib, isize); switch (errno) { case EINVAL: /* incomplete multibyte sequence encountered in input */ break; case E2BIG: /* insufficient room in output buffer */ case EILSEQ: /* invalid multibyte sequence encountered in input */ default: /* FIXME: emit message */ tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: error %d: %s.\n", errno, strerror(errno)); break; } } ib = buffer + isize; isize = sizeof(buffer) - isize; if (isize > field_len) isize = field_len; } READ_TERMINATOR: if (term_len > 0 && !feof(stream)) { isize += term_len; if (term_len && 1 == fread(buffer, term_len, 1, stream)) { isize -= term_len; } else { tdsdump_log(TDS_DBG_FUNC, "tds_iconv_fread: cannot read %u-byte terminator\n", (unsigned int) term_len); } } return field_len + isize;}/** * Get a iconv info structure, allocate and initialize if needed */static TDSICONV *tds_iconv_get_info(TDSSOCKET * tds, const char *canonic_charset){ TDSICONV *info; int i; /* search a charset from already allocated charsets */ for (i = tds->char_conv_count; --i >= initial_char_conv_count;) if (strcmp(canonic_charset, tds->char_convs[i]->server_charset.name) == 0) return tds->char_convs[i]; /* allocate a new iconv structure */ if (tds->char_conv_count % CHUNK_ALLOC == ((initial_char_conv_count + 1) % CHUNK_ALLOC)) { TDSICONV **p; TDSICONV *infos; infos = (TDSICONV *) malloc(sizeof(TDSICONV) * CHUNK_ALLOC); if (!infos) return NULL; p = (TDSICONV **) realloc(tds->char_convs, sizeof(TDSICONV *) * (tds->char_conv_count + CHUNK_ALLOC)); if (!p) { free(infos); return NULL; } tds->char_convs = p; memset(infos, 0, sizeof(TDSICONV) * CHUNK_ALLOC); for (i = 0; i < CHUNK_ALLOC; ++i) { tds->char_convs[i + tds->char_conv_count] = &infos[i]; tds_iconv_reset(&infos[i]); } } info = tds->char_convs[tds->char_conv_count++]; /* init */ /* TODO test allocation */ tds_iconv_info_init(info, tds->char_convs[client2ucs2]->client_charset.name, canonic_charset); return info;}/* change singlebyte conversions according to server */voidtds_srv_charset_changed(TDSSOCKET * tds, const char *charset){#if HAVE_ICONV_ALWAYS TDSICONV *char_conv = tds->char_convs[client2server_chardata];
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?