📄 utf.c
字号:
### even if that complicates error handling in the routines below? */static voidput_xlate_handle_node(xlate_handle_node_t *node, const char *userdata_key, apr_pool_t *pool){ assert(node->next == NULL); if (!userdata_key) return; if (xlate_handle_hash) { xlate_handle_node_t **node_p;#if APR_HAS_THREADS if (apr_thread_mutex_lock(xlate_handle_mutex) != APR_SUCCESS) abort();#endif node_p = apr_hash_get(xlate_handle_hash, userdata_key, APR_HASH_KEY_STRING); if (node_p == NULL) { userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash), userdata_key); node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash), sizeof(*node_p)); *node_p = NULL; apr_hash_set(xlate_handle_hash, userdata_key, APR_HASH_KEY_STRING, node_p); } node->next = *node_p; *node_p = node;#if APR_HAS_THREADS if (apr_thread_mutex_unlock(xlate_handle_mutex) != APR_SUCCESS) abort();#endif } else { /* Store it in the per-pool cache. */ apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool); }}/* Return the apr_xlate handle for converting native characters to UTF-8. */static svn_error_t *get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool){ return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET, SVN_APR_LOCALE_CHARSET, SVN_UTF_NTOU_XLATE_HANDLE, pool);}/* Return the apr_xlate handle for converting UTF-8 to native characters. Create one if it doesn't exist. If unable to find a handle, or unable to create one because apr_xlate_open returned APR_EINVAL, then set *RET to null and return SVN_NO_ERROR; if fail for some other reason, return error. */static svn_error_t *get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool){ return get_xlate_handle_node(ret, SVN_APR_LOCALE_CHARSET, SVN_APR_UTF8_CHARSET, SVN_UTF_UTON_XLATE_HANDLE, pool);}/* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn sequences, allocating the result in POOL. */static const char *fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool){ const char *src_orig = src, *src_end = src + len; apr_size_t new_len = 0; char *new; const char *new_orig; /* First count how big a dest string we'll need. */ while (src < src_end) { if (! svn_ctype_isascii(*src) || *src == '\0') new_len += 5; /* 5 slots, for "?\XXX" */ else new_len += 1; /* one slot for the 7-bit char */ src++; } /* Allocate that amount. */ new = apr_palloc(pool, new_len + 1); new_orig = new; /* And fill it up. */ while (src_orig < src_end) { if (! svn_ctype_isascii(*src_orig) || src_orig == '\0') { /* This is the same format as svn_xml_fuzzy_escape uses, but that function escapes different characters. Please keep in sync! ### If we add another fuzzy escape somewhere, we should abstract ### this out to a common function. */ sprintf(new, "?\\%03u", (unsigned char) *src_orig); new += 5; } else { *new = *src_orig; new += 1; } src_orig++; } *new = '\0'; return new_orig;}/* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result in *DEST, which is allocated in POOL. */static svn_error_t *convert_to_stringbuf(xlate_handle_node_t *node, const char *src_data, apr_size_t src_length, svn_stringbuf_t **dest, apr_pool_t *pool){ apr_size_t buflen = src_length; apr_status_t apr_err; apr_size_t srclen = src_length; apr_size_t destlen = 0; char *destbuf; /* Initialize *DEST to an empty stringbuf. */ *dest = svn_stringbuf_create("", pool); destbuf = (*dest)->data; /* Not only does it not make sense to convert an empty string, but apr-iconv is quite unreasonable about not allowing that. */ if (src_length == 0) return SVN_NO_ERROR; do { /* A 1:2 ratio of input characters to output characters should be enough for most translations, and conveniently enough, if it isn't, we'll grow the buffer size by 2 again. */ if (destlen == 0) buflen *= 2; /* Ensure that *DEST has sufficient storage for the translated result. */ svn_stringbuf_ensure(*dest, buflen + 1); /* Update the destination buffer pointer to the first character after already-converted output. */ destbuf = (*dest)->data + (*dest)->len; /* Set up state variables for xlate. */ destlen = buflen - (*dest)->len; /* Attempt the conversion. */ apr_err = apr_xlate_conv_buffer(node->handle, src_data + (src_length - srclen), &srclen, destbuf, &destlen); /* Now, update the *DEST->len to track the amount of output data churned out so far from this loop. */ (*dest)->len += ((buflen - (*dest)->len) - destlen); } while (! apr_err && srclen); /* If we exited the loop with an error, return the error. */ if (apr_err) { const char *errstr; svn_error_t *err; /* Can't use svn_error_wrap_apr here because it calls functions in this file, leading to infinite recursion. */#ifndef AS400 if (node->frompage == SVN_APR_LOCALE_CHARSET) errstr = apr_psprintf (pool, _("Can't convert string from native encoding to '%s':"), node->topage); else if (node->topage == SVN_APR_LOCALE_CHARSET) errstr = apr_psprintf (pool, _("Can't convert string from '%s' to native encoding:"), node->frompage); else errstr = apr_psprintf (pool, _("Can't convert string from '%s' to '%s':"), node->frompage, node->topage);#else /* On OS400 V5R4 every possible node->topage and node->frompage * *really* is an int. */ errstr = apr_psprintf (pool, _("Can't convert string from CCSID '%i' to CCSID '%i'"), node->frompage, node->topage);#endif err = svn_error_create(apr_err, NULL, fuzzy_escape(src_data, src_length, pool)); return svn_error_create(apr_err, err, errstr); } /* Else, exited due to success. Trim the result buffer down to the right length. */ (*dest)->data[(*dest)->len] = '\0'; return SVN_NO_ERROR;}/* Return APR_EINVAL if the first LEN bytes of DATA contain anything other than seven-bit, non-control (except for whitespace) ASCII characters, finding the error pool from POOL. Otherwise, return SVN_NO_ERROR. */static svn_error_t *check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool){ const char *data_start = data; for (; len > 0; --len, data++) { if ((! apr_isascii(*data)) || ((! apr_isspace(*data)) && apr_iscntrl(*data))) { /* Show the printable part of the data, followed by the decimal code of the questionable character. Because if a user ever gets this error, she's going to have to spend time tracking down the non-ASCII data, so we want to help as much as possible. And yes, we just call the unsafe data "non-ASCII", even though the actual constraint is somewhat more complex than that. */ if (data - data_start) { const char *error_data = apr_pstrndup(pool, data_start, (data - data_start)); return svn_error_createf (APR_EINVAL, NULL, _("Safe data '%s' was followed by non-ASCII byte %d: " "unable to convert to/from UTF-8"), error_data, *((const unsigned char *) data)); } else { return svn_error_createf (APR_EINVAL, NULL, _("Non-ASCII character (code %d) detected, " "and unable to convert to/from UTF-8"), *((const unsigned char *) data)); } } } return SVN_NO_ERROR;}/* Construct an error with a suitable message to describe the invalid UTF-8 * sequence DATA of length LEN (which may have embedded NULLs). We can't * simply print the data, almost by definition we don't really know how it * is encoded. */static svn_error_t *invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool){ const char *last = svn_utf__last_valid(data, len); const char *valid_txt = "", *invalid_txt = ""; int i, valid, invalid; /* We will display at most 24 valid octets (this may split a leading multi-byte character) as that should fit on one 80 character line. */ valid = last - data; if (valid > 24) valid = 24; for (i = 0; i < valid; ++i) valid_txt = apr_pstrcat(pool, valid_txt, apr_psprintf(pool, " %02x", (unsigned char)last[i-valid]), NULL); /* 4 invalid octets will guarantee that the faulty octet is displayed */ invalid = data + len - last; if (invalid > 4) invalid = 4; for (i = 0; i < invalid; ++i) invalid_txt = apr_pstrcat(pool, invalid_txt, apr_psprintf(pool, " %02x", (unsigned char)last[i]), NULL); return svn_error_createf(APR_EINVAL, NULL, _("Valid UTF-8 data\n(hex:%s)\n" "followed by invalid UTF-8 sequence\n(hex:%s)"), valid_txt, invalid_txt);}/* Verify that the sequence DATA of length LEN is valid UTF-8 */static svn_error_t *check_utf8(const char *data, apr_size_t len, apr_pool_t *pool){ if (! svn_utf__is_valid(data, len)) return invalid_utf8(data, len, pool); return SVN_NO_ERROR;}/* Verify that the NULL terminated sequence DATA is valid UTF-8 */static svn_error_t *check_cstring_utf8(const char *data, apr_pool_t *pool){ if (! svn_utf__cstring_is_valid(data)) return invalid_utf8(data, strlen(data), pool); return SVN_NO_ERROR;}svn_error_t *svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool){ xlate_handle_node_t *node; svn_error_t *err; SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -