utf.c
来自「subversion-1.4.5.tar.gz 配置svn的源码」· C语言 代码 · 共 992 行 · 第 1/3 页
C
992 行
put_xlate_handle_node(xlate_handle_node_t *node, const char *userdata_key, apr_pool_t *pool){ assert(node->next == NULL); if (!userdata_key) return; if (xlate_handle_hash) { xlate_handle_node_t **node_p;#if APR_HAS_THREADS if (apr_thread_mutex_lock(xlate_handle_mutex) != APR_SUCCESS) abort();#endif node_p = apr_hash_get(xlate_handle_hash, userdata_key, APR_HASH_KEY_STRING); if (node_p == NULL) { userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash), userdata_key); node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash), sizeof(*node_p)); *node_p = NULL; apr_hash_set(xlate_handle_hash, userdata_key, APR_HASH_KEY_STRING, node_p); } node->next = *node_p; *node_p = node;#if APR_HAS_THREADS if (apr_thread_mutex_unlock(xlate_handle_mutex) != APR_SUCCESS) abort();#endif } else { /* Store it in the per-pool cache. */ apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool); }}/* Return the apr_xlate handle for converting native characters to UTF-8. */static svn_error_t *get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool){ return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET, SVN_APR_LOCALE_CHARSET, SVN_UTF_NTOU_XLATE_HANDLE, pool);}/* Return the apr_xlate handle for converting UTF-8 to native characters. Create one if it doesn't exist. If unable to find a handle, or unable to create one because apr_xlate_open returned APR_EINVAL, then set *RET to null and return SVN_NO_ERROR; if fail for some other reason, return error. */static svn_error_t *get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool){ return get_xlate_handle_node(ret, SVN_APR_LOCALE_CHARSET, SVN_APR_UTF8_CHARSET, SVN_UTF_UTON_XLATE_HANDLE, pool);}/* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn sequences, allocating the result in POOL. */static const char *fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool){ const char *src_orig = src, *src_end = src + len; apr_size_t new_len = 0; char *new; const char *new_orig; /* First count how big a dest string we'll need. */ while (src < src_end) { if (! svn_ctype_isascii(*src) || *src == '\0') new_len += 5; /* 5 slots, for "?\XXX" */ else new_len += 1; /* one slot for the 7-bit char */ src++; } /* Allocate that amount. */ new = apr_palloc(pool, new_len + 1); new_orig = new; /* And fill it up. */ while (src_orig < src_end) { if (! svn_ctype_isascii(*src_orig) || src_orig == '\0') { /* This is the same format as svn_xml_fuzzy_escape uses, but that function escapes different characters. Please keep in sync! ### If we add another fuzzy escape somewhere, we should abstract ### this out to a common function. */ sprintf(new, "?\\%03u", (unsigned char) *src_orig); new += 5; } else { *new = *src_orig; new += 1; } src_orig++; } *new = '\0'; return new_orig;}/* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result in *DEST, which is allocated in POOL. */static svn_error_t *convert_to_stringbuf(xlate_handle_node_t *node, const char *src_data, apr_size_t src_length, svn_stringbuf_t **dest, apr_pool_t *pool){ apr_size_t buflen = src_length * 2; apr_status_t apr_err; apr_size_t srclen = src_length; apr_size_t destlen = buflen; char *destbuf; /* Initialize *DEST to an empty stringbuf. */ *dest = svn_stringbuf_create("", pool); destbuf = (*dest)->data; /* Not only does it not make sense to convert an empty string, but apr-iconv is quite unreasonable about not allowing that. */ if (src_length == 0) return SVN_NO_ERROR; do { /* A 1:2 ratio of input bytes to output bytes (as assigned above) should be enough for most translations, and if it turns out not to be enough, we'll grow the buffer again, sizing it based on a 1:3 ratio of the remainder of the string. We also want to ensure that the output buffer always has at least 3 bytes spare so that we always have room to convert at least one character (we assume that no encoding uses more than three bytes for a character) */ if (destlen < 3) buflen += (srclen * 3); /* Ensure that *DEST has sufficient storage for the translated result. */ svn_stringbuf_ensure(*dest, buflen + 1); /* Update the destination buffer pointer to the first character after already-converted output. */ destbuf = (*dest)->data + (*dest)->len; /* Set up state variables for xlate. */ destlen = buflen - (*dest)->len; assert(destlen >= 3); /* Attempt the conversion. */ apr_err = apr_xlate_conv_buffer(node->handle, src_data + (src_length - srclen), &srclen, destbuf, &destlen); /* Now, update the *DEST->len to track the amount of output data churned out so far from this loop. */ (*dest)->len += ((buflen - (*dest)->len) - destlen); } while (! apr_err && srclen); /* If we exited the loop with an error, return the error. */ if (apr_err) { const char *errstr; svn_error_t *err; /* Can't use svn_error_wrap_apr here because it calls functions in this file, leading to infinite recursion. */#ifndef AS400 if (node->frompage == SVN_APR_LOCALE_CHARSET) errstr = apr_psprintf (pool, _("Can't convert string from native encoding to '%s':"), node->topage); else if (node->topage == SVN_APR_LOCALE_CHARSET) errstr = apr_psprintf (pool, _("Can't convert string from '%s' to native encoding:"), node->frompage); else errstr = apr_psprintf (pool, _("Can't convert string from '%s' to '%s':"), node->frompage, node->topage);#else /* On OS400 V5R4 every possible node->topage and node->frompage * *really* is an int. */ errstr = apr_psprintf (pool, _("Can't convert string from CCSID '%i' to CCSID '%i'"), node->frompage, node->topage);#endif err = svn_error_create(apr_err, NULL, fuzzy_escape(src_data, src_length, pool)); return svn_error_create(apr_err, err, errstr); } /* Else, exited due to success. Trim the result buffer down to the right length. */ (*dest)->data[(*dest)->len] = '\0'; return SVN_NO_ERROR;}/* Return APR_EINVAL if the first LEN bytes of DATA contain anything other than seven-bit, non-control (except for whitespace) ASCII characters, finding the error pool from POOL. Otherwise, return SVN_NO_ERROR. */static svn_error_t *check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool){ const char *data_start = data; for (; len > 0; --len, data++) { if ((! apr_isascii(*data)) || ((! apr_isspace(*data)) && apr_iscntrl(*data))) { /* Show the printable part of the data, followed by the decimal code of the questionable character. Because if a user ever gets this error, she's going to have to spend time tracking down the non-ASCII data, so we want to help as much as possible. And yes, we just call the unsafe data "non-ASCII", even though the actual constraint is somewhat more complex than that. */ if (data - data_start) { const char *error_data = apr_pstrndup(pool, data_start, (data - data_start)); return svn_error_createf (APR_EINVAL, NULL, _("Safe data '%s' was followed by non-ASCII byte %d: " "unable to convert to/from UTF-8"), error_data, *((const unsigned char *) data)); } else { return svn_error_createf (APR_EINVAL, NULL, _("Non-ASCII character (code %d) detected, " "and unable to convert to/from UTF-8"), *((const unsigned char *) data)); } } } return SVN_NO_ERROR;}/* Construct an error with a suitable message to describe the invalid UTF-8 * sequence DATA of length LEN (which may have embedded NULLs). We can't * simply print the data, almost by definition we don't really know how it * is encoded. */static svn_error_t *invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool){ const char *last = svn_utf__last_valid(data, len); const char *valid_txt = "", *invalid_txt = ""; int i, valid, invalid; /* We will display at most 24 valid octets (this may split a leading multi-byte character) as that should fit on one 80 character line. */ valid = last - data; if (valid > 24) valid = 24; for (i = 0; i < valid; ++i) valid_txt = apr_pstrcat(pool, valid_txt, apr_psprintf(pool, " %02x", (unsigned char)last[i-valid]), NULL); /* 4 invalid octets will guarantee that the faulty octet is displayed */ invalid = data + len - last; if (invalid > 4) invalid = 4; for (i = 0; i < invalid; ++i) invalid_txt = apr_pstrcat(pool, invalid_txt, apr_psprintf(pool, " %02x", (unsigned char)last[i]), NULL); return svn_error_createf(APR_EINVAL, NULL, _("Valid UTF-8 data\n(hex:%s)\n" "followed by invalid UTF-8 sequence\n(hex:%s)"), valid_txt, invalid_txt);}/* Verify that the sequence DATA of length LEN is valid UTF-8 */static svn_error_t *check_utf8(const char *data, apr_size_t len, apr_pool_t *pool){ if (! svn_utf__is_valid(data, len)) return invalid_utf8(data, len, pool); return SVN_NO_ERROR;}/* Verify that the NULL terminated sequence DATA is valid UTF-8 */static svn_error_t *check_cstring_utf8(const char *data, apr_pool_t *pool){ if (! svn_utf__cstring_is_valid(data)) return invalid_utf8(data, strlen(data), pool); return SVN_NO_ERROR;}svn_error_t *svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool){ xlate_handle_node_t *node; svn_error_t *err;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?