📄 utf.c
字号:
"... was followed by non-ASCII byte %d.\n"
"\n"
"Non-ASCII character detected (see above), "
"and unable to convert to/from UTF-8",
error_data, *((const unsigned char *) data));
}
else
{
return svn_error_createf
(APR_EINVAL, NULL,
"Non-ASCII character (code %d) detected, "
"and unable to convert to/from UTF-8",
*((const unsigned char *) data));
}
}
}
return SVN_NO_ERROR;
}
/* Construct an error with a suitable message to describe the invalid UTF-8
 * sequence DATA of length LEN (which may have embedded NULs).  We can't
 * simply print the data, almost by definition we don't really know how it
 * is encoded.
 *
 * The message lists, as two-digit hex octets, at most 24 octets that
 * immediately precede the position returned by svn_utf__last_valid,
 * followed by at most 4 octets starting at that position.  Intermediate
 * strings are allocated in POOL.  The returned error always carries
 * APR_EINVAL.
 */
static svn_error_t *
invalid_utf8 (const char *data, apr_size_t len, apr_pool_t *pool)
{
  /* LAST is treated as the boundary: octets before it are reported as
     valid, octets from it onward as invalid.  This assumes
     DATA <= LAST <= DATA + LEN -- TODO confirm against
     svn_utf__last_valid. */
  const char *last = svn_utf__last_valid (data, len);
  const char *end = data + len;
  const char *msg = "Valid UTF-8 data\n(hex:";
  const char *valid_start;
  apr_size_t i, valid, invalid;

  /* We will display at most 24 valid octets (this may split a leading
     multi-byte character) as that should fit on one 80 character line.
     The count is kept in an apr_size_t so that a large pointer difference
     is not truncated; a nonsensical LAST yields zero octets. */
  valid = (last > data) ? (apr_size_t) (last - data) : 0;
  if (valid > 24)
    valid = 24;

  valid_start = last - valid;
  for (i = 0; i < valid; ++i)
    msg = apr_pstrcat (pool, msg,
                       apr_psprintf (pool, " %02x",
                                     (unsigned char) valid_start[i]),
                       NULL);

  msg = apr_pstrcat (pool, msg,
                     ")\nfollowed by invalid UTF-8 sequence\n(hex:", NULL);

  /* 4 invalid octets will guarantee that the faulty octet is displayed */
  invalid = (end > last) ? (apr_size_t) (end - last) : 0;
  if (invalid > 4)
    invalid = 4;

  for (i = 0; i < invalid; ++i)
    msg = apr_pstrcat (pool, msg,
                       apr_psprintf (pool, " %02x",
                                     (unsigned char) last[i]),
                       NULL);

  msg = apr_pstrcat (pool, msg, ")", NULL);

  return svn_error_create (APR_EINVAL, NULL, msg);
}
/* Check the LEN octets at DATA with svn_utf__is_valid.  Return
   SVN_NO_ERROR when they pass; otherwise return the error built by
   invalid_utf8 (using POOL). */
static svn_error_t *
check_utf8 (const char *data, apr_size_t len, apr_pool_t *pool)
{
  if (svn_utf__is_valid (data, len))
    return SVN_NO_ERROR;

  return invalid_utf8 (data, len, pool);
}
/* Check the NUL-terminated string DATA with svn_utf__cstring_is_valid.
   Return SVN_NO_ERROR when it passes; otherwise return the error built by
   invalid_utf8 for the whole string (using POOL). */
static svn_error_t *
check_cstring_utf8 (const char *data, apr_pool_t *pool)
{
  if (svn_utf__cstring_is_valid (data))
    return SVN_NO_ERROR;

  return invalid_utf8 (data, strlen (data), pool);
}
/* Set *DEST to a copy of SRC converted with the handle obtained from
   get_ntou_xlate_handle_node, allocating in POOL.

   When a translation handle is available, the converted result must also
   pass check_utf8.  When no handle is available, SRC is accepted only if
   it passes check_non_ascii, and is then duplicated unchanged.

   Returns the first error encountered, or SVN_NO_ERROR.  *DEST is only
   meaningful on success. */
svn_error_t *
svn_utf_stringbuf_to_utf8 (svn_stringbuf_t **dest,
                           const svn_stringbuf_t *src,
                           apr_pool_t *pool)
{
  xlate_handle_node_t *node;
  svn_error_t *err;

  SVN_ERR (get_ntou_xlate_handle_node (&node, pool));

  if (node->handle)
    {
      err = convert_to_stringbuf (node->handle, src->data, src->len, dest,
                                  pool);
      if (! err)
        err = check_utf8 ((*dest)->data, (*dest)->len, pool);
    }
  else
    {
      err = check_non_ascii (src->data, src->len, pool);
      if (! err)
        *dest = svn_stringbuf_dup (src, pool);
    }

  /* Hand the node back on every path, including the no-handle one, the
     same way the cstring converters in this file do. */
  put_xlate_handle_node (node, SVN_UTF_NTOU_XLATE_HANDLE, pool);

  return err;
}
/* Set *DEST to a string holding SRC converted with the handle obtained
   from get_ntou_xlate_handle_node, allocating in POOL.

   When a translation handle is available, the converted result must also
   pass check_utf8.  When no handle is available, SRC is accepted only if
   it passes check_non_ascii, and is then duplicated unchanged.

   Returns the first error encountered, or SVN_NO_ERROR.  *DEST is only
   set on success. */
svn_error_t *
svn_utf_string_to_utf8 (const svn_string_t **dest,
                        const svn_string_t *src,
                        apr_pool_t *pool)
{
  xlate_handle_node_t *node;
  svn_error_t *err;

  SVN_ERR (get_ntou_xlate_handle_node (&node, pool));

  if (node->handle)
    {
      svn_stringbuf_t *destbuf;

      err = convert_to_stringbuf (node->handle, src->data, src->len,
                                  &destbuf, pool);
      if (! err)
        err = check_utf8 (destbuf->data, destbuf->len, pool);
      if (! err)
        *dest = svn_string_create_from_buf (destbuf, pool);
    }
  else
    {
      err = check_non_ascii (src->data, src->len, pool);
      if (! err)
        *dest = svn_string_dup (src, pool);
    }

  /* Hand the node back on every path, including the no-handle one, the
     same way the cstring converters in this file do. */
  put_xlate_handle_node (node, SVN_UTF_NTOU_XLATE_HANDLE, pool);

  return err;
}
/* Shared worker behind svn_utf_cstring_to_utf8,
   svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
   svn_utf_cstring_from_utf8_ex.

   Set *DEST to the NUL-terminated string SRC translated through CONVSET,
   allocating in POOL.  If CONVSET is NULL no translation is attempted:
   SRC must then pass check_non_ascii and *DEST becomes a plain copy. */
static svn_error_t *
convert_cstring (const char **dest,
                 const char *src,
                 apr_xlate_t *convset,
                 apr_pool_t *pool)
{
  apr_size_t src_len = strlen (src);

  if (! convset)
    {
      /* No translator available: only accept what check_non_ascii lets
         through, then duplicate it as is. */
      SVN_ERR (check_non_ascii (src, src_len, pool));
      *dest = apr_pstrmemdup (pool, src, src_len);
    }
  else
    {
      svn_stringbuf_t *converted;

      SVN_ERR (convert_to_stringbuf (convset, src, src_len,
                                     &converted, pool));
      *dest = converted->data;
    }

  return SVN_NO_ERROR;
}
/* Set *DEST to SRC converted via the handle from
   get_ntou_xlate_handle_node, allocating in POOL.  The result must pass
   check_cstring_utf8.  Returns the first error encountered, or
   SVN_NO_ERROR. */
svn_error_t *
svn_utf_cstring_to_utf8 (const char **dest,
                         const char *src,
                         apr_pool_t *pool)
{
  xlate_handle_node_t *xlate_node;
  svn_error_t *convert_err;

  SVN_ERR (get_ntou_xlate_handle_node (&xlate_node, pool));

  /* Release the node before looking at the conversion result, so it is
     handed back on the error path too. */
  convert_err = convert_cstring (dest, src, xlate_node->handle, pool);
  put_xlate_handle_node (xlate_node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
  if (convert_err)
    return convert_err;

  return check_cstring_utf8 (*dest, pool);
}
/* Like svn_utf_cstring_to_utf8, but the translation handle is requested
   from get_xlate_handle_node for a conversion from FROMPAGE to "UTF-8",
   using CONVSET_KEY as the key passed to the get/put node helpers.
   The result must pass check_cstring_utf8.  Allocates in POOL. */
svn_error_t *
svn_utf_cstring_to_utf8_ex (const char **dest,
                            const char *src,
                            const char *frompage,
                            const char *convset_key,
                            apr_pool_t *pool)
{
  xlate_handle_node_t *xlate_node;
  svn_error_t *convert_err;

  SVN_ERR (get_xlate_handle_node (&xlate_node, "UTF-8", frompage,
                                  convset_key, pool));

  /* Release the node before looking at the conversion result, so it is
     handed back on the error path too. */
  convert_err = convert_cstring (dest, src, xlate_node->handle, pool);
  put_xlate_handle_node (xlate_node, convset_key, pool);
  if (convert_err)
    return convert_err;

  return check_cstring_utf8 (*dest, pool);
}
/* Set *DEST to a copy of SRC converted with the handle obtained from
   get_uton_xlate_handle_node, allocating in POOL.

   When no translation handle is available, SRC is accepted only if it
   passes check_non_ascii, and is then duplicated unchanged.

   Returns the first error encountered, or SVN_NO_ERROR.  *DEST is only
   meaningful on success. */
svn_error_t *
svn_utf_stringbuf_from_utf8 (svn_stringbuf_t **dest,
                             const svn_stringbuf_t *src,
                             apr_pool_t *pool)
{
  xlate_handle_node_t *node;
  svn_error_t *err;

  SVN_ERR (get_uton_xlate_handle_node (&node, pool));

  if (node->handle)
    {
      err = convert_to_stringbuf (node->handle, src->data, src->len, dest,
                                  pool);
    }
  else
    {
      err = check_non_ascii (src->data, src->len, pool);
      if (! err)
        *dest = svn_stringbuf_dup (src, pool);
    }

  /* Hand the node back on every path, including the no-handle one, the
     same way the cstring converters in this file do. */
  put_xlate_handle_node (node, SVN_UTF_UTON_XLATE_HANDLE, pool);

  return err;
}
/* Set *DEST to a string holding SRC converted with the handle obtained
   from get_uton_xlate_handle_node, allocating in POOL.

   When no translation handle is available, SRC is accepted only if it
   passes check_non_ascii, and is then duplicated unchanged.

   Returns the first error encountered, or SVN_NO_ERROR.  *DEST is only
   set on success. */
svn_error_t *
svn_utf_string_from_utf8 (const svn_string_t **dest,
                          const svn_string_t *src,
                          apr_pool_t *pool)
{
  xlate_handle_node_t *node;
  svn_error_t *err;

  SVN_ERR (get_uton_xlate_handle_node (&node, pool));

  if (node->handle)
    {
      svn_stringbuf_t *dbuf;

      err = convert_to_stringbuf (node->handle, src->data, src->len,
                                  &dbuf, pool);
      if (! err)
        *dest = svn_string_create_from_buf (dbuf, pool);
    }
  else
    {
      err = check_non_ascii (src->data, src->len, pool);
      if (! err)
        *dest = svn_string_dup (src, pool);
    }

  /* Hand the node back on every path, including the no-handle one, the
     same way the cstring converters in this file do. */
  put_xlate_handle_node (node, SVN_UTF_UTON_XLATE_HANDLE, pool);

  return err;
}
/* Set *DEST to SRC converted via the handle from
   get_uton_xlate_handle_node, allocating in POOL.  Returns the error from
   obtaining the node or from convert_cstring, or SVN_NO_ERROR. */
svn_error_t *
svn_utf_cstring_from_utf8 (const char **dest,
                           const char *src,
                           apr_pool_t *pool)
{
  xlate_handle_node_t *xlate_node;
  svn_error_t *convert_err;

  SVN_ERR (get_uton_xlate_handle_node (&xlate_node, pool));

  /* The node is handed back whether or not the conversion worked. */
  convert_err = convert_cstring (dest, src, xlate_node->handle, pool);
  put_xlate_handle_node (xlate_node, SVN_UTF_UTON_XLATE_HANDLE, pool);

  if (convert_err)
    return convert_err;

  return SVN_NO_ERROR;
}
/* Like svn_utf_cstring_from_utf8, but the translation handle is requested
   from get_xlate_handle_node for a conversion from "UTF-8" to TOPAGE,
   using CONVSET_KEY as the key passed to the get/put node helpers.
   Allocates in POOL. */
svn_error_t *
svn_utf_cstring_from_utf8_ex (const char **dest,
                              const char *src,
                              const char *topage,
                              const char *convset_key,
                              apr_pool_t *pool)
{
  xlate_handle_node_t *xlate_node;
  svn_error_t *convert_err;

  SVN_ERR (get_xlate_handle_node (&xlate_node, topage, "UTF-8",
                                  convset_key, pool));

  /* The node is handed back whether or not the conversion worked. */
  convert_err = convert_cstring (dest, src, xlate_node->handle, pool);
  put_xlate_handle_node (xlate_node, convset_key, pool);

  return convert_err;
}
/* Return a string, allocated in POOL, derived from the NUL-terminated
   SRC as follows:

     1. Every octet of SRC for which apr_isascii is false is replaced by
        the five characters "?\XXX", where XXX is the octet's value as a
        zero-padded three-digit decimal number; other octets are copied
        unchanged.
     2. The escaped string is passed to CONVERT_FROM_UTF8.  If that
        succeeds its output is returned; if it fails the error is cleared
        and the escaped string itself is returned.

   This function never returns an error. */
const char *
svn_utf__cstring_from_utf8_fuzzy (const char *src,
                                  apr_pool_t *pool,
                                  svn_error_t *(*convert_from_utf8)
                                  (const char **, const char *, apr_pool_t *))
{
  const char *scan;
  apr_size_t escaped_len = 0;
  char *escaped;
  char *out;
  const char *converted;
  svn_error_t *err;

  /* First count how big a dest string we'll need:
     5 slots for "?\XXX", one slot for a 7-bit char. */
  for (scan = src; *scan; scan++)
    escaped_len += apr_isascii (*scan) ? 1 : 5;

  /* Allocate that amount, plus the terminator. */
  escaped = apr_palloc (pool, escaped_len + 1);

  /* And fill it up. */
  out = escaped;
  for (scan = src; *scan; scan++)
    {
      if (apr_isascii (*scan))
        {
          *out++ = *scan;
        }
      else
        {
          /* An unsigned char formats to at most three digits, so this
             writes exactly 5 characters plus a NUL.  The NUL lands inside
             the buffer and is overwritten by the next character or by the
             final terminator below. */
          sprintf (out, "?\\%03u", (unsigned char) *scan);
          out += 5;
        }
    }
  *out = '\0';

  /* ### Check the client locale, maybe we can avoid that second
   * conversion!  See Ulrich Drepper's patch at
   * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
   */

  /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
     contain only 7-bit bytes :-).  Recode to native...

     The result is received in a real `const char *' instead of storing
     through a cast `char **'.  CONVERTED starts out as the escaped string
     so the return value is well defined even if the callback reports
     success without writing its output. */
  converted = escaped;
  err = convert_from_utf8 (&converted, escaped, pool);
  if (err)
    {
      svn_error_clear (err);
      return escaped;
    }

  return converted;
}
/* Return the result of svn_utf__cstring_from_utf8_fuzzy applied to SRC
   with svn_utf_cstring_from_utf8 as the conversion callback, allocating
   in POOL. */
const char *
svn_utf_cstring_from_utf8_fuzzy (const char *src,
                                 apr_pool_t *pool)
{
  const char *fuzzy;

  fuzzy = svn_utf__cstring_from_utf8_fuzzy (src, pool,
                                            svn_utf_cstring_from_utf8);
  return fuzzy;
}
/* Convert SRC with svn_utf_stringbuf_from_utf8 (allocating in POOL) and
   set *DEST to the data pointer of the resulting stringbuf.  Returns the
   conversion error, if any; *DEST is untouched in that case. */
svn_error_t *
svn_utf_cstring_from_utf8_stringbuf (const char **dest,
                                     const svn_stringbuf_t *src,
                                     apr_pool_t *pool)
{
  svn_stringbuf_t *converted;
  svn_error_t *err;

  err = svn_utf_stringbuf_from_utf8 (&converted, src, pool);
  if (err)
    return err;

  *dest = converted->data;
  return SVN_NO_ERROR;
}
/* Set *DEST to a NUL-terminated string holding SRC converted with the
   handle obtained from get_uton_xlate_handle_node, allocating in POOL.

   When no translation handle is available, SRC is accepted only if it
   passes check_non_ascii, and is then copied unchanged.

   Returns the first error encountered, or SVN_NO_ERROR.  *DEST is only
   set on success. */
svn_error_t *
svn_utf_cstring_from_utf8_string (const char **dest,
                                  const svn_string_t *src,
                                  apr_pool_t *pool)
{
  xlate_handle_node_t *node;
  svn_error_t *err;

  SVN_ERR (get_uton_xlate_handle_node (&node, pool));

  if (node->handle)
    {
      svn_stringbuf_t *dbuf;

      err = convert_to_stringbuf (node->handle, src->data, src->len,
                                  &dbuf, pool);
      if (! err)
        *dest = dbuf->data;
    }
  else
    {
      err = check_non_ascii (src->data, src->len, pool);
      if (! err)
        *dest = apr_pstrmemdup (pool, src->data, src->len);
    }

  /* Hand the node back on every path, including the no-handle one, the
     same way the cstring converters in this file do. */
  put_xlate_handle_node (node, SVN_UTF_UTON_XLATE_HANDLE, pool);

  return err;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -