📄 utf.c
字号:
/*
* utf.c: UTF-8 conversion routines
*
* ====================================================================
* Copyright (c) 2000-2004 CollabNet. All rights reserved.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at http://subversion.tigris.org/license-1.html.
* If newer versions of this license are posted there, you may use a
* newer version instead, at your option.
*
* This software consists of voluntary contributions made by many
* individuals. For exact contribution history, see the revision
* history and logs, available at http://subversion.tigris.org/.
* ====================================================================
*/
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <apr_strings.h>
#include <apr_lib.h>
#include <apr_xlate.h>
#include <apr_thread_proc.h>
#include "svn_string.h"
#include "svn_error.h"
#include "svn_pools.h"
#include "svn_utf.h"
#include "utf_impl.h"
/* Keys under which xlate handle nodes are cached: the first is used for
native-to-UTF-8 handles, the second for UTF-8-to-native handles. */
#define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
#define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"
#if APR_HAS_THREADS
/* Mutex locked around accesses to the global xlate handle cache.
Created by svn_utf_initialize(); NULL until then and again after the
cache has been cleaned up. */
static apr_thread_mutex_t *xlate_handle_mutex = NULL;
#endif
/* The xlate handle cache is a global hash table whose values lead to
 * linked lists of xlate handles.  In multi-threaded environments, a
 * thread "borrows" an xlate handle from the cache for the duration of a
 * translation and puts it back afterwards.  This avoids holding a global
 * lock for all translations.
 * If there is no handle for a particular key when one is needed, a new
 * handle is created and put in the cache after use.
 * This means that there will be at most N handles open for a key, where N
 * is the number of simultaneous handles in use for that key. */
struct xlate_handle_node_t
{
  /* The translation handle itself.  NULL when no usable handle is
     attached to this node (see xlate_handle_node_cleanup). */
  apr_xlate_t *handle;

  /* The next node in the cache's list for the same key; NULL while the
     node is borrowed from the cache. */
  struct xlate_handle_node_t *next;
};

typedef struct xlate_handle_node_t xlate_handle_node_t;
/* The global xlate handle cache.  It maps userdata_key strings to
pointers to pointers to the first entry in the linked list of xlate
handles for that key.
We don't store the pointer to the list head directly in the hash table,
since we remove/insert entries at the head of the list in the code below,
and we can't use apr_hash_set() in each character translation because that
function allocates memory in each call where the value is non-NULL.
Since these allocations take place in a global pool, this would be a
memory leak.
NULL until svn_utf_initialize() has created the cache, and NULL again
once xlate_cleanup() has run. */
static apr_hash_t *xlate_handle_hash = NULL;
/* Pool cleanup handler for the global xlate handle cache.  ARG is unused.

   Destroys the cache mutex (when APR_HAS_THREADS) and resets the cache
   globals to NULL, so that translations requested from other cleanup
   functions still work, just without the global cache.

   Always returns APR_SUCCESS. */
static apr_status_t
xlate_cleanup (void *arg)
{
  (void) arg;

#if APR_HAS_THREADS
  /* The status of the destroy call is deliberately not inspected. */
  apr_thread_mutex_destroy (xlate_handle_mutex);
  xlate_handle_mutex = NULL;
#endif

  xlate_handle_hash = NULL;

  return APR_SUCCESS;
}
/* Set the handle of ARG to NULL. */
static apr_status_t
xlate_handle_node_cleanup (void *arg)
{
xlate_handle_node_t *node = arg;
node->handle = NULL;
return APR_SUCCESS;
}
/* Set up the global xlate handle cache, using a subpool of POOL.

   Does nothing if the cache already exists.  When APR_HAS_THREADS, the
   cache is only set up if its mutex can be created; otherwise the
   function returns with the cache left uninitialized. */
void
svn_utf_initialize (apr_pool_t *pool)
{
  apr_pool_t *cache_pool;
#if APR_HAS_THREADS
  apr_thread_mutex_t *cache_mutex;
#endif

  /* Already initialized. */
  if (xlate_handle_hash)
    return;

  /* We create our own subpool, which we protect with the mutex.
     We can't use the pool passed to us by the caller, since we will
     use it for xlate handle allocations, possibly in multiple threads,
     and pool allocation is not thread-safe. */
  cache_pool = svn_pool_create (pool);

#if APR_HAS_THREADS
  /* NOTE(review): on failure the freshly created subpool is not
     destroyed here; it stays attached to POOL. */
  if (apr_thread_mutex_create (&cache_mutex, APR_THREAD_MUTEX_DEFAULT,
                               cache_pool)
      != APR_SUCCESS)
    return;

  xlate_handle_mutex = cache_mutex;
#endif

  xlate_handle_hash = apr_hash_make (cache_pool);

  /* Reset the cache globals when the subpool goes away. */
  apr_pool_cleanup_register (cache_pool, NULL, xlate_cleanup,
                             apr_pool_cleanup_null);
}
/* Set *RET to a handle node for converting from FROMPAGE to TOPAGE.

If USERDATA_KEY is non-NULL, a cached node is looked for first: in the
global xlate_handle_hash when that exists (a node found there is removed
from its list; the hash is accessed under xlate_handle_mutex when
APR_HAS_THREADS), otherwise in the userdata of POOL.  A cached node is
only reused if its handle is non-NULL.

If no cached node is reused, a new node is allocated and
apr_xlate_open() is called for it, using the pool of the global hash
when the global cache is in use and POOL otherwise.

If apr_xlate_open() fails with a status for which APR_STATUS_IS_EINVAL
or APR_STATUS_IS_ENOTIMPL is true, *RET still points to a node, but the
node's handle is set to NULL, and SVN_NO_ERROR is returned.  Any other
apr_xlate_open() failure, and any failure to lock or unlock the mutex,
is returned as an error. */
static svn_error_t *
get_xlate_handle_node (xlate_handle_node_t **ret,
const char *topage, const char *frompage,
const char *userdata_key, apr_pool_t *pool)
{
xlate_handle_node_t **old_handle_p;
xlate_handle_node_t *old_handle = NULL;
apr_status_t apr_err;
/* If we already have a handle, just return it. */
if (userdata_key)
{
if (xlate_handle_hash)
{
/* Global cache: look at the head of the list stored for this key. */
#if APR_HAS_THREADS
apr_err = apr_thread_mutex_lock (xlate_handle_mutex);
if (apr_err != APR_SUCCESS)
return svn_error_create (apr_err, NULL,
"Can't lock charset translation "
"mutex");
#endif
old_handle_p = apr_hash_get (xlate_handle_hash, userdata_key,
APR_HASH_KEY_STRING);
if (old_handle_p)
old_handle = *old_handle_p;
if (old_handle)
{
/* Ensure that the handle is still valid. */
if (old_handle->handle)
{
/* Remove from the list. */
*old_handle_p = old_handle->next;
old_handle->next = NULL;
#if APR_HAS_THREADS
apr_err = apr_thread_mutex_unlock (xlate_handle_mutex);
if (apr_err != APR_SUCCESS)
return svn_error_create (apr_err, NULL,
"Can't unlock charset "
"translation mutex");
#endif
*ret = old_handle;
return SVN_NO_ERROR;
}
/* NOTE(review): a head node whose handle is NULL is left in the
list and a new node is allocated below.  If nodes with a NULL
handle get put back into the cache, each such lookup appears to
allocate another node from the cache pool -- confirm whether
that is intended. */
}
}
else
{
void *p;
/* We fall back on a per-pool cache instead. */
apr_pool_userdata_get (&p, userdata_key, pool);
old_handle = p;
/* Ensure that the handle is still valid. */
if (old_handle && old_handle->handle)
{
*ret = old_handle;
return SVN_NO_ERROR;
}
}
}
/* Note that we still have the mutex locked (if it is initialized), so we
can use the global pool for creating the new xlate handle. */
/* Use the correct pool for creating the handle. */
if (userdata_key && xlate_handle_hash)
pool = apr_hash_pool_get (xlate_handle_hash);
/* Try to create a handle. */
*ret = apr_palloc (pool, sizeof(xlate_handle_node_t));
apr_err = apr_xlate_open (&(**ret).handle, topage, frompage, pool);
(**ret).next = NULL;
/* If we are called from inside a pool cleanup handler, the just created
xlate handle will be closed when that handler returns by a newly
registered cleanup handler, however, the handle is still cached by us.
To prevent this, we register a cleanup handler that will reset our
handle, so we don't use an invalid one. */
apr_pool_cleanup_register (pool, *ret, xlate_handle_node_cleanup,
apr_pool_cleanup_null);
/* Don't need the lock anymore. */
#if APR_HAS_THREADS
if (userdata_key && xlate_handle_hash)
{
apr_status_t unlock_err = apr_thread_mutex_unlock (xlate_handle_mutex);
if (unlock_err != APR_SUCCESS)
return svn_error_create (unlock_err, NULL,
"Can't unlock charset translation "
"mutex");
}
#endif
/* These two statuses are not reported as errors: the caller gets a node
whose handle is NULL instead. */
if (APR_STATUS_IS_EINVAL (apr_err) || APR_STATUS_IS_ENOTIMPL (apr_err))
{
(*ret)->handle = NULL;
return SVN_NO_ERROR;
}
if (apr_err != APR_SUCCESS)
/* Can't use svn_error_wrap_apr here because it calls functions in
this file, leading to infinite recursion. */
return svn_error_createf
(apr_err, NULL, "Can't create a converter from '%s' to '%s'",
(topage == APR_LOCALE_CHARSET ? "native" : topage),
(frompage == APR_LOCALE_CHARSET ? "native" : frompage));
return SVN_NO_ERROR;
}
/* Return NODE to the xlate handle cache so that later calls can reuse it.

   NODE must not be linked into a list (NODE->next must be NULL).  If
   USERDATA_KEY is NULL, nothing is cached.  If the global cache exists,
   NODE is pushed onto the front of the list for USERDATA_KEY, creating
   the list head on first use; otherwise NODE is stored in the userdata
   of POOL under USERDATA_KEY.

   A failure to lock or unlock the cache mutex calls abort().
   ### Mutex errors here are very weird.  Should we handle them "correctly"
   ### even if that complicates error handling in the routines below? */
static void
put_xlate_handle_node (xlate_handle_node_t *node,
                       const char *userdata_key,
                       apr_pool_t *pool)
{
  xlate_handle_node_t **list_head;

  assert (node->next == NULL);

  if (userdata_key == NULL)
    return;

  if (xlate_handle_hash == NULL)
    {
      /* No global cache: store it in the per-pool cache. */
      apr_pool_userdata_set (node, userdata_key, apr_pool_cleanup_null, pool);
      return;
    }

#if APR_HAS_THREADS
  if (apr_thread_mutex_lock (xlate_handle_mutex) != APR_SUCCESS)
    abort ();
#endif

  list_head = apr_hash_get (xlate_handle_hash, userdata_key,
                            APR_HASH_KEY_STRING);
  if (! list_head)
    {
      /* First node for this key: allocate the list head in the cache's
         own pool and register it in the hash. */
      list_head = apr_palloc (apr_hash_pool_get (xlate_handle_hash),
                              sizeof (*list_head));
      *list_head = NULL;
      apr_hash_set (xlate_handle_hash, userdata_key,
                    APR_HASH_KEY_STRING, list_head);
    }

  /* Push NODE onto the front of the list. */
  node->next = *list_head;
  *list_head = node;

#if APR_HAS_THREADS
  if (apr_thread_mutex_unlock (xlate_handle_mutex) != APR_SUCCESS)
    abort ();
#endif
}
/* Set *RET to a handle node for converting native (locale charset)
   characters to UTF-8.  This forwards to get_xlate_handle_node() with the
   native-to-UTF-8 cache key and POOL, and returns whatever that returns. */
static svn_error_t *
get_ntou_xlate_handle_node (xlate_handle_node_t **ret, apr_pool_t *pool)
{
  const char *const to_charset = "UTF-8";
  const char *const from_charset = APR_LOCALE_CHARSET;

  return get_xlate_handle_node (ret, to_charset, from_charset,
                                SVN_UTF_NTOU_XLATE_HANDLE, pool);
}
/* Set *RET to a handle node for converting UTF-8 to native (locale
   charset) characters.  This forwards to get_xlate_handle_node() with the
   UTF-8-to-native cache key and POOL, and returns whatever that returns. */
static svn_error_t *
get_uton_xlate_handle_node (xlate_handle_node_t **ret, apr_pool_t *pool)
{
  const char *const to_charset = APR_LOCALE_CHARSET;
  const char *const from_charset = "UTF-8";

  return get_xlate_handle_node (ret, to_charset, from_charset,
                                SVN_UTF_UTON_XLATE_HANDLE, pool);
}
/* Convert SRC_LENGTH bytes of SRC_DATA using CONVSET, and store the
   result in *DEST, which is allocated in POOL.

   *DEST is always set to a stringbuf.  If SRC_LENGTH is 0 it is left
   empty and SVN_NO_ERROR is returned without calling the converter.

   If the converter reports an error, an error with that status and the
   message "Can't recode string" is returned; *DEST then holds whatever
   output was produced before the failure and is not NUL-terminated at
   its current length.  On success *DEST is NUL-terminated. */
static svn_error_t *
convert_to_stringbuf (apr_xlate_t *convset,
                      const char *src_data,
                      apr_size_t src_length,
                      svn_stringbuf_t **dest,
                      apr_pool_t *pool)
{
  /* A 1:2 ratio of input bytes to output bytes should be enough for most
     translations.  If it turns out not to be, the buffer is grown below,
     based on how much input is still unconverted. */
  apr_size_t buflen = src_length * 2;
  apr_status_t apr_err;
  apr_size_t srclen = src_length;
  apr_size_t destlen;
  char *destbuf;

  /* Initialize *DEST to an empty stringbuf. */
  *dest = svn_stringbuf_create ("", pool);

  /* Not only does it not make sense to convert an empty string, but
     apr-iconv is quite unreasonable about not allowing that. */
  if (src_length == 0)
    return SVN_NO_ERROR;

  do
    {
      /* Ensure that *DEST has sufficient storage for the translated
         result, plus the terminating NUL. */
      svn_stringbuf_ensure (*dest, buflen + 1);

      /* Point at the first byte after the already-converted output, and
         compute how much room is left there. */
      destbuf = (*dest)->data + (*dest)->len;
      destlen = buflen - (*dest)->len;

      /* Attempt the conversion.  SRCLEN and DESTLEN are updated to the
         number of input bytes left and output bytes still free. */
      apr_err = apr_xlate_conv_buffer (convset,
                                       src_data + (src_length - srclen),
                                       &srclen,
                                       destbuf,
                                       &destlen);

      /* Now, update the *DEST->len to track the amount of output data
         churned out so far from this loop. */
      (*dest)->len += ((buflen - (*dest)->len) - destlen);

      /* If input is left over, the output buffer was too small.  Always
         grow it in that case, even when a few bytes are still free: the
         converter may have stopped because the next multi-byte character
         did not fit, and offering the same space again would make no
         progress and loop forever.  (When SRCLEN is 0 this adds nothing
         and the loop ends.) */
      buflen += srclen * 3;
    }
  while (! apr_err && srclen);

  /* If we exited the loop with an error, return the error. */
  if (apr_err)
    /* Can't use svn_error_wrap_apr here because it calls functions in
       this file, leading to infinite recursion. */
    return svn_error_create (apr_err, NULL, "Can't recode string");

  /* Else, exited due to success.  Terminate the result at the right
     length. */
  (*dest)->data[(*dest)->len] = '\0';

  return SVN_NO_ERROR;
}
/* Return APR_EINVAL if the first LEN bytes of DATA contain anything
other than seven-bit, non-control (except for whitespace) ASCII
characters, finding the error pool from POOL. Otherwise, return
SVN_NO_ERROR. */
static svn_error_t *
check_non_ascii (const char *data, apr_size_t len, apr_pool_t *pool)
{
const char *data_start = data;
for (; len > 0; --len, data++)
{
if ((! apr_isascii (*data))
|| ((! apr_isspace (*data))
&& apr_iscntrl (*data)))
{
/* Show the printable part of the data, followed by the
decimal code of the questionable character. Because if a
user ever gets this error, she's going to have to spend
time tracking down the non-ASCII data, so we want to help
as much as possible. And yes, we just call the unsafe
data "non-ASCII", even though the actual constraint is
somewhat more complex than that. */
if (data - data_start)
{
const char *error_data
= apr_pstrndup (pool, data_start, (data - data_start));
return svn_error_createf
(APR_EINVAL, NULL,
"Safe data:\n"
"\"%s\"\n"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -