首页 › 资源下载 › 网络 › linux subdivision › 源码查看
utf.c

来自「linux subdivision ying gai ke yi le ba」· C语言代码 · 共 808 行 · 第 1/2 页
808 行
/*
 * utf.c:  UTF-8 conversion routines
 *
 * ====================================================================
 * Copyright (c) 2000-2004 CollabNet.  All rights reserved.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution.  The terms
 * are also available at http://subversion.tigris.org/license-1.html.
 * If newer versions of this license are posted there, you may use a
 * newer version instead, at your option.
 *
 * This software consists of voluntary contributions made by many
 * individuals.  For exact contribution history, see the revision
 * history and logs, available at http://subversion.tigris.org/.
 * ====================================================================
 */



#include <string.h>
#include <ctype.h>
#include <assert.h>

#include <apr_strings.h>
#include <apr_lib.h>
#include <apr_xlate.h>
#include <apr_thread_proc.h>

#include "svn_string.h"
#include "svn_error.h"
#include "svn_pools.h"
#include "svn_utf.h"
#include "utf_impl.h"



/* Cache keys for the two conversion directions handled in this file:
   native-to-UTF-8 (NTOU) and UTF-8-to-native (UTON).  They are passed
   as the USERDATA_KEY argument of get_xlate_handle_node() and
   put_xlate_handle_node(). */
#define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
#define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"

#if APR_HAS_THREADS
/* Mutex held while the global xlate handle cache is accessed.  NULL until
   svn_utf_initialize() creates it, and reset to NULL by xlate_cleanup(). */
static apr_thread_mutex_t *xlate_handle_mutex = NULL;
#endif

/* The xlate handle cache is a global hash table with linked lists of xlate
 * handles.  In multi-threaded environments, a thread "borrows" an xlate
 * handle from the cache during a translation and puts it back afterwards.
 * This avoids holding a global lock for all translations.
 * If there is no handle for a particular key when one is needed, a new
 * handle is created and put in the cache after use.
 * This means that there will be at most N handles open for a key, where N
 * is the number of simultaneous handles in use for that key. */

/* One cached translation handle, linkable into a per-key list. */
typedef struct xlate_handle_node_t {
  /* The APR translation handle.  NULL when opening it failed with
     APR_EINVAL/APR_ENOTIMPL, or after xlate_handle_node_cleanup() has
     run for this node. */
  apr_xlate_t *handle;
  /* Next node in the cache list; NULL while the node is handed out to a
     caller (see the assert in put_xlate_handle_node()). */
  struct xlate_handle_node_t *next;
} xlate_handle_node_t;

/* The global cache.  This maps userdata_key strings to pointers to pointers
   to the first entry in the linked list of xlate handles for that key.
   We don't store the pointer to the list head directly in the hash table,
   since we remove/insert entries at the head of the list in the code below,
   and we can't use apr_hash_set() in each character translation because that
   function allocates memory in each call where the value is non-NULL.
   Since these allocations take place in a global pool, this would be a
   memory leak.
   NULL until svn_utf_initialize() creates the table, and reset to NULL by
   xlate_cleanup(); while it is NULL a per-pool cache is used instead. */
static apr_hash_t *xlate_handle_hash = NULL;

/* Pool cleanup handler that tears down the global xlate handle cache.
   ARG is not used.  Always returns APR_SUCCESS. */
static apr_status_t
xlate_cleanup (void *arg)
{
  (void) arg;

#if APR_HAS_THREADS
  apr_thread_mutex_destroy (xlate_handle_mutex);
  xlate_handle_mutex = NULL;
#endif

  /* Clearing the cache variables lets translation keep working from
     other cleanup functions; it simply is no longer cached globally. */
  xlate_handle_hash = NULL;

  return APR_SUCCESS;
}

/* Set the handle of ARG to NULL. */
static apr_status_t
xlate_handle_node_cleanup (void *arg)
{
  xlate_handle_node_t *node = arg;

  node->handle = NULL;
  return APR_SUCCESS;
}

/* Set up the global xlate handle cache in a subpool of POOL.

   Does nothing if the cache already exists.  When APR has thread
   support and the cache mutex cannot be created, the cache is left
   uninitialized (callers then fall back on the per-pool cache) and the
   subpool created for it is destroyed again, so that repeated failed
   attempts do not accumulate subpools in POOL. */
void
svn_utf_initialize (apr_pool_t *pool)
{
  apr_pool_t *subpool;
#if APR_HAS_THREADS
  apr_thread_mutex_t *mutex;
#endif

  if (xlate_handle_hash)
    return;

  /* We create our own subpool, which we protect with the mutex.
     We can't use the pool passed to us by the caller, since we will
     use it for xlate handle allocations, possibly in multiple threads,
     and pool allocation is not thread-safe. */
  subpool = svn_pool_create (pool);

#if APR_HAS_THREADS
  if (apr_thread_mutex_create (&mutex, APR_THREAD_MUTEX_DEFAULT, subpool)
      != APR_SUCCESS)
    {
      /* No mutex, no global cache: release the subpool instead of
         leaving it behind in POOL. */
      svn_pool_destroy (subpool);
      return;
    }
  xlate_handle_mutex = mutex;
#endif

  xlate_handle_hash = apr_hash_make (subpool);

  /* Reset the cache variables when the subpool goes away. */
  apr_pool_cleanup_register (subpool, NULL, xlate_cleanup,
                             apr_pool_cleanup_null);
}

/* Set *RET to an xlate handle node for converting from FROMPAGE to
   TOPAGE.

   If USERDATA_KEY is non-NULL, first look for a cached node under that
   key: in the global cache when it is initialized (a node found there is
   removed from the cache list before being returned), otherwise in the
   userdata of POOL.  If no valid cached node is found, allocate a new
   node and open a new handle.  The new node is allocated in the global
   cache's pool when USERDATA_KEY is non-NULL and the global cache
   exists, and in POOL otherwise.

   If apr_xlate_open fails with APR_EINVAL or APR_ENOTIMPL, *RET still
   points to a node, but that node's HANDLE member is NULL, and
   SVN_NO_ERROR is returned.  Return an error if the open fails for any
   other reason, or if the cache mutex cannot be locked or unlocked. */
static svn_error_t *
get_xlate_handle_node (xlate_handle_node_t **ret,
                       const char *topage, const char *frompage,
                       const char *userdata_key, apr_pool_t *pool)
{
  xlate_handle_node_t **old_handle_p;
  xlate_handle_node_t *old_handle = NULL;
  apr_status_t apr_err;

  /* If we already have a handle, just return it. */
  if (userdata_key)
    {
      if (xlate_handle_hash)
        {
          /* Global cache: every access to the hash and its lists happens
             with the mutex held. */
#if APR_HAS_THREADS
          apr_err = apr_thread_mutex_lock (xlate_handle_mutex);
          if (apr_err != APR_SUCCESS)
            return svn_error_create (apr_err, NULL,
                                     "Can't lock charset translation "
                                     "mutex");
#endif
          /* The hash value is a pointer to the list head pointer; it is
             NULL if nothing has been stored under this key. */
          old_handle_p = apr_hash_get (xlate_handle_hash, userdata_key,
                                       APR_HASH_KEY_STRING);
          if (old_handle_p)
            old_handle = *old_handle_p;
          if (old_handle)
            {
              /* Ensure that the handle is still valid. */
              if (old_handle->handle)
                {
                  /* Remove from the list. */
                  *old_handle_p = old_handle->next;
                  old_handle->next = NULL;
#if APR_HAS_THREADS
                  apr_err = apr_thread_mutex_unlock (xlate_handle_mutex);
                  if (apr_err != APR_SUCCESS)
                    return svn_error_create (apr_err, NULL,
                                             "Can't unlock charset "
                                             "translation mutex");
#endif
                  *ret = old_handle;
                  return SVN_NO_ERROR;
                }
              /* Otherwise the head node is stale; it is left in the list
                 and we fall through, mutex still held, to create a new
                 node. */
            }
        }
      else
        {
          void *p;
          /* We fall back on a per-pool cache instead. */
          apr_pool_userdata_get (&p, userdata_key, pool);
          old_handle = p;
          /* Ensure that the handle is still valid. */
          if (old_handle && old_handle->handle)
            {
              *ret = old_handle;
              return SVN_NO_ERROR;
            }
        }
    }

  /* Note that we still have the mutex locked (if it is initialized), so we
     can use the global pool for creating the new xlate handle. */

  /* Use the correct pool for creating the handle. */
  if (userdata_key && xlate_handle_hash)
    pool = apr_hash_pool_get (xlate_handle_hash);

  /* Try to create a handle.  The status is kept in APR_ERR and only
     examined after the mutex has been released below. */
  *ret = apr_palloc (pool, sizeof(xlate_handle_node_t));
  apr_err = apr_xlate_open (&(**ret).handle, topage, frompage, pool);
  (**ret).next = NULL;

  /* If we are called from inside a pool cleanup handler, the just created
     xlate handle will be closed when that handler returns by a newly
     registered cleanup handler, however, the handle is still cached by us.
     To prevent this, we register a cleanup handler that will reset our
     handle, so we don't use an invalid one. */
  apr_pool_cleanup_register (pool, *ret, xlate_handle_node_cleanup,
                             apr_pool_cleanup_null);

  /* Don't need the lock anymore. */
#if APR_HAS_THREADS
  /* Same condition as the one under which the mutex was locked above. */
  if (userdata_key && xlate_handle_hash)
    {
      apr_status_t unlock_err = apr_thread_mutex_unlock (xlate_handle_mutex);
      if (unlock_err != APR_SUCCESS)
        return svn_error_create (unlock_err, NULL,
                                 "Can't unlock charset translation "
                                 "mutex");
    }
#endif

  /* An unsupported conversion is not reported as an error: the caller
     gets a node whose handle is NULL. */
  if (APR_STATUS_IS_EINVAL (apr_err) || APR_STATUS_IS_ENOTIMPL (apr_err))
    {
      (*ret)->handle = NULL;
      return SVN_NO_ERROR;
    }
  if (apr_err != APR_SUCCESS)
    /* Can't use svn_error_wrap_apr here because it calls functions in
       this file, leading to infinite recursion. */
    return svn_error_createf
      (apr_err, NULL, "Can't create a converter from '%s' to '%s'",
       (topage == APR_LOCALE_CHARSET ? "native" : topage),
       (frompage == APR_LOCALE_CHARSET ? "native" : frompage));

  return SVN_NO_ERROR;
}

/* Return NODE to the xlate handle cache under USERDATA_KEY so that later
   calls can reuse it.  NODE must not be linked into any list (its NEXT
   member must be NULL).

   If USERDATA_KEY is NULL, nothing is cached.  If the global cache
   exists, NODE is pushed onto the front of the list for USERDATA_KEY;
   otherwise NODE is stored as userdata of POOL.

   Failures to lock or unlock the cache mutex are not reported to the
   caller; the process is aborted instead.
   ### Mutex errors here are very weird. Should we handle them "correctly"
   ### even if that complicates error handling in the routines below? */
static void
put_xlate_handle_node (xlate_handle_node_t *node,
                       const char *userdata_key,
                       apr_pool_t *pool)
{
  xlate_handle_node_t **list_head;

  assert (node->next == NULL);

  if (!userdata_key)
    return;

  if (!xlate_handle_hash)
    {
      /* No global cache: remember the node in the per-pool cache. */
      apr_pool_userdata_set (node, userdata_key, apr_pool_cleanup_null, pool);
      return;
    }

#if APR_HAS_THREADS
  if (apr_thread_mutex_lock (xlate_handle_mutex) != APR_SUCCESS)
    abort ();
#endif

  list_head = apr_hash_get (xlate_handle_hash, userdata_key,
                            APR_HASH_KEY_STRING);
  if (!list_head)
    {
      /* First node for this key: allocate the head-pointer cell once, in
         the cache's own pool, and register it in the hash. */
      list_head = apr_palloc (apr_hash_pool_get (xlate_handle_hash),
                              sizeof (*list_head));
      *list_head = NULL;
      apr_hash_set (xlate_handle_hash, userdata_key,
                    APR_HASH_KEY_STRING, list_head);
    }

  /* Push NODE onto the front of the list. */
  node->next = *list_head;
  *list_head = node;

#if APR_HAS_THREADS
  if (apr_thread_mutex_unlock (xlate_handle_mutex) != APR_SUCCESS)
    abort ();
#endif
}

/* Set *RET to the xlate handle node for converting native characters to
   UTF-8, using POOL for the lookup or creation.  This forwards to
   get_xlate_handle_node() with the native-to-UTF-8 cache key and returns
   its result unchanged. */
static svn_error_t *
get_ntou_xlate_handle_node (xlate_handle_node_t **ret, apr_pool_t *pool)
{
  const char *const to_charset = "UTF-8";
  const char *const from_charset = APR_LOCALE_CHARSET;

  return get_xlate_handle_node (ret, to_charset, from_charset,
                                SVN_UTF_NTOU_XLATE_HANDLE, pool);
}


/* Set *RET to the xlate handle node for converting UTF-8 to native
   characters, using POOL for the lookup or creation.  This forwards to
   get_xlate_handle_node() with the UTF-8-to-native cache key and returns
   its result unchanged: in particular, when the handle cannot be opened
   because apr_xlate_open returned APR_EINVAL or APR_ENOTIMPL, the node's
   HANDLE member is NULL and SVN_NO_ERROR is returned. */
static svn_error_t *
get_uton_xlate_handle_node (xlate_handle_node_t **ret, apr_pool_t *pool)
{
  const char *const to_charset = APR_LOCALE_CHARSET;
  const char *const from_charset = "UTF-8";

  return get_xlate_handle_node (ret, to_charset, from_charset,
                                SVN_UTF_UTON_XLATE_HANDLE, pool);
}


/* Convert SRC_LENGTH bytes of SRC_DATA using the translation handle
   CONVSET, and store the result in *DEST, a stringbuf allocated in POOL.

   *DEST is always set; it is an empty stringbuf when SRC_LENGTH is 0.
   Return SVN_NO_ERROR on success, in which case (*DEST)->data is
   NUL-terminated at (*DEST)->len.  If apr_xlate_conv_buffer reports a
   failure, return an error carrying that APR status. */
static svn_error_t *
convert_to_stringbuf (apr_xlate_t *convset,
                      const char *src_data,
                      apr_size_t src_length,
                      svn_stringbuf_t **dest,
                      apr_pool_t *pool)
{
  apr_size_t buflen = src_length;
  apr_status_t apr_err;
  apr_size_t srclen = src_length;   /* input bytes not yet converted */

  /* Initialize *DEST to an empty stringbuf. */
  *dest = svn_stringbuf_create ("", pool);

  /* Not only does it not make sense to convert an empty string, but
     apr-iconv is quite unreasonable about not allowing that. */
  if (src_length == 0)
    return SVN_NO_ERROR;

  do
    {
      apr_size_t avail;     /* output room offered to this pass */
      apr_size_t destlen;   /* in: output room; out: room left unused */
      char *destbuf;

      /* A 1:2 ratio of input bytes to output bytes should be enough for
         most translations.  We only come around again when the previous
         pass succeeded but left input unconverted, so the buffer must
         grow on every pass.  Growing only when the output room was
         completely used up would repeat a pass with identical state,
         forever, whenever the leftover room is non-zero but too small
         for the next output character. */
      buflen *= 2;

      /* Ensure that *DEST has sufficient storage for the translated
         result, plus the terminating NUL. */
      svn_stringbuf_ensure (*dest, buflen + 1);

      /* Continue writing right after the already-converted output. */
      destbuf = (*dest)->data + (*dest)->len;
      avail = buflen - (*dest)->len;
      destlen = avail;

      /* Attempt the conversion, resuming at the first unconverted
         input byte. */
      apr_err = apr_xlate_conv_buffer (convset,
                                       src_data + (src_length - srclen),
                                       &srclen,
                                       destbuf,
                                       &destlen);

      /* Account for the output produced by this pass. */
      (*dest)->len += avail - destlen;

    } while (! apr_err && srclen);

  /* If we exited the loop with an error, return the error. */
  if (apr_err)
    /* Can't use svn_error_wrap_apr here because it calls functions in
       this file, leading to infinite recursion. */
    return svn_error_create (apr_err, NULL, "Can't recode string");

  /* Else, exited due to success.  Terminate the result at its final
     length. */
  (*dest)->data[(*dest)->len] = '\0';

  return SVN_NO_ERROR;
}


/* Return APR_EINVAL if the first LEN bytes of DATA contain anything
   other than seven-bit, non-control (except for whitespace) ASCII
   characters, finding the error pool from POOL.  Otherwise, return
   SVN_NO_ERROR. */
static svn_error_t *
check_non_ascii (const char *data, apr_size_t len, apr_pool_t *pool)
{
  const char *data_start = data;

  for (; len > 0; --len, data++)
    {
      if ((! apr_isascii (*data))
          || ((! apr_isspace (*data))
              && apr_iscntrl (*data)))
        {
          /* Show the printable part of the data, followed by the
             decimal code of the questionable character.  Because if a
             user ever gets this error, she's going to have to spend
             time tracking down the non-ASCII data, so we want to help
             as much as possible.  And yes, we just call the unsafe
             data "non-ASCII", even though the actual constraint is
             somewhat more complex than that. */ 

          if (data - data_start)
            {
              const char *error_data
                = apr_pstrndup (pool, data_start, (data - data_start));

              return svn_error_createf
                (APR_EINVAL, NULL,
                 "Safe data:\n"
                 "\"%s\"\n"
utf.c - 源码说明

本页面展示了「linux subdivision ying gai ke yi le ba」中的 utf.c 源码文件，采用 C语言编程语言编写，共 808 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与subdivision相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?