⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unicodeobject.c

📁 python s60 1.4.5版本的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
/* Portions Copyright (c) 2005-2007 Nokia Corporation */
/*

Unicode implementation based on original code by Fredrik Lundh,
modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
Unicode Integration Proposal (see file Misc/unicode.txt).

Copyright (c) Corporation for National Research Initiatives.

--------------------------------------------------------------------
The original string type implementation is:

    Copyright (c) 1999 by Secret Labs AB
    Copyright (c) 1999 by Fredrik Lundh

By obtaining, using, and/or copying this software and/or its
associated documentation, you agree that you have read, understood,
and will comply with the following terms and conditions:

Permission to use, copy, modify, and distribute this software and its
associated documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies, and that both that copyright notice and this permission notice
appear in supporting documentation, and that the name of Secret Labs
AB or the author not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--------------------------------------------------------------------

*/

#include "Python.h"

#include "unicodeobject.h"
#include "ucnhash.h"
#include "python_globals.h"

#ifdef MS_WIN32
#include <windows.h>
#endif

/* Maximum number of deallocated Unicode objects that are kept on the
   free list for later reuse instead of being released. */

#define MAX_UNICODE_FREELIST_SIZE       1024

/* Limit for the Unicode object free list keep-alive optimization.

   When an object is put on the free list, its character buffer is
   kept allocated as long as the object's length (counted in
   Py_UNICODE elements) is less than this limit; longer buffers are
   released. This reduces malloc() overhead for small Unicode objects.

   At worst this will result in roughly MAX_UNICODE_FREELIST_SIZE *
   (sizeof(PyUnicodeObject) +
    KEEPALIVE_SIZE_LIMIT * sizeof(Py_UNICODE) +
    malloc()-overhead) bytes of unused garbage.

   Setting the limit to 0 effectively turns the feature off.

   Note: This is an experimental feature! If you get core dumps when
   using Unicode objects, turn this feature off.

*/

#define KEEPALIVE_SIZE_LIMIT       9

/* Endianness switches: exactly one of BYTEORDER_IS_BIG_ENDIAN and
   BYTEORDER_IS_LITTLE_ENDIAN gets defined. Little endian is assumed
   unless WORDS_BIGENDIAN is defined. */

#ifdef WORDS_BIGENDIAN
# define BYTEORDER_IS_BIG_ENDIAN
#else
# define BYTEORDER_IS_LITTLE_ENDIAN
#endif

/* --- Globals ------------------------------------------------------------

   The globals are initialized by the _PyUnicode_Init() API and should
   not be used before calling that API.

   All accesses to the globals below go through PY_GLOB(name). On
   SYMBIAN builds the file-static variables are not declared at all;
   PY_GLOB instead selects the member of the same name in the structure
   that PYTHON_GLOBALS points to. On every other platform PY_GLOB(name)
   expands to the plain static variable.

   Several functions in this file temporarily redefine PY_GLOB to go
   through a local copy of PYTHON_GLOBALS and restore this definition
   at the end of the function body.

*/

#ifdef SYMBIAN
#define PY_GLOB(x) (PYTHON_GLOBALS->x)
#else
#define PY_GLOB(x) x
#endif

/* Free list for Unicode objects: a singly linked list of deallocated
   objects (the link is stored in the first pointer-sized bytes of each
   object) together with its current number of entries. */
#ifndef SYMBIAN
static PyUnicodeObject *unicode_freelist;
static int unicode_freelist_size;
#endif

/* The empty Unicode object is shared to improve performance. */
#ifndef SYMBIAN
static PyUnicodeObject *unicode_empty;
#endif

/* Single character Unicode strings in the Latin-1 range are being
   shared as well; the table is indexed by the character's ordinal and
   an entry stays NULL until that character is first requested. */
#ifndef SYMBIAN
static PyUnicodeObject *unicode_latin1[256];
#endif

/* Default encoding to use and assume when NULL is passed as encoding
   parameter; it is initialized by _PyUnicode_Init().

   Always use the PyUnicode_SetDefaultEncoding() and
   PyUnicode_GetDefaultEncoding() APIs to access this global.

*/
#ifndef SYMBIAN
static char unicode_default_encoding[100];
#endif

DL_EXPORT(Py_UNICODE)
PyUnicode_GetMax(void)
{
    /* Report the largest ordinal a Py_UNICODE can hold in this build. */
#ifndef Py_UNICODE_WIDE
    /* Narrow build. Note that 0xFFFF is itself an illegal character,
       so it should not be handed to unichr. */
    return 0xFFFF;
#else
    /* Wide build: the full Unicode range is available. */
    return 0x10FFFF;
#endif
}

/* --- Unicode Object ----------------------------------------------------- */

static
int unicode_resize(register PyUnicodeObject *unicode,
                      int length)
{
    /* Resize the character buffer of `unicode` in place to hold `length`
       elements plus a trailing 0, then drop the cached default-encoded
       string and hash. Returns 0 on success, -1 with an exception set
       on failure. */

    /* The buffer is only touched when the length really changes; the
       cache reset further down happens in either case. */
    if (unicode->length != length) {
        void *previous;
        int shared;

        /* The empty singleton and the cached Latin-1 single character
           objects are shared and must not be resized in place; use
           PyUnicode_Resize() for those. */
        shared = (unicode == PY_GLOB(unicode_empty));
        if (!shared && unicode->length == 1 && unicode->str[0] < 256)
            shared = ((PY_GLOB(unicode_latin1))[unicode->str[0]] == unicode);
        if (shared) {
            PyErr_SetString(PyExc_SystemError,
                            "can't resize shared unicode objects");
            return -1;
        }

        /* One extra element is requested so the string can be
           0-terminated. */
        previous = unicode->str;
        PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1);
        if (unicode->str == NULL) {
            /* Put the previous buffer pointer back before reporting
               the failure. */
            unicode->str = previous;
            PyErr_NoMemory();
            return -1;
        }
        unicode->length = length;
        unicode->str[length] = 0;
    }

    /* Reset the object caches. */
    if (unicode->defenc != NULL) {
        Py_DECREF(unicode->defenc);
        unicode->defenc = NULL;
    }
    unicode->hash = -1;

    return 0;
}

/* We allocate one more byte to make sure the string is
   Ux0000 terminated -- XXX is this needed ?

   XXX This allocator could further be enhanced by assuring that the
       free list never reduces its size below 1.

*/

static
PyUnicodeObject *_PyUnicode_New(int length)
{
    register PyUnicodeObject *unicode;

#ifdef SYMBIAN
    SPy_Python_globals* pyglobals = PYTHON_GLOBALS; // avoid TLS reads
#undef PY_GLOB
#define PY_GLOB(x) (pyglobals->x)
#endif

    /* Optimization for empty strings */
    if (length == 0 && PY_GLOB(unicode_empty) != NULL) {
        Py_INCREF(PY_GLOB(unicode_empty));
        return PY_GLOB(unicode_empty);
    }

    /* Unicode freelist & memory allocation */
    if (PY_GLOB(unicode_freelist)) {
        unicode = PY_GLOB(unicode_freelist);
        PY_GLOB(unicode_freelist) = *(PyUnicodeObject **)unicode;
        PY_GLOB(unicode_freelist_size)--;
	if (unicode->str) {
	    /* Keep-Alive optimization: we only upsize the buffer,
	       never downsize it. */
	    if ((unicode->length < length) &&
		unicode_resize(unicode, length)) {
		PyMem_DEL(unicode->str);
		goto onError;
	    }
	}
        else {
	    unicode->str = PyMem_NEW(Py_UNICODE, length + 1);
        }
        PyObject_INIT(unicode, &PyUnicode_Type);
    }
    else {
        unicode = PyObject_NEW(PyUnicodeObject, &PyUnicode_Type);
        if (unicode == NULL)
            return NULL;
	unicode->str = PyMem_NEW(Py_UNICODE, length + 1);
    }

    if (!unicode->str) {
	PyErr_NoMemory();
	goto onError;
    }
    unicode->str[length] = 0;
    unicode->length = length;
    unicode->hash = -1;
    unicode->defenc = NULL;
    return unicode;

 onError:
    _Py_ForgetReference((PyObject *)unicode);
    PyObject_DEL(unicode);
    return NULL;
#ifdef SYMBIAN
#undef PY_GLOB
#define PY_GLOB(x) (PYTHON_GLOBALS->x)
#endif
}

static
void unicode_dealloc(register PyUnicodeObject *unicode)
{
#ifdef SYMBIAN
    SPy_Python_globals* pyglobals = PYTHON_GLOBALS; // avoid TLS reads
#undef PY_GLOB
#define PY_GLOB(x) (pyglobals->x)
#endif

    /* Deallocator for Unicode objects. Exact Unicode instances are
       parked on the free list while there is room; everything else is
       released for good. */
    if (!PyUnicode_CheckExact(unicode) ||
        PY_GLOB(unicode_freelist_size) >= MAX_UNICODE_FREELIST_SIZE) {
        /* Subclass instance or free list full: free the buffer, drop
           the cached encoded string and hand the object to tp_free. */
        PyMem_DEL(unicode->str);
        Py_XDECREF(unicode->defenc);
        unicode->ob_type->tp_free((PyObject *)unicode);
    }
    else {
        /* Keep-Alive optimization: only buffers shorter than the limit
           stay attached to the recycled object. */
        if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
            PyMem_DEL(unicode->str);
            unicode->str = NULL;
            unicode->length = 0;
        }
        if (unicode->defenc != NULL) {
            Py_DECREF(unicode->defenc);
            unicode->defenc = NULL;
        }
        /* Push onto the free list; the start of the object is reused
           to store the link to the previous head. */
        *(PyUnicodeObject **)unicode = PY_GLOB(unicode_freelist);
        PY_GLOB(unicode_freelist) = unicode;
        PY_GLOB(unicode_freelist_size)++;
    }
#ifdef SYMBIAN
#undef PY_GLOB
#define PY_GLOB(x) (PYTHON_GLOBALS->x)
#endif
}

/* Resize the Unicode object referenced by *unicode to `length`
   characters.

   *unicode must point to a Unicode object with a reference count of
   exactly 1 and `length` must not be negative; otherwise a bad
   internal call error is raised.

   Most objects are resized in place. The empty object and single
   character objects may be shared, so for those a fresh object is
   created, the overlapping characters are copied, the caller's
   reference to the old object is released and *unicode is updated to
   point to the new object.

   Returns 0 on success, -1 with an exception set on failure (in which
   case *unicode is left unchanged). */
DL_EXPORT(int)
PyUnicode_Resize(PyObject **unicode,
                 int length)
{
    register PyUnicodeObject *v;

    /* Argument checks */
    if (unicode == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }
    v = (PyUnicodeObject *)*unicode;
    if (v == NULL || !PyUnicode_Check(v) || v->ob_refcnt != 1 || length < 0) {
        PyErr_BadInternalCall();
        return -1;
    }

    /* Resizing unicode_empty and single character objects is not
       possible since these are being shared. We simply return a fresh
       copy with the same Unicode content. */
    if (v->length != length &&
        (v == PY_GLOB(unicode_empty) || v->length == 1)) {
        PyUnicodeObject *w = _PyUnicode_New(length);
        if (w == NULL)
            return -1;
        Py_UNICODE_COPY(w->str, v->str,
                        length < v->length ? length : v->length);
        /* The caller's reference moves from the old object to the new
           one, so the old reference has to be given up here; without
           this the old object would be leaked. */
        Py_DECREF(*unicode);
        *unicode = (PyObject *)w;
        return 0;
    }

    /* Note that we don't have to modify *unicode for unshared Unicode
       objects, since we can modify them in-place. */
    return unicode_resize(v, length);
}

/* Internal API for use in unicodeobject.c only!

   Convenience wrapper around PyUnicode_Resize() that casts its first
   argument to PyObject **, so callers can pass the address of a
   variable declared with a more specific pointer type. The result is
   whatever PyUnicode_Resize() returns. */
#define _PyUnicode_Resize(unicodevar, length) \
        PyUnicode_Resize(((PyObject **)(unicodevar)), length)

DL_EXPORT(PyObject *)
PyUnicode_FromUnicode(const Py_UNICODE *u,
                      int size)
{
    PyUnicodeObject *result;

#ifdef SYMBIAN
    SPy_Python_globals* pyglobals = PYTHON_GLOBALS; // avoid TLS reads
#undef PY_GLOB
#define PY_GLOB(x) (pyglobals->x)
#endif

    /* Build a Unicode object from the `size` characters at `u`. With
       u == NULL the object is created with that size but its contents
       are not filled in. Returns a new reference or NULL on failure.

       The sharing shortcuts below only apply when the data is known
       at construction time, i.e. when u is not NULL. */

    /* Empty string: hand out the shared empty object once it exists. */
    if (u != NULL && size == 0 && PY_GLOB(unicode_empty) != NULL) {
        Py_INCREF(PY_GLOB(unicode_empty));
        return (PyObject *)PY_GLOB(unicode_empty);
    }

    /* Single Latin-1 character: look it up in the shared table and
       create the entry on first use. */
    if (u != NULL && size == 1 && *u < 256) {
        result = (PY_GLOB(unicode_latin1))[*u];
        if (result == NULL) {
            result = _PyUnicode_New(1);
            if (result == NULL)
                return NULL;
            result->str[0] = *u;
            /* The reference obtained from _PyUnicode_New() now belongs
               to the table. */
            (PY_GLOB(unicode_latin1))[*u] = result;
        }
        /* Extra reference for the caller. */
        Py_INCREF(result);
        return (PyObject *)result;
    }

    /* General case: allocate and, if data was supplied, copy it in. */
    result = _PyUnicode_New(size);
    if (result == NULL)
        return NULL;
    if (u != NULL)
        Py_UNICODE_COPY(result->str, u, size);

    return (PyObject *)result;
#ifdef SYMBIAN
#undef PY_GLOB
#define PY_GLOB(x) (PYTHON_GLOBALS->x)
#endif
}

#ifdef HAVE_WCHAR_H

/* Create a Unicode object from the first `size` wchar_t elements at
   `w`. The input does not need to be 0-terminated.

   Returns a new reference, or NULL with an exception set if `w` is
   NULL or the object cannot be allocated. */
DL_EXPORT(PyObject *)
PyUnicode_FromWideChar(register const wchar_t *w,
                       int size)
{
    PyUnicodeObject *unicode;

    if (w == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }

    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;

    /* Copy the wchar_t data into the new object */
#ifdef HAVE_USABLE_WCHAR_T
    /* NOTE(review): a raw memcpy presumes wchar_t and Py_UNICODE have
       the same size in builds that define HAVE_USABLE_WCHAR_T. */
    memcpy(unicode->str, w, size * sizeof(wchar_t));
#else
    {
        register Py_UNICODE *u;
        register int i;
        u = PyUnicode_AS_UNICODE(unicode);
        /* Copy exactly `size` elements. Counting down to and including
           0 would read w[size], which is outside the input, and would
           overwrite the terminator set by _PyUnicode_New(). */
        for (i = size; i > 0; i--)
            *u++ = *w++;
    }
#endif

    return (PyObject *)unicode;
}

/* Copy the contents of `unicode` into the wchar_t buffer `w`.

   At most `size` elements are written; if the object is shorter, only
   its own length is copied. No terminating 0 is appended.

   Returns the number of elements copied, or -1 with an exception set
   if `unicode` is NULL. */
DL_EXPORT(int)
PyUnicode_AsWideChar(PyUnicodeObject *unicode,
                     register wchar_t *w,
                     int size)
{
    if (unicode == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }
    /* Never copy more than the object actually contains. */
    if (size > PyUnicode_GET_SIZE(unicode))
        size = PyUnicode_GET_SIZE(unicode);
#ifdef HAVE_USABLE_WCHAR_T
    /* NOTE(review): a raw memcpy presumes wchar_t and Py_UNICODE have
       the same size in builds that define HAVE_USABLE_WCHAR_T. */
    memcpy(w, unicode->str, size * sizeof(wchar_t));
#else
    {
        register Py_UNICODE *u;
        register int i;
        u = PyUnicode_AS_UNICODE(unicode);
        /* Copy exactly `size` elements. Counting down to and including
           0 would store w[size], one element past the space the caller
           provided. */
        for (i = size; i > 0; i--)
            *w++ = *u++;
    }
#endif

    return size;
}

#endif

/* Create a one-character Unicode object for the code point `ordinal`.

   The accepted range depends on the build: 0..0x10ffff when
   Py_UNICODE_WIDE is defined, 0..0xffff otherwise. Values outside the
   range raise ValueError.

   Returns a new reference, or NULL with an exception set. */
DL_EXPORT(PyObject *)
PyUnicode_FromOrdinal(int ordinal)
{
    Py_UNICODE s[1];

#ifdef Py_UNICODE_WIDE
    if (ordinal < 0 || ordinal > 0x10ffff) {
        PyErr_SetString(PyExc_ValueError,
                        "unichr() arg not in range(0x110000) "
                        "(wide Python build)");
        return NULL;
    }
#else
    if (ordinal < 0 || ordinal > 0xffff) {
        PyErr_SetString(PyExc_ValueError,
                        "unichr() arg not in range(0x10000) "
                        "(narrow Python build)");
        return NULL;
    }
#endif

    /* After the range check the ordinal always fits into a single
       Py_UNICODE: narrow builds have already rejected everything above
       0xffff (so a surrogate pair is never needed here), and wide
       builds store the full code point directly. */
    s[0] = (Py_UNICODE) ordinal;
    return PyUnicode_FromUnicode(s, 1);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -