⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unicodeobject.c

📁 python s60 1.4.5版本的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
DL_EXPORT(PyObject *)
PyUnicode_FromObject(register PyObject *obj)
{
    /* XXX Perhaps we should make this API an alias of
           PyObject_Unicode() instead ?! */
    if (PyUnicode_CheckExact(obj)) {
	Py_INCREF(obj);
	return obj;
    }
    if (PyUnicode_Check(obj)) {
	/* For a Unicode subtype that's not a Unicode object,
	   return a true Unicode object with the same data. */
	return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
				     PyUnicode_GET_SIZE(obj));
    }
    return PyUnicode_FromEncodedObject(obj, NULL, "strict");
}

DL_EXPORT(PyObject *)
PyUnicode_FromEncodedObject(register PyObject *obj,
			    const char *encoding,
			    const char *errors)
{
    const char *s = NULL;
    int len;
    int owned = 0;
    PyObject *v;

    if (obj == NULL) {
	PyErr_BadInternalCall();
	return NULL;
    }

#if 0
    /* For b/w compatibility we also accept Unicode objects provided
       that no encodings is given and then redirect to
       PyObject_Unicode() which then applies the additional logic for
       Unicode subclasses.

       NOTE: This API should really only be used for object which
             represent *encoded* Unicode !

    */
	if (PyUnicode_Check(obj)) {
	    if (encoding) {
		PyErr_SetString(PyExc_TypeError,
				"decoding Unicode is not supported");
	    return NULL;
	    }
	return PyObject_Unicode(obj);
	    }
#else
    if (PyUnicode_Check(obj)) {
	PyErr_SetString(PyExc_TypeError,
			"decoding Unicode is not supported");
	return NULL;
	}
#endif

    /* Coerce object */
    if (PyString_Check(obj)) {
	    s = PyString_AS_STRING(obj);
	    len = PyString_GET_SIZE(obj);
	    }
    else if (PyObject_AsCharBuffer(obj, &s, &len)) {
	/* Overwrite the error message with something more useful in
	   case of a TypeError. */
	if (PyErr_ExceptionMatches(PyExc_TypeError))
	PyErr_Format(PyExc_TypeError,
			 "coercing to Unicode: need string or buffer, "
			 "%.80s found",
		     obj->ob_type->tp_name);
	goto onError;
    }

    /* Convert to Unicode */
    if (len == 0) {
	Py_INCREF(PY_GLOB(unicode_empty));
	v = (PyObject *)PY_GLOB(unicode_empty);
    }
    else
	v = PyUnicode_Decode(s, len, encoding, errors);

    if (owned) {
	Py_DECREF(obj);
    }
    return v;

 onError:
    if (owned) {
	Py_DECREF(obj);
    }
    return NULL;
}

DL_EXPORT(PyObject *)
PyUnicode_Decode(const char *s,
		 int size,
		 const char *encoding,
		 const char *errors)
{
    PyObject *buffer = NULL, *unicode;

    if (encoding == NULL)
	encoding = PyUnicode_GetDefaultEncoding();

    /* Shortcuts for common default encodings */
    if (strcmp(encoding, "utf-8") == 0)
        return PyUnicode_DecodeUTF8(s, size, errors);
    else if (strcmp(encoding, "latin-1") == 0)
        return PyUnicode_DecodeLatin1(s, size, errors);
    else if (strcmp(encoding, "ascii") == 0)
        return PyUnicode_DecodeASCII(s, size, errors);

    /* Decode via the codec registry */
    buffer = PyBuffer_FromMemory((void *)s, size);
    if (buffer == NULL)
        goto onError;
    unicode = PyCodec_Decode(buffer, encoding, errors);
    if (unicode == NULL)
        goto onError;
    if (!PyUnicode_Check(unicode)) {
        PyErr_Format(PyExc_TypeError,
                     "decoder did not return an unicode object (type=%.400s)",
                     unicode->ob_type->tp_name);
        Py_DECREF(unicode);
        goto onError;
    }
    Py_DECREF(buffer);
    return unicode;

 onError:
    Py_XDECREF(buffer);
    return NULL;
}

DL_EXPORT(PyObject *)
PyUnicode_Encode(const Py_UNICODE *s,
		 int size,
		 const char *encoding,
		 const char *errors)
{
    PyObject *v, *unicode;

    unicode = PyUnicode_FromUnicode(s, size);
    if (unicode == NULL)
	return NULL;
    v = PyUnicode_AsEncodedString(unicode, encoding, errors);
    Py_DECREF(unicode);
    return v;
}

DL_EXPORT(PyObject *)
PyUnicode_AsEncodedString(PyObject *unicode,
			  const char *encoding,
			  const char *errors)
{
    PyObject *v;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }

    if (encoding == NULL)
	encoding = PyUnicode_GetDefaultEncoding();

    /* Shortcuts for common default encodings */
    if (errors == NULL) {
	if (strcmp(encoding, "utf-8") == 0)
	    return PyUnicode_AsUTF8String(unicode);
	else if (strcmp(encoding, "latin-1") == 0)
	    return PyUnicode_AsLatin1String(unicode);
	else if (strcmp(encoding, "ascii") == 0)
	    return PyUnicode_AsASCIIString(unicode);
    }

    /* Encode via the codec registry */
    v = PyCodec_Encode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
    /* XXX Should we really enforce this ? */
    if (!PyString_Check(v)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder did not return a string object (type=%.400s)",
                     v->ob_type->tp_name);
        Py_DECREF(v);
        goto onError;
    }
    return v;

 onError:
    return NULL;
}

DL_EXPORT(PyObject *)
_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
				  const char *errors)
{
    PyObject *v = ((PyUnicodeObject *)unicode)->defenc;

    if (v)
        return v;
    v = PyUnicode_AsEncodedString(unicode, NULL, errors);
    if (v && errors == NULL)
        ((PyUnicodeObject *)unicode)->defenc = v;
    return v;
}

DL_EXPORT(Py_UNICODE *)
PyUnicode_AsUnicode(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    return PyUnicode_AS_UNICODE(unicode);

 onError:
    return NULL;
}

DL_EXPORT(int)
PyUnicode_GetSize(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    return PyUnicode_GET_SIZE(unicode);

 onError:
    return -1;
}

DL_EXPORT(const char *)
PyUnicode_GetDefaultEncoding(void)
{
    return PY_GLOB(unicode_default_encoding);
}

DL_EXPORT(int)
PyUnicode_SetDefaultEncoding(const char *encoding)
{
    PyObject *v;

    /* Make sure the encoding is valid. As side effect, this also
       loads the encoding into the codec registry cache. */
    v = _PyCodec_Lookup(encoding);
    if (v == NULL)
	goto onError;
    Py_DECREF(v);
    strncpy(PY_GLOB(unicode_default_encoding),
	    encoding,
	    sizeof(PY_GLOB(unicode_default_encoding)));
    return 0;

 onError:
    return -1;
}

/* --- UTF-7 Codec -------------------------------------------------------- */

/* see RFC2152 for details */

const static
char utf7_special[128] = {
    /* indicate whether a UTF-7 character is special i.e. cannot be directly
       encoded:
	   0 - not special
	   1 - special
	   2 - whitespace (optional)
	   3 - RFC2152 Set O (optional) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3,
    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 1, 1,

};

#define SPECIAL(c, encodeO, encodeWS) \
	(((c)>127 || utf7_special[(c)] == 1) || \
	 (encodeWS && (utf7_special[(c)] == 2)) || \
     (encodeO && (utf7_special[(c)] == 3)))

#define B64(n)  ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])
#define B64CHAR(c) (isalnum(c) || (c) == '+' || (c) == '/')
#define UB64(c)        ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \
                        (c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4)

#define ENCODE(out, ch, bits) \
    while (bits >= 6) { \
        *out++ = B64(ch >> (bits-6)); \
        bits -= 6; \
    }

#define DECODE(out, ch, bits, surrogate) \
    while (bits >= 16) { \
        Py_UNICODE outCh = (Py_UNICODE) ((ch >> (bits-16)) & 0xffff); \
        bits -= 16; \
		if (surrogate) { \
			/* We have already generated an error for the high surrogate
               so let's not bother seeing if the low surrogate is correct or not */\
			surrogate = 0; \
		} else if (0xDC00 <= outCh && outCh <= 0xDFFF) { \
            /* This is a surrogate pair. Unfortunately we can't represent \
               it in a 16-bit character */ \
			surrogate = 1; \
            errmsg = "code pairs are not supported"; \
	        goto utf7Error; \
		} else { \
				*out++ = outCh; \
		} \
    } \

static
int utf7_decoding_error(Py_UNICODE **dest,
                        const char *errors,
                        const char *details)
{
    if ((errors == NULL) ||
        (strcmp(errors,"strict") == 0)) {
        PyErr_Format(PyExc_UnicodeError,
                     "UTF-7 decoding error: %.400s",
                     details);
        return -1;
    }
    else if (strcmp(errors,"ignore") == 0) {
        return 0;
    }
    else if (strcmp(errors,"replace") == 0) {
        if (dest != NULL) {
            **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
            (*dest)++;
        }
        return 0;
    }
    else {
        PyErr_Format(PyExc_ValueError,
                     "UTF-7 decoding error; unknown error handling code: %.400s",
                     errors);
        return -1;
    }
}

DL_EXPORT(PyObject *)
PyUnicode_DecodeUTF7(const char *s,
		     int size,
		     const char *errors)
{
    const char *e;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const char *errmsg = "";
    int inShift = 0;
    unsigned int bitsleft = 0;
    unsigned long charsleft = 0;
	int surrogate = 0;

    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;
    if (size == 0)
        return (PyObject *)unicode;

    p = unicode->str;
    e = s + size;

    while (s < e) {
        Py_UNICODE ch = *s;

        if (inShift) {
            if ((ch == '-') || !B64CHAR(ch)) {
                inShift = 0;
                s++;

                /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
                if (bitsleft >= 6) {
                    /* The shift sequence has a partial character in it. If
                       bitsleft < 6 then we could just classify it as padding
                       but that is not the case here */

                    errmsg = "partial character in shift sequence";
                    goto utf7Error;
                }
                /* According to RFC2152 the remaining bits should be zero. We
                   choose to signal an error/insert a replacement character
                   here so indicate the potential of a misencoded character. */

                /* On x86, a << b == a << (b%32) so make sure that bitsleft != 0 */
                if (bitsleft && charsleft << (sizeof(charsleft) * 8 - bitsleft)) {
                    errmsg = "non-zero padding bits in shift sequence";
                    goto utf7Error;
                }

                if (ch == '-') {
                    if ((s < e) && (*(s) == '-')) {
                        *p++ = '-';
                        inShift = 1;
                    }
                } else if (SPECIAL(ch,0,0)) {
                    errmsg = "unexpected special character";
	                goto utf7Error;
                } else  {
                    *p++ = ch;
                }
            } else {
                charsleft = (charsleft << 6) | UB64(ch);
                bitsleft += 6;
                s++;
                /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
            }
        }
        else if ( ch == '+' ) {
            s++;
            if (s < e && *s == '-') {
                s++;
                *p++ = '+';
            } else
            {
                inShift = 1;
                bitsleft = 0;
            }
        }
        else if (SPECIAL(ch,0,0)) {
            errmsg = "unexpected special character";
            s++;
	        goto utf7Error;
        }
        else {
            *p++ = ch;
            s++;
        }
        continue;
    utf7Error:
      if (utf7_decoding_error(&p, errors, errmsg))
          goto onError;
    }

    if (inShift) {
        if (utf7_decoding_error(&p, errors, "unterminated shift sequence"))
            goto onError;
    }

    if (_PyUnicode_Resize(&unicode, p - unicode->str))
        goto onError;

    return (PyObject *)unicode;

onError:
    Py_DECREF(unicode);
    return NULL;
}


DL_EXPORT(PyObject *)
PyUnicode_EncodeUTF7(const Py_UNICODE *s,
		     int size,
		     int encodeSetO,
		     int encodeWhiteSpace,
		     const char *errors)
{
    PyObject *v;
    /* It might be possible to tighten this worst case */
    unsigned int cbAllocated = 5 * size;
    int inShift = 0;
    int i = 0;
    unsigned int bitsleft = 0;
    unsigned long charsleft = 0;
    char * out;
    char * start;

    if (size == 0)
		return PyString_FromStringAndSize(NULL, 0);

    v = PyString_FromStringAndSize(NULL, cbAllocated);
    if (v == NULL)
        return NULL;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -