⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unicodeobject.c

📁 python s60 1.4.5版本的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
		*p++ = 'U';
		*p++ = hexdigit[(ucs >> 28) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 24) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 20) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 16) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 12) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 8) & 0x0000000F];
		*p++ = hexdigit[(ucs >> 4) & 0x0000000F];
		*p++ = hexdigit[ucs & 0x0000000F];
		continue;
	    }
	    /* Fall through: isolated surrogates are copied as-is */
	    s--;
	    size++;
	}

        /* Map 16-bit characters to '\uxxxx' */
        if (ch >= 256) {
            *p++ = '\\';
            *p++ = 'u';
            *p++ = hexdigit[(ch >> 12) & 0x000F];
            *p++ = hexdigit[(ch >> 8) & 0x000F];
            *p++ = hexdigit[(ch >> 4) & 0x000F];
            *p++ = hexdigit[ch & 0x000F];
        }

        /* Map special whitespace to '\t', \n', '\r' */
        else if (ch == '\t') {
            *p++ = '\\';
            *p++ = 't';
        }
        else if (ch == '\n') {
            *p++ = '\\';
            *p++ = 'n';
        }
        else if (ch == '\r') {
            *p++ = '\\';
            *p++ = 'r';
        }

        /* Map non-printable US ASCII to '\xhh' */
        else if (ch < ' ' || ch >= 0x7F) {
            *p++ = '\\';
            *p++ = 'x';
            *p++ = hexdigit[(ch >> 4) & 0x000F];
            *p++ = hexdigit[ch & 0x000F];
        }

        /* Copy everything else as-is */
        else
            *p++ = (char) ch;
    }
    if (quotes)
        *p++ = PyString_AS_STRING(repr)[1];

    *p = '\0';
    _PyString_Resize(&repr, p - PyString_AS_STRING(repr));
    return repr;
}

DL_EXPORT(PyObject *)
PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
			      int size)
{
    return unicodeescape_string(s, size, 0);
}

DL_EXPORT(PyObject *)
PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    return PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
					 PyUnicode_GET_SIZE(unicode));
}

/* --- Raw Unicode Escape Codec ------------------------------------------- */

DL_EXPORT(PyObject *)
PyUnicode_DecodeRawUnicodeEscape(const char *s,
				 int size,
				 const char *errors)
{
    PyUnicodeObject *v;
    Py_UNICODE *p, *buf;
    const char *end;
    const char *bs;

    /* Escaped strings will always be longer than the resulting
       Unicode string, so we start with size here and then reduce the
       length after conversion to the true value. */
    v = _PyUnicode_New(size);
    if (v == NULL)
	goto onError;
    if (size == 0)
	return (PyObject *)v;
    p = buf = PyUnicode_AS_UNICODE(v);
    end = s + size;
    while (s < end) {
	unsigned char c;
	Py_UCS4 x;
	int i;

	/* Non-escape characters are interpreted as Unicode ordinals */
	if (*s != '\\') {
	    *p++ = (unsigned char)*s++;
	    continue;
	}

	/* \u-escapes are only interpreted iff the number of leading
	   backslashes if odd */
	bs = s;
	for (;s < end;) {
	    if (*s != '\\')
		break;
	    *p++ = (unsigned char)*s++;
	}
	if (((s - bs) & 1) == 0 ||
	    s >= end ||
	    *s != 'u') {
	    continue;
	}
	p--;
	s++;

	/* \uXXXX with 4 hex digits */
	for (x = 0, i = 0; i < 4; i++) {
	    c = (unsigned char)s[i];
	    if (!isxdigit(c)) {
		if (unicodeescape_decoding_error(&p, errors,
						 "truncated \\uXXXX"))
		    goto onError;
		x = 0xffffffff;
		i++;
		break;
	    }
	    x = (x<<4) & ~0xF;
	    if (c >= '0' && c <= '9')
		x += c - '0';
	    else if (c >= 'a' && c <= 'f')
		x += 10 + c - 'a';
	    else
		x += 10 + c - 'A';
	}
	s += i;
	if (x != 0xffffffff)
		*p++ = x;
    }
    if (_PyUnicode_Resize(&v, (int)(p - buf)))
	goto onError;
    return (PyObject *)v;

 onError:
    Py_XDECREF(v);
    return NULL;
}

DL_EXPORT(PyObject *)
PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
				 int size)
{
    PyObject *repr;
    char *p;
    char *q;

    static const char *const hexdigit = "0123456789abcdef";

    repr = PyString_FromStringAndSize(NULL, 6 * size);
    if (repr == NULL)
        return NULL;
    if (size == 0)
	return repr;

    p = q = PyString_AS_STRING(repr);
    while (size-- > 0) {
        Py_UNICODE ch = *s++;
	/* Map 16-bit characters to '\uxxxx' */
	if (ch >= 256) {
            *p++ = '\\';
            *p++ = 'u';
            *p++ = hexdigit[(ch >> 12) & 0xf];
            *p++ = hexdigit[(ch >> 8) & 0xf];
            *p++ = hexdigit[(ch >> 4) & 0xf];
            *p++ = hexdigit[ch & 15];
        }
	/* Copy everything else as-is */
	else
            *p++ = (char) ch;
    }
    *p = '\0';
    _PyString_Resize(&repr, p - q);
    return repr;
}

DL_EXPORT(PyObject *)
PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
	PyErr_BadArgument();
	return NULL;
    }
    return PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode),
					    PyUnicode_GET_SIZE(unicode));
}

/* --- Latin-1 Codec ------------------------------------------------------ */

DL_EXPORT(PyObject *)
PyUnicode_DecodeLatin1(const char *s,
		       int size,
		       const char *errors)
{
    PyUnicodeObject *v;
    Py_UNICODE *p;

    /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
    if (size == 1) {
	Py_UNICODE r = *(unsigned char*)s;
	return PyUnicode_FromUnicode(&r, 1);
    }

    v = _PyUnicode_New(size);
    if (v == NULL)
	goto onError;
    if (size == 0)
	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    while (size-- > 0)
	*p++ = (unsigned char)*s++;
    return (PyObject *)v;

 onError:
    Py_XDECREF(v);
    return NULL;
}

static
int latin1_encoding_error(const Py_UNICODE **source,
			  char **dest,
			  const char *errors,
			  const char *details)
{
    if ((errors == NULL) ||
	(strcmp(errors,"strict") == 0)) {
	PyErr_Format(PyExc_UnicodeError,
		     "Latin-1 encoding error: %.400s",
		     details);
	return -1;
    }
    else if (strcmp(errors,"ignore") == 0) {
	return 0;
    }
    else if (strcmp(errors,"replace") == 0) {
	**dest = '?';
	(*dest)++;
	return 0;
    }
    else {
	PyErr_Format(PyExc_ValueError,
		     "Latin-1 encoding error; "
		     "unknown error handling code: %.400s",
		     errors);
	return -1;
    }
}

DL_EXPORT(PyObject *)
PyUnicode_EncodeLatin1(const Py_UNICODE *p,
		       int size,
		       const char *errors)
{
    PyObject *repr;
    char *s, *start;

    repr = PyString_FromStringAndSize(NULL, size);
    if (repr == NULL)
        return NULL;
    if (size == 0)
	return repr;

    s = PyString_AS_STRING(repr);
    start = s;
    while (size-- > 0) {
        Py_UNICODE ch = *p++;
	if (ch >= 256) {
	    if (latin1_encoding_error(&p, &s, errors,
				      "ordinal not in range(256)"))
		goto onError;
	}
	else
            *s++ = (char)ch;
    }
    /* Resize if error handling skipped some characters */
    if (s - start < PyString_GET_SIZE(repr))
	_PyString_Resize(&repr, s - start);
    return repr;

 onError:
    Py_DECREF(repr);
    return NULL;
}

DL_EXPORT(PyObject *)
PyUnicode_AsLatin1String(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
	PyErr_BadArgument();
	return NULL;
    }
    return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
				  PyUnicode_GET_SIZE(unicode),
				  NULL);
}

/* --- 7-bit ASCII Codec -------------------------------------------------- */

static
int ascii_decoding_error(const char **source,
			 Py_UNICODE **dest,
			 const char *errors,
			 const char *details)
{
    if ((errors == NULL) ||
	(strcmp(errors,"strict") == 0)) {
	PyErr_Format(PyExc_UnicodeError,
		     "ASCII decoding error: %.400s",
		     details);
	return -1;
    }
    else if (strcmp(errors,"ignore") == 0) {
	return 0;
    }
    else if (strcmp(errors,"replace") == 0) {
	**dest = Py_UNICODE_REPLACEMENT_CHARACTER;
	(*dest)++;
	return 0;
    }
    else {
	PyErr_Format(PyExc_ValueError,
		     "ASCII decoding error; "
		     "unknown error handling code: %.400s",
		     errors);
	return -1;
    }
}

DL_EXPORT(PyObject *)
PyUnicode_DecodeASCII(const char *s,
		      int size,
		      const char *errors)
{
    PyUnicodeObject *v;
    Py_UNICODE *p;

    /* ASCII is equivalent to the first 128 ordinals in Unicode. */
    if (size == 1 && *(unsigned char*)s < 128) {
	Py_UNICODE r = *(unsigned char*)s;
	return PyUnicode_FromUnicode(&r, 1);
    }

    v = _PyUnicode_New(size);
    if (v == NULL)
	goto onError;
    if (size == 0)
	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
    while (size-- > 0) {
	register unsigned char c;

	c = (unsigned char)*s++;
	if (c < 128)
	    *p++ = c;
	else if (ascii_decoding_error(&s, &p, errors,
				      "ordinal not in range(128)"))
		goto onError;
    }
    if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
	    goto onError;
    return (PyObject *)v;

 onError:
    Py_XDECREF(v);
    return NULL;
}

static
int ascii_encoding_error(const Py_UNICODE **source,
			 char **dest,
			 const char *errors,
			 const char *details)
{
    if ((errors == NULL) ||
	(strcmp(errors,"strict") == 0)) {
	PyErr_Format(PyExc_UnicodeError,
		     "ASCII encoding error: %.400s",
		     details);
	return -1;
    }
    else if (strcmp(errors,"ignore") == 0) {
	return 0;
    }
    else if (strcmp(errors,"replace") == 0) {
	**dest = '?';
	(*dest)++;
	return 0;
    }
    else {
	PyErr_Format(PyExc_ValueError,
		     "ASCII encoding error; "
		     "unknown error handling code: %.400s",
		     errors);
	return -1;
    }
}

DL_EXPORT(PyObject *)
PyUnicode_EncodeASCII(const Py_UNICODE *p,
		      int size,
		      const char *errors)
{
    PyObject *repr;
    char *s, *start;

    repr = PyString_FromStringAndSize(NULL, size);
    if (repr == NULL)
        return NULL;
    if (size == 0)
	return repr;

    s = PyString_AS_STRING(repr);
    start = s;
    while (size-- > 0) {
        Py_UNICODE ch = *p++;
	if (ch >= 128) {
	    if (ascii_encoding_error(&p, &s, errors,
				      "ordinal not in range(128)"))
		goto onError;
	}
	else
            *s++ = (char)ch;
    }
    /* Resize if error handling skipped some characters */
    if (s - start < PyString_GET_SIZE(repr))
	_PyString_Resize(&repr, s - start);
    return repr;

 onError:
    Py_DECREF(repr);
    return NULL;
}

DL_EXPORT(PyObject *)
PyUnicode_AsASCIIString(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
	PyErr_BadArgument();
	return NULL;
    }
    return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
				 PyUnicode_GET_SIZE(unicode),
				 NULL);
}

#if defined(MS_WIN32) && defined(HAVE_USABLE_WCHAR_T)

/* --- MBCS codecs for Windows -------------------------------------------- */

DL_EXPORT(PyObject *)
PyUnicode_DecodeMBCS(const char *s,
		     int size,
		     const char *errors)
{
    PyUnicodeObject *v;
    Py_UNICODE *p;

    /* First get the size of the result */
    DWORD usize = MultiByteToWideChar(CP_AC

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -