📄 unicodeobject.h
字号:
/* Fast access macros.

   Each macro casts op to PyUnicodeObject * and reads a field directly;
   no type check is performed on op. */

/* The object's length field. */
#define PyUnicode_GET_SIZE(op) (((PyUnicodeObject *)(op))->length)

/* The length field multiplied by sizeof(Py_UNICODE). */
#define PyUnicode_GET_DATA_SIZE(op) \
    (sizeof(Py_UNICODE) * ((PyUnicodeObject *)(op))->length)

/* The object's str field. */
#define PyUnicode_AS_UNICODE(op) (((PyUnicodeObject *)(op))->str)

/* The object's str field, cast to const char *. */
#define PyUnicode_AS_DATA(op) ((const char *)((PyUnicodeObject *)(op))->str)
/* --- Constants ---------------------------------------------------------- */
/* Replacement character used during decoding when the errors argument
   is set to "replace". U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0. The value is cast to Py_UNICODE. */
#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
/* === Public API ========================================================= */
/* --- Plain Py_UNICODE --------------------------------------------------- */
/* Create a Unicode object from the Py_UNICODE buffer u of the given
   size. The buffer is copied into the new object.

   u may be NULL, in which case the contents of the new object are
   undefined and it is the caller's responsibility to fill in the data
   afterwards. Modifying the contents of a Unicode object after
   construction is only allowed if u was NULL. */
extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
const Py_UNICODE *u, /* Unicode buffer */
int size /* size of buffer */
);
/* Return a pointer to the Unicode object's internal Py_UNICODE buffer.
   The buffer must be treated as read-only, even though the return type
   is not const-qualified. */
extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
PyObject *unicode /* Unicode object */
);
/* Return the length of the Unicode object. */
extern DL_IMPORT(int) PyUnicode_GetSize(
PyObject *unicode /* Unicode object */
);
/* Return the maximum ordinal for a Unicode character, as a Py_UNICODE
   value. Takes no arguments. */
extern DL_IMPORT(Py_UNICODE) PyUnicode_GetMax(void);
/* Resize an already allocated Unicode object to the new size length.

   On success, *unicode is modified to point to the new (resized)
   object and 0 is returned.

   This API may only be called by the function which also called the
   Unicode constructor, and the refcount on the object must be 1;
   otherwise an error is returned.

   On error, an exception is set, -1 is returned and *unicode is left
   untouched. */
extern DL_IMPORT(int) PyUnicode_Resize(
PyObject **unicode, /* Pointer to the Unicode object */
int length /* New length */
);
/* Coerce obj to a Unicode object and return a reference with
   *incremented* refcount.

   Coercion is done in the following way:

   1. String and other char buffer compatible objects are decoded
      under the assumption that they contain data using the current
      default encoding. Decoding is done in "strict" mode.

   2. All other objects (including Unicode objects) raise an
      exception.

   NOTE(review): the description above does not mention the encoding
   and errors parameters; presumably they select the codec and error
   handling, with NULL meaning the default -- confirm against the
   implementation.

   Returns NULL in case of an error. The caller is responsible for
   decref'ing the returned object. */
extern DL_IMPORT(PyObject*) PyUnicode_FromEncodedObject(
    PyObject *obj,              /* Object */
    const char *encoding,       /* encoding */
    const char *errors          /* error handling */
    );
/* Coerce obj to a Unicode object and return a reference with
   *incremented* refcount.

   Unicode objects are passed back as-is (subclasses are converted to
   true Unicode objects); all other objects are delegated to
   PyUnicode_FromEncodedObject(obj, NULL, "strict"), which results in
   using the default encoding as the basis for decoding the object.

   Returns NULL in case of an error. The caller is responsible for
   decref'ing the returned object. */
extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
    PyObject *obj               /* Object */
    );
/* --- wchar_t support for platforms which support it --------------------- */
#ifdef HAVE_WCHAR_H
/* Create a Unicode object from the wchar_t buffer w of the given
   size. The buffer is copied into the new object. */
extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
    const wchar_t *w,           /* wchar_t buffer */
    int size                    /* size of buffer */
    );
/* Copy the Unicode object's contents into the wchar_t buffer w. At
   most size wchar_t characters are copied.

   Returns the number of wchar_t characters copied, or -1 in case of an
   error. */
extern DL_IMPORT(int) PyUnicode_AsWideChar(
    PyUnicodeObject *unicode,   /* Unicode object */
    wchar_t *w,                 /* wchar_t buffer */
    int size                    /* size of buffer */
    );
#endif
/* --- Unicode ordinals --------------------------------------------------- */
/* Create a Unicode object from the given Unicode code point ordinal.

   The ordinal must be in range(0x10000) on narrow Python builds (UCS2)
   and in range(0x110000) on wide builds (UCS4). A ValueError is raised
   if it is not. */
extern DL_IMPORT(PyObject*) PyUnicode_FromOrdinal(int ordinal);
/* === Builtin Codecs =====================================================
Many of these APIs take two arguments encoding and errors. These
parameters encoding and errors have the same semantics as the ones
of the builtin unicode() API.
Setting encoding to NULL causes the default encoding to be used.
Error handling is set by errors which may also be set to NULL
meaning to use the default handling defined for the codec. Default
error handling for all builtin codecs is "strict" (ValueErrors are
raised).
The codecs all use a similar interface. Only deviations from the
generic ones are documented.
*/
/* --- Manage the default encoding ---------------------------------------- */
/* Return a Python string holding the default encoded value of the
   Unicode object.

   The resulting string is cached in the Unicode object for subsequent
   use by this function. The cached version is needed to implement the
   character buffer interface and will live (at least) as long as the
   Unicode object itself.

   The refcount of the returned string is *not* incremented, so the
   caller must not decref it.

   The parameters are unnamed in this declaration. NOTE(review): the
   PyObject * is presumably the Unicode object and the const char * the
   errors argument -- confirm against the definition.

   *** Exported for internal use by the interpreter only !!! *** */
extern DL_IMPORT(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *, const char *);
/* Return the currently active default encoding as a C string.

   The default encoding is currently implemented as a run-time settable
   process global. This may change in future versions of the
   interpreter to become a parameter which is managed on a per-thread
   basis. */
extern DL_IMPORT(const char*) PyUnicode_GetDefaultEncoding(void);
/* Set the currently active default encoding to the given encoding
   name.

   Returns 0 on success, -1 in case of an error. */
extern DL_IMPORT(int) PyUnicode_SetDefaultEncoding(
const char *encoding /* Encoding name in standard form */
);
/* --- Generic Codecs ----------------------------------------------------- */
/* Create a Unicode object by decoding the encoded string s of the
   given size.

   encoding and errors follow the conventions described in the
   "Builtin Codecs" notes: a NULL encoding selects the default
   encoding, and a NULL errors selects the codec's default error
   handling. */
extern DL_IMPORT(PyObject*) PyUnicode_Decode(
const char *s, /* encoded string */
int size, /* size of buffer */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Encode a Py_UNICODE buffer of the given size and return the result
   as a Python string object.

   encoding and errors follow the conventions described in the
   "Builtin Codecs" notes: a NULL encoding selects the default
   encoding, and a NULL errors selects the codec's default error
   handling. */
extern DL_IMPORT(PyObject*) PyUnicode_Encode(
const Py_UNICODE *s, /* Unicode char buffer */
int size, /* number of Py_UNICODE chars to encode */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Encode a Unicode object and return the result as a Python string
   object.

   encoding and errors follow the conventions described in the
   "Builtin Codecs" notes: a NULL encoding selects the default
   encoding, and a NULL errors selects the codec's default error
   handling. */
extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
PyObject *unicode, /* Unicode object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* --- UTF-7 Codecs ------------------------------------------------------- */
/* Decode the UTF-7 encoded buffer string of the given length.

   errors selects the error handling; per the "Builtin Codecs" notes,
   NULL means the codec's default handling.

   NOTE(review): presumably returns a new Unicode object, or NULL with
   an exception set on error -- confirm against the implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF7(
const char *string, /* UTF-7 encoded string */
int length, /* size of string */
const char *errors /* error handling */
);
/* Encode length Py_UNICODE characters from the buffer data using
   UTF-7.

   encodeSetO and encodeWhiteSpace are flags that force the encoder to
   also encode, respectively, the characters in Set O (RFC 2152) and
   the whitespace characters space, tab, carriage return and linefeed.

   errors selects the error handling; per the "Builtin Codecs" notes,
   NULL means the codec's default handling.

   NOTE(review): presumably returns a new Python string object, or
   NULL with an exception set on error -- confirm against the
   implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF7(
const Py_UNICODE *data, /* Unicode char buffer */
int length, /* number of Py_UNICODE chars to encode */
int encodeSetO, /* force the encoder to encode characters in
Set O, as described in RFC2152 */
int encodeWhiteSpace, /* force the encoder to encode space, tab,
carriage return and linefeed characters */
const char *errors /* error handling */
);
/* --- UTF-8 Codecs ------------------------------------------------------- */
/* Decode the UTF-8 encoded buffer string of the given length.

   errors selects the error handling; per the "Builtin Codecs" notes,
   NULL means the codec's default handling.

   NOTE(review): presumably returns a new Unicode object, or NULL with
   an exception set on error -- confirm against the implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
const char *string, /* UTF-8 encoded string */
int length, /* size of string */
const char *errors /* error handling */
);
/* Encode the given Unicode object using UTF-8. This variant takes no
   errors argument.

   NOTE(review): presumably returns a new Python string object, or
   NULL with an exception set on error -- confirm against the
   implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
PyObject *unicode /* Unicode object */
);
/* Encode length Py_UNICODE characters from the buffer data using
   UTF-8.

   errors selects the error handling; per the "Builtin Codecs" notes,
   NULL means the codec's default handling.

   NOTE(review): presumably returns a new Python string object, or
   NULL with an exception set on error -- confirm against the
   implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
const Py_UNICODE *data, /* Unicode char buffer */
int length, /* number of Py_UNICODE chars to encode */
const char *errors /* error handling */
);
/* --- UTF-16 Codecs ------------------------------------------------------ */
/* Decode length bytes from the UTF-16 encoded buffer string and return
   the corresponding Unicode object.

   errors (if non-NULL) defines the error handling. It defaults to
   "strict".

   If byteorder is non-NULL, the decoder starts decoding using the
   given byte order:

     *byteorder == -1: little endian
     *byteorder == 0:  native order
     *byteorder == 1:  big endian

   In native mode, the first two bytes of the stream are checked for a
   BOM mark. If found, the BOM mark is analysed, the byte order is
   adjusted and the BOM is skipped. In the other modes, no BOM mark
   interpretation is done. After completion, *byteorder is set to the
   current byte order at the end of the input data.

   If byteorder is NULL, the codec starts in native order mode. */
extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
const char *string, /* UTF-16 encoded string */
int length, /* size of string */
const char *errors, /* error handling */
int *byteorder /* pointer to byteorder to use
0=native;-1=LE,1=BE; updated on
exit */
);
/* Return a Python string holding the Unicode object encoded as UTF-16
   in native byte order. The string always starts with a BOM mark. */
extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
PyObject *unicode /* Unicode object */
);
/* Return a Python string object holding the UTF-16 encoded value of
   the Unicode data.

   byteorder selects the byte order of the output:

     byteorder == -1: little endian
     byteorder == 0:  native byte order (writes a BOM mark)
     byteorder == 1:  big endian

   If byteorder is 0, the output string always starts with the Unicode
   BOM mark (U+FEFF). In the other two modes, no BOM mark is prepended.

   Note that Py_UNICODE data is interpreted as UTF-16 reduced to UCS-2.
   This trick makes it possible to add full UTF-16 capabilities at a
   later point without compromising the APIs. */
extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
const Py_UNICODE *data, /* Unicode char buffer */
int length, /* number of Py_UNICODE chars to encode */
const char *errors, /* error handling */
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
);
/* --- Unicode-Escape Codecs ---------------------------------------------- */
/* Decode the Unicode-Escape encoded buffer string of the given length.

   errors selects the error handling; per the "Builtin Codecs" notes,
   NULL means the codec's default handling.

   NOTE(review): presumably returns a new Unicode object, or NULL with
   an exception set on error -- confirm against the implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
const char *string, /* Unicode-Escape encoded string */
int length, /* size of string */
const char *errors /* error handling */
);
/* Encode the given Unicode object using the Unicode-Escape codec.
   This variant takes no errors argument.

   NOTE(review): presumably returns a new Python string object, or
   NULL with an exception set on error -- confirm against the
   implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
PyObject *unicode /* Unicode object */
);
/* Encode length Py_UNICODE characters from the buffer data using the
   Unicode-Escape codec. Unlike most encoders declared in this header,
   this one takes no errors argument.

   NOTE(review): presumably returns a new Python string object, or
   NULL with an exception set on error -- confirm against the
   implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
const Py_UNICODE *data, /* Unicode char buffer */
int length /* Number of Py_UNICODE chars to encode */
);
/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
/* Decode the Raw-Unicode-Escape encoded buffer string of the given
   length.

   errors selects the error handling; per the "Builtin Codecs" notes,
   NULL means the codec's default handling.

   NOTE(review): presumably returns a new Unicode object, or NULL with
   an exception set on error -- confirm against the implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
const char *string, /* Raw-Unicode-Escape encoded string */
int length, /* size of string */
const char *errors /* error handling */
);
/* Encode the given Unicode object using the Raw-Unicode-Escape codec.
   This variant takes no errors argument.

   NOTE(review): presumably returns a new Python string object, or
   NULL with an exception set on error -- confirm against the
   implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
PyObject *unicode /* Unicode object */
);
/* Encode length Py_UNICODE characters from the buffer data using the
   Raw-Unicode-Escape codec. Unlike most encoders declared in this
   header, this one takes no errors argument.

   NOTE(review): presumably returns a new Python string object, or
   NULL with an exception set on error -- confirm against the
   implementation. */
extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
const Py_UNICODE *data, /* Unicode char buffer */
int length /* Number of Py_UNICODE chars to encode */
);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -