📄 stringobject.c
字号:
/* Portions Copyright (c) 2005-2007 Nokia Corporation */
/* String object implementation */
#include "Python.h"
#include <ctype.h>
#ifdef COUNT_ALLOCS
int null_strings, one_strings;
#endif
#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
#define UCHAR_MAX 255
#endif
#ifndef SYMBIAN
static PyStringObject *characters[UCHAR_MAX + 1];
#ifndef DONT_SHARE_SHORT_STRINGS
static PyStringObject *nullstring;
#endif
#else
#define characters ((PyStringObject**)(PYTHON_GLOBALS->characters))
#define nullstring ((PyStringObject*)(PYTHON_GLOBALS->nullstring))
#endif /* SYMBIAN */
/*
PyString_FromStringAndSize() and PyString_FromString() try in certain cases
to share string objects. When the size of the string is zero, these
routines always return a pointer to the same string object; when the size
is one, they return a pointer to an already existing object if the contents
of the string is known. For PyString_FromString() this is always the case,
for PyString_FromStringAndSize() this is the case when the first argument
in not NULL.
A common practice of allocating a string and then filling it in or changing
it must be done carefully. It is only allowed to change the contents of
the string if the object was gotten from PyString_FromStringAndSize() with
a NULL first argument, because in the future these routines may try to do
even more sharing of objects.
The string in the `str' parameter does not have to be null-character
terminated. (Therefore it is safe to construct a substring by using
`PyString_FromStringAndSize(origstring, substrlen)'.)
The parameter `size' denotes number of characters to allocate, not
counting the null terminating character. If the `str' argument is
not NULL, then it points to a of length `size'. For
PyString_FromString, this string must be null-terminated.
The member `op->ob_size' denotes the number of bytes of data in the string,
not counting the null terminating character, and is therefore equal to the
`size' parameter.
*/
DL_EXPORT(PyObject *)
PyString_FromStringAndSize(const char *str, int size)
{
register PyStringObject *op;
#ifndef DONT_SHARE_SHORT_STRINGS
if (size == 0 && (op = nullstring) != NULL) {
#ifdef COUNT_ALLOCS
null_strings++;
#endif
Py_INCREF(op);
return (PyObject *)op;
}
if (size == 1 && str != NULL &&
(op = characters[*str & UCHAR_MAX]) != NULL)
{
#ifdef COUNT_ALLOCS
one_strings++;
#endif
Py_INCREF(op);
return (PyObject *)op;
}
#endif /* DONT_SHARE_SHORT_STRINGS */
/* PyObject_NewVar is inlined */
op = (PyStringObject *)
PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
if (op == NULL)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
#ifdef CACHE_HASH
op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
op->ob_sinterned = NULL;
#endif
if (str != NULL)
memcpy(op->ob_sval, str, size);
op->ob_sval[size] = '\0';
#ifndef DONT_SHARE_SHORT_STRINGS
if (size == 0) {
PyObject *t = (PyObject *)op;
PyString_InternInPlace(&t);
op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1 && str != NULL) {
PyObject *t = (PyObject *)op;
PyString_InternInPlace(&t);
op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
#endif
return (PyObject *) op;
}
DL_EXPORT(PyObject *)
PyString_FromString(const char *str)
{
register size_t size;
register PyStringObject *op;
assert(str != NULL);
size = strlen(str);
if (size > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"string is too long for a Python string");
return NULL;
}
#ifndef DONT_SHARE_SHORT_STRINGS
if (size == 0 && (op = nullstring) != NULL) {
#ifdef COUNT_ALLOCS
null_strings++;
#endif
Py_INCREF(op);
return (PyObject *)op;
}
if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
#ifdef COUNT_ALLOCS
one_strings++;
#endif
Py_INCREF(op);
return (PyObject *)op;
}
#endif /* DONT_SHARE_SHORT_STRINGS */
/* PyObject_NewVar is inlined */
op = (PyStringObject *)
PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
if (op == NULL)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
#ifdef CACHE_HASH
op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
op->ob_sinterned = NULL;
#endif
memcpy(op->ob_sval, str, size+1);
#ifndef DONT_SHARE_SHORT_STRINGS
if (size == 0) {
PyObject *t = (PyObject *)op;
PyString_InternInPlace(&t);
op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1) {
PyObject *t = (PyObject *)op;
PyString_InternInPlace(&t);
op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
#endif
return (PyObject *) op;
}
DL_EXPORT(PyObject *)
PyString_FromFormatV(const char *format, va_list vargs)
{
va_list count;
int n = 0;
const char* f;
char *s;
PyObject* string;
#ifdef VA_LIST_IS_ARRAY
memcpy(count, vargs, sizeof(va_list));
#else
#ifdef __va_copy
__va_copy(count, vargs);
#else
count = vargs;
#endif
#endif
/* step 1: figure out how large a buffer we need */
for (f = format; *f; f++) {
if (*f == '%') {
const char* p = f;
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
;
/* skip the 'l' in %ld, since it doesn't change the
width. although only %d is supported (see
"expand" section below), others can be easily
added */
if (*f == 'l' && *(f+1) == 'd')
++f;
switch (*f) {
case 'c':
(void)va_arg(count, int);
/* fall through... */
case '%':
n++;
break;
case 'd': case 'i': case 'x':
(void) va_arg(count, int);
/* 20 bytes is enough to hold a 64-bit
integer. Decimal takes the most space.
This isn't enough for octal. */
n += 20;
break;
case 's':
s = va_arg(count, char*);
n += strlen(s);
break;
case 'p':
(void) va_arg(count, int);
/* maximum 64-bit pointer representation:
* 0xffffffffffffffff
* so 19 characters is enough.
* XXX I count 18 -- what's the extra for?
*/
n += 19;
break;
default:
/* if we stumble upon an unknown
formatting code, copy the rest of
the format string to the output
string. (we cannot just skip the
code, since there's no way to know
what's in the argument list) */
n += strlen(p);
goto expand;
}
} else
n++;
}
expand:
/* step 2: fill the buffer */
/* Since we've analyzed how much space we need for the worst case,
use sprintf directly instead of the slower PyOS_snprintf. */
string = PyString_FromStringAndSize(NULL, n);
if (!string)
return NULL;
s = PyString_AsString(string);
for (f = format; *f; f++) {
if (*f == '%') {
const char* p = f++;
int i, longflag = 0;
/* parse the width.precision part (we're only
interested in the precision value, if any) */
n = 0;
while (isdigit(Py_CHARMASK(*f)))
n = (n*10) + *f++ - '0';
if (*f == '.') {
f++;
n = 0;
while (isdigit(Py_CHARMASK(*f)))
n = (n*10) + *f++ - '0';
}
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
f++;
/* handle the long flag, but only for %ld. others
can be added when necessary. */
if (*f == 'l' && *(f+1) == 'd') {
longflag = 1;
++f;
}
switch (*f) {
case 'c':
*s++ = va_arg(vargs, int);
break;
case 'd':
if (longflag)
sprintf(s, "%ld", va_arg(vargs, long));
else
sprintf(s, "%d", va_arg(vargs, int));
s += strlen(s);
break;
case 'i':
sprintf(s, "%i", va_arg(vargs, int));
s += strlen(s);
break;
case 'x':
sprintf(s, "%x", va_arg(vargs, int));
s += strlen(s);
break;
case 's':
p = va_arg(vargs, char*);
i = strlen(p);
if (n > 0 && i > n)
i = n;
memcpy(s, p, i);
s += i;
break;
case 'p':
sprintf(s, "%p", va_arg(vargs, void*));
/* %p is ill-defined: ensure leading 0x. */
if (s[1] == 'X')
s[1] = 'x';
else if (s[1] != 'x') {
memmove(s+2, s, strlen(s)+1);
s[0] = '0';
s[1] = 'x';
}
s += strlen(s);
break;
case '%':
*s++ = '%';
break;
default:
strcpy(s, p);
s += strlen(s);
goto end;
}
} else
*s++ = *f;
}
end:
_PyString_Resize(&string, s - PyString_AS_STRING(string));
return string;
}
DL_EXPORT(PyObject *)
PyString_FromFormat(const char *format, ...)
{
PyObject* ret;
va_list vargs;
#ifdef HAVE_STDARG_PROTOTYPES
va_start(vargs, format);
#else
va_start(vargs);
#endif
ret = PyString_FromFormatV(format, vargs);
va_end(vargs);
return ret;
}
DL_EXPORT(PyObject *)
PyString_Decode(const char *s,
int size,
const char *encoding,
const char *errors)
{
PyObject *v, *str;
str = PyString_FromStringAndSize(s, size);
if (str == NULL)
return NULL;
v = PyString_AsDecodedString(str, encoding, errors);
Py_DECREF(str);
return v;
}
DL_EXPORT(PyObject *)
PyString_AsDecodedObject(PyObject *str,
const char *encoding,
const char *errors)
{
PyObject *v;
if (!PyString_Check(str)) {
PyErr_BadArgument();
goto onError;
}
if (encoding == NULL) {
#ifdef Py_USING_UNICODE
encoding = PyUnicode_GetDefaultEncoding();
#else
PyErr_SetString(PyExc_ValueError, "no encoding specified");
goto onError;
#endif
}
/* Decode via the codec registry */
v = PyCodec_Decode(str, encoding, errors);
if (v == NULL)
goto onError;
return v;
onError:
return NULL;
}
DL_EXPORT(PyObject *)
PyString_AsDecodedString(PyObject *str,
const char *encoding,
const char *errors)
{
PyObject *v;
v = PyString_AsDecodedObject(str, encoding, errors);
if (v == NULL)
goto onError;
#ifdef Py_USING_UNICODE
/* Convert Unicode to a string using the default encoding */
if (PyUnicode_Check(v)) {
PyObject *temp = v;
v = PyUnicode_AsEncodedString(v, NULL, NULL);
Py_DECREF(temp);
if (v == NULL)
goto onError;
}
#endif
if (!PyString_Check(v)) {
PyErr_Format(PyExc_TypeError,
"decoder did not return a string object (type=%.400s)",
v->ob_type->tp_name);
Py_DECREF(v);
goto onError;
}
return v;
onError:
return NULL;
}
DL_EXPORT(PyObject *)
PyString_Encode(const char *s,
int size,
const char *encoding,
const char *errors)
{
PyObject *v, *str;
str = PyString_FromStringAndSize(s, size);
if (str == NULL)
return NULL;
v = PyString_AsEncodedString(str, encoding, errors);
Py_DECREF(str);
return v;
}
DL_EXPORT(PyObject *)
PyString_AsEncodedObject(PyObject *str,
const char *encoding,
const char *errors)
{
PyObject *v;
if (!PyString_Check(str)) {
PyErr_BadArgument();
goto onError;
}
if (encoding == NULL) {
#ifdef Py_USING_UNICODE
encoding = PyUnicode_GetDefaultEncoding();
#else
PyErr_SetString(PyExc_ValueError, "no encoding specified");
goto onError;
#endif
}
/* Encode via the codec registry */
v = PyCodec_Encode(str, encoding, errors);
if (v == NULL)
goto onError;
return v;
onError:
return NULL;
}
DL_EXPORT(PyObject *)
PyString_AsEncodedString(PyObject *str,
const char *encoding,
const char *errors)
{
PyObject *v;
v = PyString_AsEncodedObject(str, encoding, errors);
if (v == NULL)
goto onError;
#ifdef Py_USING_UNICODE
/* Convert Unicode to a string using the default encoding */
if (PyUnicode_Check(v)) {
PyObject *temp = v;
v = PyUnicode_AsEncodedString(v, NULL, NULL);
Py_DECREF(temp);
if (v == NULL)
goto onError;
}
#endif
if (!PyString_Check(v)) {
PyErr_Format(PyExc_TypeError,
"encoder did not return a string object (type=%.400s)",
v->ob_type->tp_name);
Py_DECREF(v);
goto onError;
}
return v;
onError:
return NULL;
}
static void
string_dealloc(PyObject *op)
{
op->ob_type->tp_free(op);
}
static int
string_getsize(register PyObject *op)
{
char *s;
int len;
if (PyString_AsStringAndSize(op, &s, &len))
return -1;
return len;
}
static /*const*/ char *
string_getbuffer(register PyObject *op)
{
char *s;
int len;
if (PyString_AsStringAndSize(op, &s, &len))
return NULL;
return s;
}
DL_EXPORT(int)
PyString_Size(register PyObject *op)
{
if (!PyString_Check(op))
return string_getsize(op);
return ((PyStringObject *)op) -> ob_size;
}
DL_EXPORT(/*const*/ char *)
PyString_AsString(register PyObject *op)
{
if (!PyString_Check(op))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -