📄 stringobject.c
字号:
return string_getbuffer(op);
return ((PyStringObject *)op) -> ob_sval;
}
/* Expose the internal character buffer of a string object.

   obj -- a string object or, when Py_USING_UNICODE is defined, a Unicode
          object (which is first run through
          _PyUnicode_AsDefaultEncodedString()).
   s   -- receives a pointer to the character buffer; must not be NULL.
   len -- if non-NULL, receives the size of the string.  If NULL, the
          string is required to contain no embedded null bytes.

   Returns 0 on success.  Returns -1 with an exception set when s is NULL
   (bad internal call), when obj has the wrong type (TypeError), when the
   Unicode conversion fails, or when len is NULL and the buffer contains a
   null byte before its end (TypeError).

   NOTE(review): in the Unicode case the converted object is not DECREF'ed
   here; presumably the reference is borrowed from the Unicode object --
   confirm against _PyUnicode_AsDefaultEncodedString(). */
DL_EXPORT(int)
PyString_AsStringAndSize(register PyObject *obj,
                         register char **s,
                         register int *len)
{
    if (s == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }

    if (!PyString_Check(obj)) {
#ifdef Py_USING_UNICODE
        if (PyUnicode_Check(obj)) {
            obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
            if (obj == NULL)
                return -1;
        }
        else
#endif
        {
            PyErr_Format(PyExc_TypeError,
                         "expected string or Unicode object, "
                         "%.200s found", obj->ob_type->tp_name);
            return -1;
        }
    }

    *s = PyString_AS_STRING(obj);

    /* Caller wants the explicit length: embedded nulls are acceptable. */
    if (len != NULL) {
        *len = PyString_GET_SIZE(obj);
        return 0;
    }

    /* No length requested: the C-string length must equal the object
       size, otherwise there is a null byte somewhere inside. */
    if ((int)strlen(*s) == PyString_GET_SIZE(obj))
        return 0;

    PyErr_SetString(PyExc_TypeError,
                    "expected string without null bytes");
    return -1;
}
/* Methods */
/* Print slot for string objects: write op to the stdio stream fp.

   op    -- the string (or string subclass instance) to print.
   fp    -- destination stream.
   flags -- if Py_PRINT_RAW is set the raw bytes are written unchanged;
            otherwise a quoted, escaped representation is written.

   Returns 0 on success, or -1 if a subclass instance could not be
   converted with PyObject_Str(). */
static int
string_print(PyStringObject *op, FILE *fp, int flags)
{
    int i;
    char c;
    int quote;

    /* XXX Ought to check for interrupts when writing long strings */
    if (! PyString_CheckExact(op)) {
        int ret;
        /* A str subclass may have its own __str__ method. */
        op = (PyStringObject *) PyObject_Str((PyObject *)op);
        if (op == NULL)
            return -1;
        ret = string_print(op, fp, flags);
        Py_DECREF(op);
        return ret;
    }
    if (flags & Py_PRINT_RAW) {
        fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
        return 0;
    }

    /* Figure out which quote to use; single is preferred.  The scan uses
       memchr over the full ob_size rather than strchr, because the
       string may contain embedded null bytes and strchr would stop at
       the first one, ignoring any quotes that follow it. */
    quote = '\'';
    if (memchr(op->ob_sval, '\'', (size_t) op->ob_size) &&
        !memchr(op->ob_sval, '"', (size_t) op->ob_size))
        quote = '"';

    fputc(quote, fp);
    for (i = 0; i < op->ob_size; i++) {
        c = op->ob_sval[i];
        if (c == quote || c == '\\')
            fprintf(fp, "\\%c", c);
        else if (c == '\t')
            fputs("\\t", fp);
        else if (c == '\n')
            fputs("\\n", fp);
        else if (c == '\r')
            fputs("\\r", fp);
        else if (c < ' ' || c >= 0x7f)
            /* Mask to a byte so a negative (signed) char still prints
               as two hex digits. */
            fprintf(fp, "\\x%02x", c & 0xff);
        else
            fputc(c, fp);
    }
    fputc(quote, fp);
    return 0;
}
/* Build the quoted, escaped representation of a string object.

   op -- the string to represent.

   Returns a new string object, or NULL with an exception set:
   OverflowError when the worst-case result size would not fit in an int,
   or whatever PyString_FromStringAndSize() raised on allocation failure.

   The result is first allocated at its worst-case size (every byte
   expanding to a 4-character "\xNN" escape, plus two quotes) and then
   shrunk to the number of characters actually written. */
static PyObject *
string_repr(register PyStringObject *op)
{
    size_t newsize;
    PyObject *v;
    register int i;
    register char c;
    register char *p;
    int quote;

    /* Check the bound before multiplying so that neither the size_t
       computation nor the int passed to the allocator can wrap. */
    if (op->ob_size > (INT_MAX - 2) / 4) {
        PyErr_SetString(PyExc_OverflowError,
                        "string is too large to make repr");
        return NULL;
    }
    newsize = 2 + 4 * (size_t) op->ob_size;

    v = PyString_FromStringAndSize((char *)NULL, (int) newsize);
    if (v == NULL)
        return NULL;

    /* Figure out which quote to use; single is preferred.  memchr over
       the full ob_size is used instead of strchr so that quotes located
       after an embedded null byte are taken into account. */
    quote = '\'';
    if (memchr(op->ob_sval, '\'', (size_t) op->ob_size) &&
        !memchr(op->ob_sval, '"', (size_t) op->ob_size))
        quote = '"';

    p = PyString_AS_STRING(v);
    *p++ = quote;
    for (i = 0; i < op->ob_size; i++) {
        /* There's at least enough room for a hex escape
           and a closing quote. */
        assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
        c = op->ob_sval[i];
        if (c == quote || c == '\\') {
            *p++ = '\\';
            *p++ = c;
        }
        else if (c == '\t') {
            *p++ = '\\';
            *p++ = 't';
        }
        else if (c == '\n') {
            *p++ = '\\';
            *p++ = 'n';
        }
        else if (c == '\r') {
            *p++ = '\\';
            *p++ = 'r';
        }
        else if (c < ' ' || c >= 0x7f) {
            /* For performance, we don't want to call
               PyOS_snprintf here (extra layers of
               function call). */
            sprintf(p, "\\x%02x", c & 0xff);
            p += 4;
        }
        else
            *p++ = c;
    }
    assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
    *p++ = quote;
    *p = '\0';

    /* NOTE(review): the return value of _PyString_Resize is not checked;
       presumably it leaves v as NULL with an exception set on failure,
       which makes returning v correct -- confirm. */
    _PyString_Resize(&v, (int) (p - PyString_AS_STRING(v)));
    return v;
}
/* str() slot for string objects.

   s -- must satisfy PyString_Check (asserted).

   An exact string is returned as-is with its reference count bumped.
   An instance of a subtype is copied into a fresh object created by
   PyString_FromStringAndSize() holding the same bytes. */
static PyObject *
string_str(PyObject *s)
{
    PyStringObject *sub;

    assert(PyString_Check(s));

    if (!PyString_CheckExact(s)) {
        /* Subtype -- return genuine string with the same value. */
        sub = (PyStringObject *) s;
        return PyString_FromStringAndSize(sub->ob_sval, sub->ob_size);
    }

    Py_INCREF(s);
    return s;
}
/* Sequence length slot: report the ob_size field of the string. */
static int
string_length(PyStringObject *a)
{
    int length = a->ob_size;

    return length;
}
static PyObject *
string_concat(register PyStringObject *a, register PyObject *bb)
{
register unsigned int size;
register PyStringObject *op;
if (!PyString_Check(bb)) {
#ifdef Py_USING_UNICODE
if (PyUnicode_Check(bb))
return PyUnicode_Concat((PyObject *)a, bb);
#endif
PyErr_Format(PyExc_TypeError,
"cannot concatenate 'str' and '%.200s' objects",
bb->ob_type->tp_name);
return NULL;
}
#define b ((PyStringObject *)bb)
/* Optimize cases with empty left or right operand */
if ((a->ob_size == 0 || b->ob_size == 0) &&
PyString_CheckExact(a) && PyString_CheckExact(b)) {
if (a->ob_size == 0) {
Py_INCREF(bb);
return bb;
}
Py_INCREF(a);
return (PyObject *)a;
}
size = a->ob_size + b->ob_size;
/* PyObject_NewVar is inlined */
op = (PyStringObject *)
PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
if (op == NULL)
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
#ifdef CACHE_HASH
op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
op->ob_sinterned = NULL;
#endif
memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
op->ob_sval[size] = '\0';
return (PyObject *) op;
#undef b
}
/* Sequence repetition slot: a * n.

   a -- the string to repeat.
   n -- repeat count; negative values are treated as 0.

   Returns a new reference to the result, or NULL with an exception set
   (OverflowError if the result length does not fit in an int or the
   allocation size does not fit in a size_t, MemoryError if allocation
   fails).

   If the result has the same length as a and a is an exact string, a
   itself is returned with its reference count incremented. */
static PyObject *
string_repeat(register PyStringObject *a, register int n)
{
    register int i;
    register int size;
    register PyStringObject *op;
    size_t nbytes;

    if (n < 0)
        n = 0;

    /* watch out for overflows: the size can overflow int,
     * and the # of bytes needed can overflow size_t.
     * The int check is done *before* multiplying, because signed
     * overflow is undefined behaviour and cannot be reliably detected
     * after the fact.
     */
    if (n != 0 && a->ob_size > INT_MAX / n) {
        PyErr_SetString(PyExc_OverflowError,
                        "repeated string is too long");
        return NULL;
    }
    size = a->ob_size * n;

    if (size == a->ob_size && PyString_CheckExact(a)) {
        Py_INCREF(a);
        return (PyObject *)a;
    }

    nbytes = (size_t) size * sizeof(char);
    if (nbytes / sizeof(char) != (size_t)size ||
        nbytes + sizeof(PyStringObject) <= nbytes) {
        PyErr_SetString(PyExc_OverflowError,
                        "repeated string is too long");
        return NULL;
    }

    /* Allocate and initialise the object by hand, header plus payload. */
    op = (PyStringObject *)
        PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
    if (op == NULL)
        return PyErr_NoMemory();
    PyObject_INIT_VAR(op, &PyString_Type, size);
#ifdef CACHE_HASH
    op->ob_shash = -1;
#endif
#ifdef INTERN_STRINGS
    op->ob_sinterned = NULL;
#endif

    /* Lay the copies end to end.  When a is empty, size is 0 and the
       loop body never runs, so the zero step cannot loop forever. */
    for (i = 0; i < size; i += a->ob_size)
        memcpy(op->ob_sval + i, a->ob_sval, (size_t) a->ob_size);
    op->ob_sval[size] = '\0';
    return (PyObject *) op;
}
/* String slice a[i:j] consists of characters a[i] ... a[j-1].

   Negative indices are clamped to 0 and j is clamped to the string
   length; if the resulting range is empty (j < i) a zero-length slice
   starting at i is produced.  A slice covering the whole of an exact
   string returns the string itself with its reference count bumped. */
static PyObject *
string_slice(register PyStringObject *a, register int i, register int j)
     /* j -- may be negative! */
{
    i = (i < 0) ? 0 : i;
    j = (j < 0) ? 0 : j;    /* Avoid signed/unsigned bug in next line */
    j = (j > a->ob_size) ? a->ob_size : j;

    if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
        /* It's the same as a */
        Py_INCREF(a);
        return (PyObject *)a;
    }

    if (j < i)
        j = i;
    return PyString_FromStringAndSize(a->ob_sval + i, (int) (j - i));
}
/* Sequence containment slot: el in a.

   a  -- the string being searched.
   el -- the element looked for.  A Unicode el (when Py_USING_UNICODE is
         defined) is handed to PyUnicode_Contains(); otherwise it must be
         a string of size exactly 1.

   Returns 1 if the character occurs in a, 0 if it does not, and -1 with
   TypeError set when el is not a one-character string. */
static int
string_contains(PyObject *a, PyObject *el)
{
    register char *cursor, *limit;
    register char wanted;

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(el))
        return PyUnicode_Contains(a, el);
#endif
    if (!PyString_Check(el) || PyString_Size(el) != 1) {
        PyErr_SetString(PyExc_TypeError,
            "'in <string>' requires character as left operand");
        return -1;
    }

    wanted = PyString_AsString(el)[0];
    cursor = PyString_AsString(a);
    limit = cursor + PyString_Size(a);

    /* Plain linear scan over the bytes of a. */
    for (; cursor < limit; cursor++) {
        if (*cursor == wanted)
            return 1;
    }
    return 0;
}
/* Sequence item slot: a[i].

   a -- the string being indexed.
   i -- index; must satisfy 0 <= i < a->ob_size.

   Returns a new reference to a one-character string, or NULL with
   IndexError set when i is out of range.  If the characters[] table
   already holds an object for this byte value it is reused (and, under
   COUNT_ALLOCS, one_strings is incremented); otherwise a new string is
   created with PyString_FromStringAndSize(). */
static PyObject *
string_item(PyStringObject *a, register int i)
{
    PyObject *cached;
    char *pchar;

    if (i < 0 || i >= a->ob_size) {
        PyErr_SetString(PyExc_IndexError, "string index out of range");
        return NULL;
    }

    pchar = a->ob_sval + i;
    /* Mask so the table index is the unsigned byte value. */
    cached = (PyObject *)characters[*pchar & UCHAR_MAX];
    if (cached == NULL)
        return PyString_FromStringAndSize(pchar, 1);

#ifdef COUNT_ALLOCS
    one_strings++;
#endif
    Py_INCREF(cached);
    return cached;
}
/* Rich comparison slot for string objects.

   a, b -- the operands; if either is not a string, Py_NotImplemented is
           returned.
   op   -- one of Py_LT, Py_LE, Py_EQ, Py_NE, Py_GT, Py_GE; any other
           value yields Py_NotImplemented.

   Returns a new reference to Py_True, Py_False or Py_NotImplemented.

   Ordering is decided by memcmp() over the common prefix, with the
   shorter string ordering first on a tie.  Identity (a == b) and Py_EQ
   are answered through dedicated fast paths. */
static PyObject*
string_richcompare(PyStringObject *a, PyStringObject *b, int op)
{
    int c;
    int len_a, len_b;
    int min_len;
    PyObject *result;

    /* Make sure both arguments are strings. */
    if (!PyString_Check(a) || !PyString_Check(b)) {
        result = Py_NotImplemented;
        goto out;
    }

    /* Same object: the answer depends only on the operator.  An
       unrecognised operator falls through to the general code below. */
    if (a == b) {
        if (op == Py_EQ || op == Py_LE || op == Py_GE) {
            result = Py_True;
            goto out;
        }
        if (op == Py_NE || op == Py_LT || op == Py_GT) {
            result = Py_False;
            goto out;
        }
    }

    if (op == Py_EQ) {
        /* Supporting Py_NE here as well does not save
           much time, since Py_NE is rarely used. */
        result = Py_False;
        if (a->ob_size == b->ob_size
            && a->ob_sval[0] == b->ob_sval[0]
            && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0)
            result = Py_True;
        goto out;
    }

    len_a = a->ob_size;
    len_b = b->ob_size;
    min_len = (len_a < len_b) ? len_a : len_b;

    /* Compare the first byte by hand before paying for memcmp. */
    c = 0;
    if (min_len > 0) {
        c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
        if (c == 0)
            c = memcmp(a->ob_sval, b->ob_sval, min_len);
    }

    /* Common prefix is equal: the shorter string sorts first. */
    if (c == 0) {
        if (len_a < len_b)
            c = -1;
        else if (len_a > len_b)
            c = 1;
    }

    switch (op) {
    case Py_LT:
        c = c < 0;
        break;
    case Py_LE:
        c = c <= 0;
        break;
    case Py_EQ:
        assert(0);  /* unreachable */
        break;
    case Py_NE:
        c = c != 0;
        break;
    case Py_GT:
        c = c > 0;
        break;
    case Py_GE:
        c = c >= 0;
        break;
    default:
        result = Py_NotImplemented;
        goto out;
    }
    result = c ? Py_True : Py_False;

  out:
    Py_INCREF(result);
    return result;
}
/* Equality test for two objects that are treated as string objects
   without any type check (both are cast to PyStringObject *).

   Returns non-zero when sizes and contents match, 0 otherwise.  The
   size and the first byte are compared before memcmp() is called. */
DL_EXPORT(int)
_PyString_Eq(PyObject *o1, PyObject *o2)
{
    PyStringObject *a = (PyStringObject*)o1;
    PyStringObject *b = (PyStringObject*)o2;

    if (a->ob_size != b->ob_size)
        return 0;
    if (*a->ob_sval != *b->ob_sval)
        return 0;
    return memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
}
/* Hash slot for string objects.

   With CACHE_HASH defined, a previously stored value in ob_shash (any
   value other than -1) is returned directly; with INTERN_STRINGS also
   defined, the hash stored in the object referenced by ob_sinterned is
   copied and returned.  Otherwise the hash is computed over the bytes
   of the string and, under CACHE_HASH, stored in ob_shash.

   The result is never -1: a computed -1 is replaced by -2. */
static long
string_hash(PyStringObject *a)
{
    register int remaining;
    register unsigned char *cursor;
    register long hash;

#ifdef CACHE_HASH
    if (a->ob_shash != -1)
        return a->ob_shash;
#ifdef INTERN_STRINGS
    if (a->ob_sinterned != NULL) {
        a->ob_shash = ((PyStringObject *)(a->ob_sinterned))->ob_shash;
        return a->ob_shash;
    }
#endif
#endif

    cursor = (unsigned char *) a->ob_sval;
    hash = *cursor << 7;
    /* Multiply-and-xor over each byte of the string. */
    for (remaining = a->ob_size; remaining > 0; remaining--)
        hash = (1000003*hash) ^ *cursor++;
    hash ^= a->ob_size;
    if (hash == -1)
        hash = -2;

#ifdef CACHE_HASH
    a->ob_shash = hash;
#endif
    return hash;
}
/* Buffer interface: read-buffer accessor.

   self  -- the string object.
   index -- segment number; only 0 is accepted.
   ptr   -- receives a pointer to the string's bytes.

   Returns the string size, or -1 with SystemError set for any other
   segment index. */
static int
string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
{
    if (index == 0) {
        *ptr = (void *)self->ob_sval;
        return self->ob_size;
    }

    PyErr_SetString(PyExc_SystemError,
                    "accessing non-existent string segment");
    return -1;
}
/* Buffer interface: write-buffer accessor.

   Always fails: sets TypeError and returns -1.  None of the arguments
   are used. */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
    (void)self;
    (void)index;
    (void)ptr;

    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
/* Buffer interface: segment count.

   self -- the string object.
   lenp -- if non-NULL, receives the string size.

   Always returns 1. */
static int
string_buffer_getsegcount(PyStringObject *self, int *lenp)
{
    if (lenp != NULL)
        *lenp = self->ob_size;
    return 1;
}
/* Buffer interface: character-buffer accessor.

   self  -- the string object.
   index -- segment number; only 0 is accepted.
   ptr   -- receives a pointer to the string's characters.

   Returns the string size, or -1 with SystemError set for any other
   segment index. */
static int
string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
{
    if (index == 0) {
        *ptr = self->ob_sval;
        return self->ob_size;
    }

    PyErr_SetString(PyExc_SystemError,
                    "accessing non-existent string segment");
    return -1;
}
/* Sequence-protocol slot table for string objects.  Item and slice
   assignment slots are left empty (0). */
static const PySequenceMethods string_as_sequence = {
    (inquiry)string_length,         /* sq_length */
    (binaryfunc)string_concat,      /* sq_concat */
    (intargfunc)string_repeat,      /* sq_repeat */
    (intargfunc)string_item,        /* sq_item */
    (intintargfunc)string_slice,    /* sq_slice */
    0,                              /* sq_ass_item */
    0,                              /* sq_ass_slice */
    (objobjproc)string_contains     /* sq_contains */
};
/* Buffer-protocol slot table for string objects: read buffer, write
   buffer, segment count and character buffer accessors, in that order. */
static const PyBufferProcs string_as_buffer = {
    (getreadbufferproc)string_buffer_getreadbuf,
    (getwritebufferproc)string_buffer_getwritebuf,
    (getsegcountproc)string_buffer_getsegcount,
    (getcharbufferproc)string_buffer_getcharbuf,
};
/* Selectors for which end(s) of a string a strip operation works on.
   The values are also used as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
/* Format strings indexed by the selectors above.  Each has the shape
   "|O:<method name>".
   NOTE(review): these look like argument-parsing formats taking one
   optional object; the code that consumes them is outside this view --
   confirm. */
static const char *const stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
/* Method name for selector i: the format string with its 3-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
/* Split a character buffer on runs of whitespace.

   s        -- the bytes to split (need not be null-terminated; only
               s[0] .. s[len-1] are examined).
   len      -- number of bytes in s.
   maxsplit -- maximum number of words split off; once this many words
               have been emitted, everything from the start of the next
               word to the end of the buffer becomes the final item.

   Returns a new list of string objects, or NULL if creating the list,
   creating an item or appending an item fails (the partially built list
   is released).  Leading whitespace never produces an item. */
static PyObject *
split_whitespace(const char *s, int len, int maxsplit)
{
    int pos = 0;        /* scan position */
    int start = 0;      /* start of the current word / unconsumed tail */
    int status;
    PyObject *piece;
    PyObject *result = PyList_New(0);

    if (result == NULL)
        return NULL;

    while (pos < len) {
        /* Skip leading whitespace, then scan over one word. */
        while (pos < len && isspace(Py_CHARMASK(s[pos])))
            pos++;
        start = pos;
        while (pos < len && !isspace(Py_CHARMASK(s[pos])))
            pos++;

        /* No word found: pos has reached len, so the loop ends. */
        if (start >= pos)
            continue;

        /* Split budget used up: leave start at this word so that the
           remainder is emitted as one item below. */
        if (maxsplit-- <= 0)
            break;

        piece = PyString_FromStringAndSize(s + start, (int)(pos - start));
        if (piece == NULL)
            goto fail;
        status = PyList_Append(result, piece);
        Py_DECREF(piece);
        if (status < 0)
            goto fail;

        /* Swallow the separator so a trailing run of whitespace does not
           end up in a final item. */
        while (pos < len && isspace(Py_CHARMASK(s[pos])))
            pos++;
        start = pos;
    }

    /* Anything left over (only after hitting maxsplit) is the last item. */
    if (start < len) {
        piece = PyString_FromStringAndSize(s + start, (int)(len - start));
        if (piece == NULL)
            goto fail;
        status = PyList_Append(result, piece);
        Py_DECREF(piece);
        if (status < 0)
            goto fail;
    }
    return result;

  fail:
    Py_DECREF(result);
    return NULL;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -