📄 _sre.c
字号:
/*
 * join_list -- concatenate the elements of `list` into one object.
 *
 * list:    list of fragments to join.  This function takes over the
 *          caller's reference to it: the reference is released on every
 *          return path, including the error paths.
 * pattern: only used to obtain an empty separator, by taking the slice
 *          pattern[0:0], when the list holds two or more elements.
 *
 * Returns a new reference to the result, or NULL if any step fails.
 */
static PyObject*
join_list(PyObject* list, PyObject* pattern)
{
    PyObject* joiner;
#if PY_VERSION_HEX >= 0x01060000
    PyObject* function;
    PyObject* args;
#endif
    PyObject* result;

    switch (PyList_GET_SIZE(list)) {
    case 0:
        /* nothing to join: hand back an empty string */
        Py_DECREF(list);
        return PyString_FromString("");
    case 1:
        /* a single element is its own join result; keep it alive
           before the list (which owns it) goes away */
        result = PyList_GET_ITEM(list, 0);
        Py_INCREF(result);
        Py_DECREF(list);
        return result;
    }

    /* two or more elements: slice out a suitable separator from the
       pattern, and use that to join the entire list */
    joiner = PySequence_GetSlice(pattern, 0, 0);
    if (!joiner) {
        Py_DECREF(list); /* we own the list, so drop it on failure too */
        return NULL;
    }

#if PY_VERSION_HEX >= 0x01060000
    function = PyObject_GetAttrString(joiner, "join");
    if (!function) {
        Py_DECREF(joiner);
        Py_DECREF(list);
        return NULL;
    }
    args = PyTuple_New(1);
    if (!args) {
        Py_DECREF(function);
        Py_DECREF(joiner);
        Py_DECREF(list);
        return NULL;
    }
    /* the tuple steals our reference to the list from here on */
    PyTuple_SET_ITEM(args, 0, list);
    result = PyObject_CallObject(function, args);
    Py_DECREF(args); /* also removes list */
    Py_DECREF(function);
#else
    result = call(
        "string", "join",
        Py_BuildValue("OO", list, joiner)
        );
    /* "O" made Py_BuildValue take its own reference to the list, so the
       one handed to us still has to be released here */
    Py_DECREF(list);
#endif
    Py_DECREF(joiner);

    return result;
}
/*
 * pattern_findall -- build a list with one entry per match found while
 * scanning the source string.
 *
 * Python-level arguments: "source" (required), plus optional integers
 * "pos" and "endpos" that default to 0 and INT_MAX.
 *
 * The entry appended for each match depends on self->groups:
 *   0         -> the slice of the string from state.start to state.ptr
 *   1         -> the result of state_getslice() for group 1
 *   otherwise -> a tuple of state_getslice() results for groups 1..N
 *
 * Returns the new list, or NULL on failure (the partially built list is
 * released and state_fini() is called first).
 */
static PyObject*
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
{
    static const char *const kwlist[] = { "source", "pos", "endpos", NULL };
    SRE_STATE state;
    PyObject* matches;
    PyObject* entry;
    PyObject* string;
    int status;
    int group, lo, hi;
    int pos = 0;
    int endpos = INT_MAX;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
                                     &string, &pos, &endpos))
        return NULL;

    /* from here on `string` is whatever state_init() hands back */
    string = state_init(&state, self, string, pos, endpos);
    if (!string)
        return NULL;

    matches = PyList_New(0);
    if (!matches) {
        state_fini(&state);
        return NULL;
    }

    while (state.start <= state.end) {

        state_reset(&state);
        state.ptr = state.start;

        /* pick the search routine matching the character width */
        if (state.charsize == 1) {
            status = sre_search(&state, PatternObject_GetCode(self));
        } else {
#if defined(HAVE_UNICODE)
            status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
        }

        if (status == 0)
            break; /* no further match */
        if (status < 0) {
            pattern_error(status);
            goto error;
        }

        /* build the list entry directly instead of a match object */
        if (self->groups == 0) {
            lo = STATE_OFFSET(&state, state.start);
            hi = STATE_OFFSET(&state, state.ptr);
            entry = PySequence_GetSlice(string, lo, hi);
            if (!entry)
                goto error;
        } else if (self->groups == 1) {
            entry = state_getslice(&state, 1, string, 1);
            if (!entry)
                goto error;
        } else {
            entry = PyTuple_New(self->groups);
            if (!entry)
                goto error;
            for (group = 0; group < self->groups; group++) {
                PyObject* part = state_getslice(&state, group+1, string, 1);
                if (!part) {
                    Py_DECREF(entry);
                    goto error;
                }
                PyTuple_SET_ITEM(entry, group, part);
            }
        }

        status = PyList_Append(matches, entry);
        Py_DECREF(entry);
        if (status < 0)
            goto error;

        /* an empty match would never advance on its own, so step
           forward by one character in that case */
        if (state.ptr == state.start) {
            state.start = (void*) ((char*) state.ptr + state.charsize);
        } else {
            state.start = state.ptr;
        }
    }

    state_fini(&state);
    return matches;

error:
    Py_DECREF(matches);
    state_fini(&state);
    return NULL;
}
#if PY_VERSION_HEX >= 0x02020000
/*
 * pattern_finditer -- return a call-iterator built from a scanner's
 * "search" attribute.
 *
 * A scanner is obtained from pattern_scanner(pattern, args); its "search"
 * attribute is wrapped with PyCallIter_New() using Py_None as the
 * sentinel.  Returns the iterator, or NULL if any step fails.
 */
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    PyObject* scan_obj;
    PyObject* search_method;
    PyObject* result = NULL;

    scan_obj = pattern_scanner(pattern, args);
    if (scan_obj) {
        search_method = PyObject_GetAttrString(scan_obj, "search");
        /* the scanner reference is dropped whether or not the lookup
           succeeded */
        Py_DECREF(scan_obj);
        if (search_method) {
            result = PyCallIter_New(search_method, Py_None);
            Py_DECREF(search_method);
        }
    }

    return result;
}
#endif
/*
 * pattern_split -- split the source string around matches of the pattern.
 *
 * Python-level arguments: "source" (required) and an optional integer
 * "maxsplit" (default 0, meaning no limit on the number of splits).
 *
 * For every non-empty match the list receives the text between the
 * previous split point and the match, followed by one
 * state_getslice() result per group.  The text after the last split
 * point is always appended at the end, even when it is empty.
 *
 * Returns the new list, or NULL on failure (the partially built list is
 * released and state_fini() is called first).
 */
static PyObject*
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
{
    static const char *const kwlist[] = { "source", "maxsplit", NULL };
    SRE_STATE state;
    PyObject* pieces;
    PyObject* piece;
    PyObject* string;
    void* tail;          /* where the not-yet-emitted text begins */
    int status;
    int nsplits = 0;
    int group;
    int maxsplit = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
                                     &string, &maxsplit))
        return NULL;

    /* from here on `string` is whatever state_init() hands back */
    string = state_init(&state, self, string, 0, INT_MAX);
    if (!string)
        return NULL;

    pieces = PyList_New(0);
    if (!pieces) {
        state_fini(&state);
        return NULL;
    }

    tail = state.start;

    while (maxsplit == 0 || nsplits < maxsplit) {

        state_reset(&state);
        state.ptr = state.start;

        /* pick the search routine matching the character width */
        if (state.charsize == 1) {
            status = sre_search(&state, PatternObject_GetCode(self));
        } else {
#if defined(HAVE_UNICODE)
            status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
        }

        if (status == 0)
            break; /* no further match */
        if (status < 0) {
            pattern_error(status);
            goto error;
        }

        if (state.start == state.ptr) {
            /* empty match: stop if everything up to the end has been
               emitted, otherwise retry one character further on */
            if (tail == state.end)
                break;
            state.start = (void*) ((char*) state.ptr + state.charsize);
            continue;
        }

        /* text between the previous split point and this match */
        piece = PySequence_GetSlice(
            string,
            STATE_OFFSET(&state, tail),
            STATE_OFFSET(&state, state.start)
            );
        if (!piece)
            goto error;
        status = PyList_Append(pieces, piece);
        Py_DECREF(piece);
        if (status < 0)
            goto error;

        /* then one entry per group, if the pattern has any */
        for (group = 0; group < self->groups; group++) {
            piece = state_getslice(&state, group+1, string, 0);
            if (!piece)
                goto error;
            status = PyList_Append(pieces, piece);
            Py_DECREF(piece);
            if (status < 0)
                goto error;
        }

        nsplits++;

        /* resume scanning, and the next segment, right after the match */
        state.start = state.ptr;
        tail = state.ptr;
    }

    /* remainder after the last split point (appended even if empty) */
    piece = PySequence_GetSlice(
        string, STATE_OFFSET(&state, tail), state.endpos
        );
    if (!piece)
        goto error;
    status = PyList_Append(pieces, piece);
    Py_DECREF(piece);
    if (status < 0)
        goto error;

    state_fini(&state);
    return pieces;

error:
    Py_DECREF(pieces);
    state_fini(&state);
    return NULL;
}
static PyObject*
pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
int count, int subn)
{
SRE_STATE state;
PyObject* list;
PyObject* item;
PyObject* filter;
PyObject* args;
PyObject* match;
void* ptr;
int status;
int n;
int i, b, e;
int filter_is_callable;
if (PyCallable_Check(template)) {
/* sub/subn takes either a function or a template */
filter = template;
Py_INCREF(filter);
filter_is_callable = 1;
} else {
/* if not callable, check if it's a literal string */
int literal;
ptr = getstring(template, &n, &b);
if (ptr) {
if (b == 1) {
literal = sre_literal_template(ptr, n);
} else {
#if defined(HAVE_UNICODE)
literal = sre_uliteral_template(ptr, n);
#endif
}
} else {
PyErr_Clear();
literal = 0;
}
if (literal) {
filter = template;
Py_INCREF(filter);
filter_is_callable = 0;
} else {
/* not a literal; hand it over to the template compiler */
filter = call(
SRE_MODULE, "_subx",
Py_BuildValue("OO", self, template)
);
if (!filter)
return NULL;
filter_is_callable = PyCallable_Check(filter);
}
}
string = state_init(&state, self, string, 0, INT_MAX);
if (!string) {
Py_DECREF(filter);
return NULL;
}
list = PyList_New(0);
if (!list) {
Py_DECREF(filter);
state_fini(&state);
return NULL;
}
n = i = 0;
while (!count || n < count) {
state_reset(&state);
state.ptr = state.start;
if (state.charsize == 1) {
status = sre_search(&state, PatternObject_GetCode(self));
} else {
#if defined(HAVE_UNICODE)
status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
}
if (status <= 0) {
if (status == 0)
break;
pattern_error(status);
goto error;
}
b = STATE_OFFSET(&state, state.start);
e = STATE_OFFSET(&state, state.ptr);
if (i < b) {
/* get segment before this match */
item = PySequence_GetSlice(string, i, b);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
} else if (i == b && i == e && n > 0)
/* ignore empty match on latest position */
goto next;
if (filter_is_callable) {
/* pass match object through filter */
match = pattern_new_match(self, &state, 1);
if (!match)
goto error;
args = Py_BuildValue("(O)", match);
if (!args) {
Py_DECREF(match);
goto error;
}
item = PyObject_CallObject(filter, args);
Py_DECREF(args);
Py_DECREF(match);
if (!item)
goto error;
} else {
/* filter is literal string */
item = filter;
Py_INCREF(item);
}
/* add to list */
if (item != Py_None) {
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
}
i = e;
n = n + 1;
next:
/* move on */
if (state.ptr == state.start)
state.start = (void*) ((char*) state.ptr + state.charsize);
else
state.start = state.ptr;
}
/* get segment following last match */
if (i < state.endpos) {
item = PySequence_GetSlice(string, i, state.endpos);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -