📄 _sre.c
字号:
self->flags = flags;
self->groups = groups;
Py_XINCREF(groupindex);
self->groupindex = groupindex;
Py_XINCREF(indexgroup);
self->indexgroup = indexgroup;
return (PyObject*) self;
}
/* Return the size, in bytes, of one SRE_CODE unit as a Python integer.
   Neither argument is used. */
static PyObject *
sre_codesize(PyObject* self, PyObject* args)
{
    /* the "i" format unit reads a C int from the variable argument list,
       while sizeof yields a size_t that may be wider than int; convert
       explicitly so the argument type matches the format */
    return Py_BuildValue("i", (int) sizeof(SRE_CODE));
}
/* Lower-case a single character code according to the given flags.
   Expects an argument tuple of two integers (character, flags) and
   returns the lowered character as a Python integer.  SRE_FLAG_LOCALE
   takes precedence over SRE_FLAG_UNICODE; with neither flag set the
   plain sre_lower mapping is used. */
static PyObject *
sre_getlower(PyObject* self, PyObject* args)
{
    int ch;
    int flagbits;

    if (!PyArg_ParseTuple(args, "ii", &ch, &flagbits))
        return NULL;

#if defined(HAVE_UNICODE)
    if (flagbits & SRE_FLAG_LOCALE)
        return Py_BuildValue("i", sre_lower_locale(ch));
    if (flagbits & SRE_FLAG_UNICODE)
        return Py_BuildValue("i", sre_lower_unicode(ch));
#else
    /* without unicode support both flags select the locale mapping */
    if (flagbits & (SRE_FLAG_LOCALE | SRE_FLAG_UNICODE))
        return Py_BuildValue("i", sre_lower_locale(ch));
#endif

    return Py_BuildValue("i", sre_lower(ch));
}
/* Return a state object to its pre-match condition: no marks recorded,
   no last group index, no active repeat context, and the mark storage
   released through mark_fini(). */
LOCAL(void)
state_reset(SRE_STATE* state)
{
    int slot = SRE_MARK_SIZE;

    /* FIXME: dynamic! */
    /* clear every slot of the fixed-size mark table */
    while (slot-- > 0)
        state->mark[slot] = NULL;

    state->lastmark = 0;
    state->lastindex = -1;
    state->repeat = NULL;

    mark_fini(state);
}
static void*
getstring(PyObject* string, int* p_length, int* p_charsize)
{
    /* Given a Python object, return a pointer to its character data and
       store its length (in characters) in *p_length and the size of one
       character (in bytes) in *p_charsize.  Return NULL with an
       exception set if the object is not a string or a compatible
       single-segment read buffer; the output parameters are left
       untouched in that case. */

    PyBufferProcs *buffer;
    int size, bytes, charsize;
    void* ptr;

#if defined(HAVE_UNICODE)
    if (PyUnicode_Check(string)) {
        /* unicode strings don't always support the buffer interface */
        ptr = (void*) PyUnicode_AS_DATA(string);
        bytes = PyUnicode_GET_DATA_SIZE(string);
        size = PyUnicode_GET_SIZE(string);
        charsize = sizeof(Py_UNICODE);
    } else {
#endif

    /* get pointer to string buffer */
    /* XXX:CW32 */
    buffer = (PyBufferProcs *)string->ob_type->tp_as_buffer;
    if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
        buffer->bf_getsegcount(string, NULL) != 1) {
        PyErr_SetString(PyExc_TypeError, "expected string or buffer");
        return NULL;
    }

    /* determine buffer size */
    bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
    if (bytes < 0) {
        PyErr_SetString(PyExc_TypeError, "buffer has negative size");
        return NULL;
    }

    /* determine character size */
#if PY_VERSION_HEX >= 0x01060000
    size = PyObject_Size(string);
#else
    size = PyObject_Length(string);
#endif
    if (size < 0) {
        /* the length query failed and has already set an exception;
           propagate it instead of masking it with "buffer size
           mismatch" below */
        return NULL;
    }

    /* a byte string, or any buffer with one byte per item, is narrow;
       a buffer with sizeof(Py_UNICODE) bytes per item is wide */
    if (PyString_Check(string) || bytes == size)
        charsize = 1;
#if defined(HAVE_UNICODE)
    else if (bytes == (int) (size * sizeof(Py_UNICODE)))
        charsize = sizeof(Py_UNICODE);
#endif
    else {
        PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
        return NULL;
    }

#if defined(HAVE_UNICODE)
    }
#endif

    *p_length = size;
    *p_charsize = charsize;

    return ptr;
}
/* Prepare a state object for matching `pattern` against `string`.
   The state is zeroed, the string's data pointer, length and character
   size are obtained through getstring(), and start/end are clamped to
   the range [0, length].  The state takes its own reference to the
   string.  Returns `string` on success, or NULL if getstring() fails
   (the state holds no reference in that case). */
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
           int start, int end)
{
    int nchars;
    int width;
    void* data;

    memset(state, 0, sizeof(SRE_STATE));
    state->lastindex = -1;

    data = getstring(string, &nchars, &width);
    if (data == NULL)
        return NULL;

    /* clamp the requested window to the string */
    start = (start < 0) ? 0 : ((start > nchars) ? nchars : start);
    end = (end < 0) ? 0 : ((end > nchars) ? nchars : end);

    /* keep the subject alive for as long as the state points into it */
    Py_INCREF(string);
    state->string = string;

    state->charsize = width;
    state->beginning = data;
    state->start = (void*) ((char*) data + start * state->charsize);
    state->end = (void*) ((char*) data + end * state->charsize);

    state->pos = start;
    state->endpos = end;

    /* pick the case-folding function; the locale flag wins over the
       unicode flag */
    state->lower = sre_lower;
    if (pattern->flags & SRE_FLAG_LOCALE) {
        state->lower = sre_lower_locale;
    } else if (pattern->flags & SRE_FLAG_UNICODE) {
#if defined(HAVE_UNICODE)
        state->lower = sre_lower_unicode;
#else
        state->lower = sre_lower_locale;
#endif
    }

    return string;
}
/* Release what a state object holds: its reference to the subject
   string (if any) and whatever mark_fini() cleans up. */
LOCAL(void)
state_fini(SRE_STATE* state)
{
    if (state->string != NULL) {
        Py_DECREF(state->string);
    }
    mark_fini(state);
}
/* Calculate the offset of `member` from the start of the string, in
   characters: the byte distance between `member` and state->beginning
   divided by state->charsize.  `state` is expanded twice, so pass an
   expression without side effects. */
#define STATE_OFFSET(state, member)\
(((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
/* Return the text captured by group `index` as a slice of `string`.
   `index` is the 1-based group number; the group's start and end
   pointers live in state->mark[2*(index-1)] and the slot after it.
   If the group did not take part in the match (or `string` is None),
   return an empty slice when `empty` is true, otherwise None.  The
   result is a new reference, or NULL with an exception set if slicing
   fails. */
LOCAL(PyObject*)
state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
{
    int i, j;

    index = (index - 1) * 2;

    /* marks above state->lastmark are not part of the current match and
       may be left over from an abandoned attempt; treat them as unset,
       matching the check pattern_new_match applies when it fills in
       group marks */
    if (string == Py_None || index + 1 > state->lastmark ||
        !state->mark[index] || !state->mark[index+1]) {
        if (empty)
            /* want empty string */
            i = j = 0;
        else {
            Py_INCREF(Py_None);
            return Py_None;
        }
    } else {
        i = STATE_OFFSET(state, state->mark[index]);
        j = STATE_OFFSET(state, state->mark[index+1]);
    }

    return PySequence_GetSlice(string, i, j);
}
/* Translate an engine status code into a Python exception:
   a memory failure becomes MemoryError, exceeding the recursion limit
   becomes RuntimeError, and every other code is reported as an
   internal engine error. */
static void
pattern_error(int status)
{
    if (status == SRE_ERROR_MEMORY) {
        PyErr_NoMemory();
        return;
    }

    if (status == SRE_ERROR_RECURSION_LIMIT) {
        PyErr_SetString(
            PyExc_RuntimeError,
            "maximum recursion limit exceeded"
            );
        return;
    }

    /* other error codes indicate compiler/engine bugs */
    PyErr_SetString(
        PyExc_RuntimeError,
        "internal error in regular expression engine"
        );
}
/* Turn the outcome of a match attempt into a Python object.
   status > 0: build a match object from the state and return it;
   status == 0: return None;
   status < 0: raise the corresponding exception and return NULL.
   NULL is also returned if the match object cannot be allocated. */
static PyObject*
pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
{
    MatchObject* match;
    char* origin;
    int width;
    int group;

    if (status < 0) {
        /* internal error */
        pattern_error(status);
        return NULL;
    }

    if (status == 0) {
        /* no match */
        Py_INCREF(Py_None);
        return Py_None;
    }

    /* allocate the match object with one (start, end) pair for the
       whole match plus one pair per group */
    match = PyObject_NEW_VAR(MatchObject, &Match_Type,
                             2*(pattern->groups+1));
    if (!match)
        return NULL;

    Py_INCREF(pattern);
    match->pattern = pattern;

    Py_INCREF(state->string);
    match->string = state->string;

    match->regs = NULL;
    match->groups = pattern->groups+1;

    /* convert pointers into character offsets from the string start */
    origin = (char*) state->beginning;
    width = state->charsize;

    /* slots 0 and 1 describe the whole match */
    match->mark[0] = ((char*) state->start - origin) / width;
    match->mark[1] = ((char*) state->ptr - origin) / width;

    for (group = 0; group < pattern->groups; group++) {
        int lo = 2 * group;
        int hi = lo + 1;

        if (hi <= state->lastmark && state->mark[lo] && state->mark[hi]) {
            match->mark[lo+2] = ((char*) state->mark[lo] - origin) / width;
            match->mark[hi+2] = ((char*) state->mark[hi] - origin) / width;
        } else {
            /* undefined */
            match->mark[hi+2] = -1;
            match->mark[lo+2] = -1;
        }
    }

    match->pos = state->pos;
    match->endpos = state->endpos;
    match->lastindex = state->lastindex;

    return (PyObject*) match;
}
/* pattern.scanner(string[, pos[, endpos]]): create a scanner object
   whose embedded state is initialised for `string`.  The scanner keeps
   a reference to the pattern.  Returns NULL with an exception set on
   argument, allocation, or state initialisation failure. */
static PyObject*
pattern_scanner(PatternObject* pattern, PyObject* args)
{
    ScannerObject* scanner;
    PyObject* subject;
    int pos = 0;
    int endpos = INT_MAX;

    if (!PyArg_ParseTuple(args, "O|ii:scanner", &subject, &pos, &endpos))
        return NULL;

    /* create scanner object */
    scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
    if (scanner == NULL)
        return NULL;

    /* create search state object; on failure the scanner owns nothing
       yet, so its memory can simply be released */
    if (state_init(&scanner->state, pattern, subject, pos, endpos) == NULL) {
        PyObject_DEL(scanner);
        return NULL;
    }

    Py_INCREF(pattern);
    scanner->pattern = (PyObject*) pattern;

    return (PyObject*) scanner;
}
/* Destructor for pattern objects: drop the references held in the
   pattern, groupindex and indexgroup fields (each may be NULL), then
   free the object itself. */
static void
pattern_dealloc(PatternObject* self)
{
    if (self->pattern != NULL) {
        Py_DECREF(self->pattern);
    }
    if (self->groupindex != NULL) {
        Py_DECREF(self->groupindex);
    }
    if (self->indexgroup != NULL) {
        Py_DECREF(self->indexgroup);
    }
    PyObject_DEL(self);
}
/* pattern.match(string[, pos[, endpos]]): try to match the pattern at
   the start of the given window of the string.  Returns a match object,
   None if there is no match, or NULL with an exception set on error.
   Note that the keyword name for the subject string is "pattern". */
static PyObject*
pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
{
    SRE_STATE state;
    int status;
    PyObject* match;

    PyObject* string;
    int start = 0;
    int end = INT_MAX;
    static const char *const kwlist[] = { "pattern", "pos", "endpos", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
                                     &string, &start, &end))
        return NULL;

    string = state_init(&state, self, string, start, end);
    if (!string)
        return NULL;

    state.ptr = state.start;

    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));

    /* NOTE(review): when HAVE_UNICODE is not defined the else branch
       leaves status unassigned; getstring only reports a character size
       of 1 in that configuration, so it should not be reached */
    if (state.charsize == 1) {
        status = sre_match(&state, PatternObject_GetCode(self), 1);
    } else {
#if defined(HAVE_UNICODE)
        status = sre_umatch(&state, PatternObject_GetCode(self), 1);
#endif
    }

    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));

    /* build the result while the state still owns its reference to the
       string, and only then release the state; pattern_new_match reads
       state.string and the mark table */
    match = pattern_new_match(self, &state, status);

    state_fini(&state);

    return match;
}
/* pattern.search(string[, pos[, endpos]]): scan the given window of the
   string for the first position where the pattern matches.  Returns a
   match object, None if there is no match, or NULL with an exception
   set on error.  Note that the keyword name for the subject string is
   "pattern". */
static PyObject*
pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
{
    SRE_STATE state;
    int status;
    PyObject* match;

    PyObject* string;
    int start = 0;
    int end = INT_MAX;
    static const char *const kwlist[] = { "pattern", "pos", "endpos", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
                                     &string, &start, &end))
        return NULL;

    string = state_init(&state, self, string, start, end);
    if (!string)
        return NULL;

    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));

    /* NOTE(review): when HAVE_UNICODE is not defined the else branch
       leaves status unassigned; getstring only reports a character size
       of 1 in that configuration, so it should not be reached */
    if (state.charsize == 1) {
        status = sre_search(&state, PatternObject_GetCode(self));
    } else {
#if defined(HAVE_UNICODE)
        status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
    }

    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));

    /* build the result while the state still owns its reference to the
       string, and only then release the state; pattern_new_match reads
       state.string and the mark table */
    match = pattern_new_match(self, &state, status);

    state_fini(&state);

    return match;
}
/* Import `module`, look up `function` in it and call it with the
   argument tuple `args`.  Returns the call's result (a new reference),
   or NULL with an exception set on failure.

   The reference to `args` is consumed on every path once it is known
   to be non-NULL, so callers can pass a freshly built tuple directly.
   A NULL `args` (e.g. a failed Py_BuildValue) is passed through as a
   NULL result. */
static PyObject*
call(char* module, char* function, PyObject* args)
{
    PyObject* name;
    PyObject* mod;
    PyObject* func;
    PyObject* result = NULL;

    if (!args)
        return NULL;

    name = PyString_FromString(module);
    if (!name)
        goto done;

    mod = PyImport_Import(name);
    Py_DECREF(name);
    if (!mod)
        goto done;

    func = PyObject_GetAttrString(mod, function);
    Py_DECREF(mod);
    if (!func)
        goto done;

    result = PyObject_CallObject(func, args);
    Py_DECREF(func);

done:
    /* release the argument tuple on error paths too, not only after a
       completed call */
    Py_DECREF(args);
    return result;
}
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
   Returns 1 on success, in which case the old reference held in *object
   has been released and *object owns the copy.  Returns 0 on failure,
   leaving *object untouched. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* call_args = Py_BuildValue("OO", *object, memo);
    PyObject* duplicate = call("copy", "deepcopy", call_args);

    if (duplicate == NULL)
        return 0;

    Py_DECREF(*object);
    *object = duplicate;

    return 1; /* success */
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -