📄 _sre.c
字号:
state->ptr = ptr - pattern[1];
if (state->ptr >= state->beginning) {
i = SRE_MATCH(state, pattern + 2, level + 1);
if (i < 0)
return i;
if (i)
return 0;
}
pattern += pattern[0];
break;
case SRE_OP_BRANCH:
/* alternation */
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
lastmark = state->lastmark;
for (; pattern[0]; pattern += pattern[0]) {
if (pattern[1] == SRE_OP_LITERAL &&
(ptr >= end || (SRE_CODE) *ptr != pattern[2]))
continue;
if (pattern[1] == SRE_OP_IN &&
(ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
continue;
state->ptr = ptr;
i = SRE_MATCH(state, pattern + 1, level + 1);
if (i)
return i;
if (state->lastmark > lastmark) {
memset(
state->mark + lastmark + 1, 0,
(state->lastmark - lastmark) * sizeof(void*)
);
state->lastmark = lastmark;
}
}
return 0;
case SRE_OP_REPEAT_ONE:
/* match repeated sequence (maximizing regexp) */
/* this operator only works if the repeated item is
exactly one character wide, and we're not already
collecting backtracking points. for other cases,
use the MAX_REPEAT operator */
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
pattern[1], pattern[2]));
if (ptr + pattern[1] > end)
return 0; /* cannot match */
state->ptr = ptr;
count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
if (count < 0)
return count;
ptr += count;
/* when we arrive here, count contains the number of
matches, and ptr points to the tail of the target
string. check if the rest of the pattern matches,
and backtrack if not. */
if (count < (int) pattern[1])
return 0;
if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
/* tail is empty. we're finished */
state->ptr = ptr;
return 1;
} else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
/* tail starts with a literal. skip positions where
the rest of the pattern cannot possibly match */
chr = pattern[pattern[0]+1];
for (;;) {
while (count >= (int) pattern[1] &&
(ptr >= end || *ptr != chr)) {
ptr--;
count--;
}
if (count < (int) pattern[1])
break;
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
if (i)
return i;
ptr--;
count--;
}
} else {
/* general case */
lastmark = state->lastmark;
while (count >= (int) pattern[1]) {
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
if (i)
return i;
ptr--;
count--;
if (state->lastmark > lastmark) {
memset(
state->mark + lastmark + 1, 0,
(state->lastmark - lastmark) * sizeof(void*)
);
state->lastmark = lastmark;
}
}
}
return 0;
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
pattern[1], pattern[2]));
rep.count = -1;
rep.pattern = pattern;
/* install new repeat context */
rep.prev = state->repeat;
state->repeat = &rep;
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
state->repeat = rep.prev;
return i;
case SRE_OP_MAX_UNTIL:
/* maximizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
/* FIXME: we probably need to deal with zero-width
matches in here... */
rp = state->repeat;
if (!rp)
return SRE_ERROR_STATE;
state->ptr = ptr;
count = rp->count + 1;
TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
if (count < rp->pattern[1]) {
/* not enough matches */
rp->count = count;
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
rp->count = count - 1;
state->ptr = ptr;
return 0;
}
if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
/* we may have enough matches, but if we can
match another item, do so */
rp->count = count;
lastmark = state->lastmark;
i = mark_save(state, 0, lastmark);
if (i < 0)
return i;
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
i = mark_restore(state, 0, lastmark);
state->lastmark = lastmark;
if (i < 0)
return i;
rp->count = count - 1;
state->ptr = ptr;
}
/* cannot match more repeated items here. make sure the
tail matches */
state->repeat = rp->prev;
i = SRE_MATCH(state, pattern, level + 1);
if (i)
return i;
state->repeat = rp;
state->ptr = ptr;
return 0;
case SRE_OP_MIN_UNTIL:
/* minimizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
rp = state->repeat;
if (!rp)
return SRE_ERROR_STATE;
count = rp->count + 1;
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
rp->pattern));
state->ptr = ptr;
if (count < rp->pattern[1]) {
/* not enough matches */
rp->count = count;
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
rp->count = count-1;
state->ptr = ptr;
return 0;
}
/* see if the tail matches */
state->repeat = rp->prev;
i = SRE_MATCH(state, pattern, level + 1);
if (i)
return i;
state->ptr = ptr;
state->repeat = rp;
if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
return 0;
rp->count = count;
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
rp->count = count - 1;
state->ptr = ptr;
return 0;
default:
TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
return SRE_ERROR_ILLEGAL;
}
}
/* can't end up here */
/* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
}
LOCAL(int)
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
{
SRE_CHAR* ptr = state->start;
SRE_CHAR* end = state->end;
int status = 0;
int prefix_len = 0;
int prefix_skip = 0;
SRE_CODE* prefix = NULL;
SRE_CODE* charset = NULL;
SRE_CODE* overlap = NULL;
int flags = 0;
if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
flags = pattern[2];
if (pattern[3] > 0) {
/* adjust end point (but make sure we leave at least one
character in there, so literal search will work) */
end -= pattern[3]-1;
if (end <= ptr)
end = ptr+1;
}
if (flags & SRE_INFO_PREFIX) {
/* pattern starts with a known prefix */
/* <length> <skip> <prefix data> <overlap data> */
prefix_len = pattern[5];
prefix_skip = pattern[6];
prefix = pattern + 7;
overlap = prefix + prefix_len - 1;
} else if (flags & SRE_INFO_CHARSET)
/* pattern starts with a character from a known set */
/* <charset> */
charset = pattern + 5;
pattern += 1 + pattern[1];
}
TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
TRACE(("charset = %p\n", charset));
#if defined(USE_FAST_SEARCH)
if (prefix_len > 1) {
/* pattern starts with a known prefix. use the overlap
table to skip forward as fast as we possibly can */
int i = 0;
end = state->end;
while (ptr < end) {
for (;;) {
if ((SRE_CODE) ptr[0] != prefix[i]) {
if (!i)
break;
else
i = overlap[i];
} else {
if (++i == prefix_len) {
/* found a potential match */
TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
state->start = ptr + 1 - prefix_len;
state->ptr = ptr + 1 - prefix_len + prefix_skip;
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
if (status != 0)
return status;
/* close but no cigar -- try again */
i = overlap[i];
}
break;
}
}
ptr++;
}
return 0;
}
#endif
if (pattern[0] == SRE_OP_LITERAL) {
/* pattern starts with a literal character. this is used
for short prefixes, and if fast search is disabled */
SRE_CODE chr = pattern[1];
end = state->end;
for (;;) {
while (ptr < end && (SRE_CODE) ptr[0] != chr)
ptr++;
if (ptr == end)
return 0;
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
state->start = ptr;
state->ptr = ++ptr;
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
status = SRE_MATCH(state, pattern + 2, 1);
if (status != 0)
break;
}
} else if (charset) {
/* pattern starts with a character from a known set */
end = state->end;
for (;;) {
while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
ptr++;
if (ptr == end)
return 0;
TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
state->start = ptr;
state->ptr = ptr;
status = SRE_MATCH(state, pattern, 1);
if (status != 0)
break;
ptr++;
}
} else
/* general case */
while (ptr <= end) {
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
state->start = state->ptr = ptr++;
status = SRE_MATCH(state, pattern, 1);
if (status != 0)
break;
}
return status;
}
LOCAL(int)
SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
{
/* check if given string is a literal template (i.e. no escapes) */
while (len-- > 0)
if (*ptr++ == '\\')
return 0;
return 1;
}
#if !defined(SRE_RECURSIVE)
/* -------------------------------------------------------------------- */
/* factories and destructors */
/* see sre.h for object declarations */
#ifndef SYMBIAN
staticforward PyTypeObject Pattern_Type;
staticforward PyTypeObject Match_Type;
staticforward PyTypeObject Scanner_Type;
#else
#define Pattern_Type ((PYTHON_GLOBALS->tobj).t_Pattern)
#define Match_Type ((PYTHON_GLOBALS->tobj).t_Match)
#define Scanner_Type ((PYTHON_GLOBALS->tobj).t_Scanner)
#endif
static PyObject *
_compile(PyObject* self_, PyObject* args)
{
/* "compile" pattern descriptor to pattern object */
PatternObject* self;
int i, n;
PyObject* pattern;
int flags = 0;
PyObject* code;
int groups = 0;
PyObject* groupindex = NULL;
PyObject* indexgroup = NULL;
if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
&PyList_Type, &code, &groups,
&groupindex, &indexgroup))
return NULL;
n = PyList_GET_SIZE(code);
self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
if (!self)
return NULL;
self->codesize = n;
for (i = 0; i < n; i++) {
PyObject *o = PyList_GET_ITEM(code, i);
self->code[i] = (SRE_CODE) PyInt_AsLong(o);
}
if (PyErr_Occurred()) {
PyObject_DEL(self);
return NULL;
}
Py_INCREF(pattern);
self->pattern = pattern;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -